Mirror of https://github.com/huggingface/candle.git, synced 2025-06-20 12:06:35 +00:00
add quantized rwkv v5 model (#1743)
* Add quantized rwkv v5 model

* Integrate the quantized rwkv model in the initial example.

---------

Co-authored-by: laurent <laurent.mazare@gmail.com>
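For context, here is a minimal sketch of how the quantized model can be loaded and run after this change. It assumes the usual candle pattern (a serde-deserialized `Config` plus weights read through `quantized_var_builder::VarBuilder::from_gguf`); the file names and token id are placeholders and the exact signatures may differ, so treat this as an outline rather than the example code (see candle-examples/examples/rwkv for the real integration; inside the candle workspace the core crate is imported as `candle`):

    use candle::{Device, Tensor};
    use candle_transformers::models::quantized_rwkv_v5::Model;
    use candle_transformers::models::rwkv_v5::{Config, State};
    use candle_transformers::quantized_var_builder::VarBuilder;

    fn main() -> anyhow::Result<()> {
        let device = Device::Cpu;
        // The config is plain JSON; rwkv_v5::Config derives Deserialize.
        let config: Config = serde_json::from_slice(&std::fs::read("config.json")?)?;
        // The only difference from the f32 path: weights come from a GGUF
        // file through the quantized VarBuilder instead of safetensors.
        let vb = VarBuilder::from_gguf("model.gguf", &device)?;
        let model = Model::new(&config, vb)?;
        // RWKV is recurrent: a mutable per-layer state is threaded through
        // every forward call instead of re-attending over past tokens.
        let mut state = State::new(1, &config, &device)?;
        let input = Tensor::new(&[510u32], &device)?.unsqueeze(0)?; // one token id
        let logits = model.forward(&input, &mut state)?;
        println!("logits: {:?}", logits.dims());
        Ok(())
    }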
@@ -124,7 +124,7 @@ impl SelfAttention {
         let (b, t, s) = xs.dims3()?;
         let s = s / h;
         let (receptance, key, value, gate) = {
-            // exctract key-value
+            // extract key-value
             let shifted = state.per_layer[self.layer_id].extract_key_value.clone();
             let shifted = if shifted.rank() == 2 {
                 shifted.unsqueeze(1)?
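The rank check above covers the case where the cached per-layer shift state is stored without a time axis; `unsqueeze(1)` inserts a length-1 time dimension, presumably so the state lines up with the `(b, t, s)` activations. A standalone illustration of that shape fix, with made-up sizes:

    use candle::{DType, Device, Tensor};

    fn main() -> candle::Result<()> {
        let dev = Device::Cpu;
        let shifted = Tensor::zeros((2, 8), DType::F32, &dev)?; // (batch, hidden): no time axis
        let shifted = if shifted.rank() == 2 {
            shifted.unsqueeze(1)? // insert a length-1 time axis at dim 1
        } else {
            shifted
        };
        assert_eq!(shifted.dims(), &[2, 1, 8]); // (batch, 1, hidden)
        Ok(())
    }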
@@ -164,7 +164,6 @@ impl SelfAttention {
 
         let mut out: Vec<Tensor> = Vec::with_capacity(t);
         for t_ in 0..t {
-            //
             let rt = receptance.i((.., .., t_..t_ + 1))?.contiguous()?;
             let kt = key.i((.., .., .., t_..t_ + 1))?.contiguous()?;
             let vt = value.i((.., .., t_..t_ + 1))?.contiguous()?;
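The loop above processes one token at a time: `i((.., .., t_..t_ + 1))` takes a length-1 slice along the time axis (a range keeps that axis where an integer index would drop it), and `contiguous` materializes the strided view before the matmuls. A standalone sketch of the pattern; the `(b, h, t, s)` layout here is an assumption for illustration:

    use candle::{DType, Device, IndexOp, Tensor};

    fn main() -> candle::Result<()> {
        let dev = Device::Cpu;
        let (b, h, t, s) = (1, 2, 4, 8);
        let value = Tensor::zeros((b, h, t, s), DType::F32, &dev)?;
        for t_ in 0..t {
            // Trailing dims not named in the index tuple are left untouched,
            // so this slices only the time axis and keeps it as length 1.
            let vt = value.i((.., .., t_..t_ + 1))?.contiguous()?;
            assert_eq!(vt.dims(), &[b, h, 1, s]);
        }
        Ok(())
    }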