add quantized rwkv v5 model (#1743)

* add quantized rwkv v5 model

* Integrate the quantized rwkv model into the initial example.

---------

Co-authored-by: laurent <laurent.mazare@gmail.com>
Author: Jack Shih
Date: 2024-02-26 04:43:40 +08:00
Committed by: GitHub
Parent: 1a6043af51
Commit: 918136ba46

4 changed files with 326 additions and 6 deletions
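For context on the example integration mentioned above: a common pattern, and a plausible reading of this commit, is to wrap both variants in a small enum so the generation loop stays agnostic to quantization. A minimal sketch, assuming the candle_transformers module paths rwkv_v5 and quantized_rwkv_v5 and a shared forward(xs, state) signature; the exact names used in the example may differ:

    use candle::{Result, Tensor};
    use candle_transformers::models::quantized_rwkv_v5;
    use candle_transformers::models::rwkv_v5::{self, State};

    // Dispatch wrapper: the rest of the pipeline calls `forward`
    // without caring whether the weights are quantized.
    enum Model {
        M(rwkv_v5::Model),
        Q(quantized_rwkv_v5::Model),
    }

    impl Model {
        fn forward(&self, xs: &Tensor, state: &mut State) -> Result<Tensor> {
            match self {
                Self::M(m) => m.forward(xs, state),
                Self::Q(m) => m.forward(xs, state),
            }
        }
    }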


@@ -124,7 +124,7 @@ impl SelfAttention {
let (b, t, s) = xs.dims3()?;
let s = s / h;
let (receptance, key, value, gate) = {
-// exctract key-value
+// extract key-value
let shifted = state.per_layer[self.layer_id].extract_key_value.clone();
let shifted = if shifted.rank() == 2 {
shifted.unsqueeze(1)?
@@ -164,7 +164,6 @@ impl SelfAttention {
let mut out: Vec<Tensor> = Vec::with_capacity(t);
for t_ in 0..t {
-//
let rt = receptance.i((.., .., t_..t_ + 1))?.contiguous()?;
let kt = key.i((.., .., .., t_..t_ + 1))?.contiguous()?;
let vt = value.i((.., .., t_..t_ + 1))?.contiguous()?;
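// The loop continues with the per-timestep WKV update; a minimal sketch,
// assuming `time_faaaa` (per-head bonus), `time_decay` (per-head decay),
// and `state_` (the running linear-attention state). The actual names and
// body in this file may differ.
let at = kt.matmul(&vt)?;                               // (b, h, s, s) outer product for step t_
let rhs = (time_faaaa.broadcast_mul(&at)? + &state_)?;  // bonus-weighted current step plus history
out.push(rt.matmul(&rhs)?.squeeze(2)?);                 // read out through receptance
state_ = (&at + time_decay.broadcast_mul(&state_)?)?;   // decay history, fold in current step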