feat: add clear_kv_cache to mistral and qmistral models (#1464)

2025-06-18 19:47:12 +00:00 · 2023-12-21 15:19:19 -05:00
parent 563a79afa1
commit f6408a3779
2 changed files with 28 additions and 0 deletions
--- a/candle-transformers/src/models/mistral.rs
+++ b/candle-transformers/src/models/mistral.rs
@ -297,6 +297,10 @@ impl Attention {
            .reshape((b_sz, q_len, self.hidden_size))?
            .apply(&self.o_proj)
    }
    /// Resets this attention layer's `kv_cache` field to `None`, discarding
    /// whatever value was stored there.
    fn clear_kv_cache(&mut self) {
        // `take` leaves `None` behind; the previous contents are dropped right away.
        drop(self.kv_cache.take());
    }
 }
 #[derive(Debug, Clone)]
@ -340,6 +344,10 @@ impl DecoderLayer {
        let xs = xs.apply(&self.post_attention_layernorm)?.apply(&self.mlp)?;
        residual + xs
    }
    /// Clears the cache held by this layer by forwarding the request to
    /// `self_attn.clear_kv_cache()`.
    fn clear_kv_cache(&mut self) {
        let attention = &mut self.self_attn;
        attention.clear_kv_cache();
    }
 }
 #[derive(Debug, Clone)]
@ -423,4 +431,10 @@ impl Model {
            .apply(&self.norm)?
            .apply(&self.lm_head)
    }
    /// Calls `clear_kv_cache` on every entry of `self.layers`, in order.
    ///
    /// NOTE(review): presumably meant to be called between independent
    /// sequences so that stale cached state is not reused; confirm with callers.
    pub fn clear_kv_cache(&mut self) {
        self.layers
            .iter_mut()
            .for_each(|decoder_layer| decoder_layer.clear_kv_cache());
    }
 }
--- a/candle-transformers/src/models/quantized_mistral.rs
+++ b/candle-transformers/src/models/quantized_mistral.rs
@ -198,6 +198,10 @@ impl Attention {
            .reshape((b_sz, q_len, self.hidden_size))?
            .apply(&self.o_proj)
    }
    /// Resets this attention layer's `kv_cache` field to `None`, discarding
    /// whatever value was stored there.
    fn clear_kv_cache(&mut self) {
        // `take` leaves `None` behind; the previous contents are dropped right away.
        drop(self.kv_cache.take());
    }
 }
 #[derive(Debug, Clone)]
@ -241,6 +245,10 @@ impl DecoderLayer {
        let xs = xs.apply(&self.post_attention_layernorm)?.apply(&self.mlp)?;
        residual + xs
    }
    /// Clears the cache held by this layer by forwarding the request to
    /// `self_attn.clear_kv_cache()`.
    fn clear_kv_cache(&mut self) {
        let attention = &mut self.self_attn;
        attention.clear_kv_cache();
    }
 }
 #[derive(Debug, Clone)]
@ -322,4 +330,10 @@ impl Model {
            .apply(&self.norm)?
            .apply(&self.lm_head)
    }
    /// Calls `clear_kv_cache` on every entry of `self.layers`, in order.
    ///
    /// NOTE(review): presumably meant to be called between independent
    /// sequences so that stale cached state is not reused; confirm with callers.
    pub fn clear_kv_cache(&mut self) {
        self.layers
            .iter_mut()
            .for_each(|decoder_layer| decoder_layer.clear_kv_cache());
    }
 }