Make tensor contiguous before the repeat_kv calls to avoid strided copies (#2953)

2025-06-16 10:38:54 +00:00 · 2025-05-14 10:47:28 +02:00
parent 485ddf2996
commit 6bd61727bc
1 changed file with 4 additions and 0 deletions
--- a/candle-transformers/src/models/quantized_qwen3.rs
+++ b/candle-transformers/src/models/quantized_qwen3.rs
@@ -217,6 +217,10 @@ impl AttentionWeights {
        }
        let (k, v) = self.kv_cache.append(&k.contiguous()?, &v.contiguous()?)?;
        // Make tensor contiguous to avoid some strided copies
        let k = k.contiguous()?;
        let v = v.contiguous()?;
        let k = repeat_kv(k, self.num_kv_groups)?.contiguous()?;
        let v = repeat_kv(v, self.num_kv_groups)?.contiguous()?;