Avoid a contiguous call in the quantized phi3 model. (#2209)

* Simplify the KvCache API.
* Avoid a contiguous call in the quantized phi3 model.
2025-06-16 10:38:54 +00:00 · 2024-05-23 21:24:55 +02:00
parent 45e235a747
commit d54e02d73d
1 changed files with 1 additions and 1 deletions
--- a/candle-transformers/src/models/quantized_phi3.rs
+++ b/candle-transformers/src/models/quantized_phi3.rs
@@ -146,7 +146,7 @@ impl LayerWeights {
            };
            let att = candle_nn::ops::softmax_last_dim(&att)?;
            // Convert to contiguous as matmul doesn't support strided vs for now.
-            att.matmul(&v.contiguous()?)?
+            att.matmul(&v)?
        };
        let y = y.transpose(1, 2)?.reshape(&[b_sz, seq_len, n_embd])?;
        let y = self.attn_output.forward(&y)?;