From d54e02d73de3391b34d4511aa7add32f9cffd4f0 Mon Sep 17 00:00:00 2001
From: Laurent Mazare
Date: Thu, 23 May 2024 21:24:55 +0200
Subject: [PATCH] Avoid a contiguous call in the quantized phi 3 model. (#2209)

* Simplify the KvCache api.

* Avoid a contiguous call in the quantized phi3 model.
---
 candle-transformers/src/models/quantized_phi3.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/candle-transformers/src/models/quantized_phi3.rs b/candle-transformers/src/models/quantized_phi3.rs
index f9b55d9d..257ad983 100644
--- a/candle-transformers/src/models/quantized_phi3.rs
+++ b/candle-transformers/src/models/quantized_phi3.rs
@@ -146,7 +146,6 @@ impl LayerWeights {
         };
         let att = candle_nn::ops::softmax_last_dim(&att)?;
-        // Convert to contiguous as matmul doesn't support strided vs for now.
-        att.matmul(&v.contiguous()?)?
+        att.matmul(&v)?
         };
         let y = y.transpose(1, 2)?.reshape(&[b_sz, seq_len, n_embd])?;
         let y = self.attn_output.forward(&y)?;