From d54e02d73de3391b34d4511aa7add32f9cffd4f0 Mon Sep 17 00:00:00 2001
From: Laurent Mazare
Date: Thu, 23 May 2024 21:24:55 +0200
Subject: [PATCH] Avoid a contiguous call in the quantized phi 3 model. (#2209)

* Simplify the KvCache api.

* Avoid a contiguous call in the quantized phi3 model.
---
 candle-transformers/src/models/quantized_phi3.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/candle-transformers/src/models/quantized_phi3.rs b/candle-transformers/src/models/quantized_phi3.rs
index f9b55d9d..257ad983 100644
--- a/candle-transformers/src/models/quantized_phi3.rs
+++ b/candle-transformers/src/models/quantized_phi3.rs
@@ -146,7 +146,6 @@ impl LayerWeights {
         };
         let att = candle_nn::ops::softmax_last_dim(&att)?;
-        // Convert to contiguous as matmul doesn't support strided vs for now.
-        att.matmul(&v.contiguous()?)?
+        att.matmul(&v)?
         };
         let y = y.transpose(1, 2)?.reshape(&[b_sz, seq_len, n_embd])?;
         let y = self.attn_output.forward(&y)?;