Fixes for running Phi-4 quantized. (#2714)

This commit is contained in:
Jani Monoses
2025-01-13 15:35:33 +02:00
committed by GitHub
parent 461e8c1685
commit ab7ff7081e
2 changed files with 6 additions and 2 deletions

View File

@@ -127,7 +127,7 @@ impl LayerWeights {
.reshape((b_sz, seq_len, self.n_head, self.head_dim))?
.transpose(1, 2)?;
let k = k
- .reshape((b_sz, seq_len, self.n_head, self.head_dim))?
+ .reshape((b_sz, seq_len, self.n_kv_head, self.head_dim))?
.transpose(1, 2)?;
let v = v
.reshape((b_sz, seq_len, self.n_kv_head, self.head_dim))?