Better control on the optional dequantization in QMatMul (#1049)

* Cosmetic change to the quantized whisper model.

* Fix the dequantization.

* Add the dequantize all variable.
This commit is contained in:
Laurent Mazare
2023-10-07 10:16:18 +01:00
committed by GitHub
parent 955e00b2e8
commit aa53368aeb
2 changed files with 33 additions and 13 deletions

View File

@@ -216,12 +216,11 @@ impl ResidualAttentionBlock {
if let Some((attn, ln)) = &mut self.cross_attn {
x = (&x + attn.forward(&ln.forward(&x)?, xa, None, flush_kv_cache)?)?;
}
let mlp = self.mlp_linear2.forward(
&self
.mlp_linear1
.forward(&self.mlp_ln.forward(&x)?)?
.gelu()?,
)?;
let mlp = x
.apply(&self.mlp_ln)?
.apply(&self.mlp_linear1)?
.gelu()?
.apply(&self.mlp_linear2)?;
x + mlp
}
}