Better control on the optional dequantization in QMatMul (#1049)

* Cosmetic change to the quantized whisper model.

* Fix the dequantization.

* Add the dequantize all variable.
This commit is contained in:
Laurent Mazare
2023-10-07 10:16:18 +01:00
committed by GitHub
parent 955e00b2e8
commit aa53368aeb
2 changed files with 33 additions and 13 deletions

View File

@@ -216,12 +216,11 @@ impl ResidualAttentionBlock {
if let Some((attn, ln)) = &mut self.cross_attn {
x = (&x + attn.forward(&ln.forward(&x)?, xa, None, flush_kv_cache)?)?;
}
let mlp = self.mlp_linear2.forward(
&self
.mlp_linear1
.forward(&self.mlp_ln.forward(&x)?)?
.gelu()?,
)?;
let mlp = x
.apply(&self.mlp_ln)?
.apply(&self.mlp_linear1)?
.gelu()?
.apply(&self.mlp_linear2)?;
x + mlp
}
}