mirror of
https://github.com/huggingface/candle.git
synced 2025-06-20 04:00:28 +00:00
Better control on the optional dequantization in QMatMul (#1049)
* Cosmetic change to the quantized whisper model. * Fix the dequantization. * Add the dequantize all variable.
This commit is contained in:
@@ -216,12 +216,11 @@ impl ResidualAttentionBlock {
         if let Some((attn, ln)) = &mut self.cross_attn {
             x = (&x + attn.forward(&ln.forward(&x)?, xa, None, flush_kv_cache)?)?;
         }
-        let mlp = self.mlp_linear2.forward(
-            &self
-                .mlp_linear1
-                .forward(&self.mlp_ln.forward(&x)?)?
-                .gelu()?,
-        )?;
+        let mlp = x
+            .apply(&self.mlp_ln)?
+            .apply(&self.mlp_linear1)?
+            .gelu()?
+            .apply(&self.mlp_linear2)?;
         x + mlp
     }
 }
|
Reference in New Issue
Block a user