mirror of
https://github.com/huggingface/candle.git
synced 2025-06-18 19:47:12 +00:00
Use the fast RmsNorm in the quantized model. (#1904)
This commit is contained in:
@ -327,6 +327,7 @@ impl Model {
|
||||
xs = layer.forward(&xs, attention_mask.as_ref(), seqlen_offset)?
|
||||
}
|
||||
xs.narrow(1, seq_len - 1, 1)?
|
||||
.contiguous()?
|
||||
.apply(&self.norm)?
|
||||
.apply(&self.lm_head)
|
||||
}
|
||||
|
Reference in New Issue
Block a user