Use the fast RmsNorm in the quantized model. (#1904)

This commit is contained in:
Laurent Mazare
2024-03-21 18:49:35 +01:00
committed by GitHub
parent 9563a5fee4
commit c0bdd9c7a6
3 changed files with 21 additions and 35 deletions

View File

@@ -327,6 +327,7 @@ impl Model {
xs = layer.forward(&xs, attention_mask.as_ref(), seqlen_offset)?
}
xs.narrow(1, seq_len - 1, 1)?
.contiguous()?
.apply(&self.norm)?
.apply(&self.lm_head)
}