Add the quantized mpt model. (#1123)

* Add the quantized mpt model.
* Support the quantized model for replit-code.
2025-06-19 19:58:35 +00:00 · 2023-10-18 16:29:38 +01:00
parent cb034506cd
commit 86e7d539d2
5 changed files with 247 additions and 9 deletions
--- a/candle-transformers/src/quantized_nn.rs
+++ b/candle-transformers/src/quantized_nn.rs
@@ -59,6 +59,11 @@ pub fn layer_norm(size: usize, eps: f64, vb: VarBuilder) -> Result<candle_nn::La
    Ok(candle_nn::LayerNorm::new(weight, bias, eps))
 }

+pub fn layer_norm_no_bias(size: usize, eps: f64, vb: VarBuilder) -> Result<candle_nn::LayerNorm> { // weight-only LayerNorm constructor
+    let weight = vb.get(size, "weight")?.dequantize(vb.device())?; // only "weight" is fetched and dequantized; no "bias" lookup here
+    Ok(candle_nn::LayerNorm::new_no_bias(weight, eps)) // built from the dequantized weight and `eps`, with no bias argument
+}
+
 pub fn linear_no_bias(in_dim: usize, out_dim: usize, vb: VarBuilder) -> Result<Linear> {
    let weight = QMatMul::new(in_dim, out_dim, vb)?;
    Ok(Linear { weight, bias: None })