Quantized version of StableLM. (#1058)

* Quantized version of StableLM. * Adapt the stable-lm example to support the quantized model. * Use some separate hub repo. * Another repo name tweak.
2025-06-21 12:20:46 +00:00 · 2023-10-08 15:42:38 +01:00
parent 783735cf22
commit 59ab6d7832
4 changed files with 331 additions and 14 deletions
--- a/candle-transformers/src/models/stable_lm.rs
+++ b/candle-transformers/src/models/stable_lm.rs
@ -1,4 +1,3 @@
-#![allow(unused)]
 use crate::models::with_tracing::{linear_no_bias, Linear};
 use candle::{DType, Device, Module, Result, Tensor, D};
 use candle_nn::{Activation, LayerNorm, VarBuilder};
@ -41,21 +40,21 @@ impl Config {
        }
    }

-    fn head_dim(&self) -> usize {
+    pub fn head_dim(&self) -> usize {
        self.hidden_size / self.num_attention_heads
    }

-    fn rotary_ndims(&self) -> usize {
+    pub fn rotary_ndims(&self) -> usize {
        (self.head_dim() as f64 * self.rope_pct) as usize
    }

-    fn num_kv_groups(&self) -> usize {
+    pub fn num_kv_groups(&self) -> usize {
        self.num_attention_heads / self.num_key_value_heads
    }
 }

 #[derive(Debug)]
-struct RotaryEmbedding {
+pub(crate) struct RotaryEmbedding {
    sin: Tensor,
    cos: Tensor,
 }
@ -66,7 +65,7 @@ fn rotate_half(xs: &Tensor) -> Result<Tensor> {
 }

 impl RotaryEmbedding {
-    fn new(dtype: DType, cfg: &Config, dev: &Device) -> Result<Self> {
+    pub(crate) fn new(dtype: DType, cfg: &Config, dev: &Device) -> Result<Self> {
        let dim = cfg.rotary_ndims();
        let max_seq_len = cfg.max_position_embeddings;
        let inv_freq: Vec<_> = (0..dim)
@ -86,7 +85,7 @@ impl RotaryEmbedding {
        })
    }

-    fn apply_rotary_emb_qkv(
+    pub(crate) fn apply_rotary_emb_qkv(
        &self,
        q: &Tensor,
        k: &Tensor,