Quantized version of StableLM. (#1058)

* Quantized version of StableLM.

* Adapt the stable-lm example to support the quantized version.

* Use some separate hub repo.

* Another repo name tweak.
This commit is contained in:
Laurent Mazare
2023-10-08 15:42:38 +01:00
committed by GitHub
parent 783735cf22
commit 59ab6d7832
4 changed files with 331 additions and 14 deletions

View File

@ -1,4 +1,3 @@
#![allow(unused)]
use crate::models::with_tracing::{linear_no_bias, Linear};
use candle::{DType, Device, Module, Result, Tensor, D};
use candle_nn::{Activation, LayerNorm, VarBuilder};
@ -41,21 +40,21 @@ impl Config {
}
}
fn head_dim(&self) -> usize {
pub fn head_dim(&self) -> usize {
self.hidden_size / self.num_attention_heads
}
fn rotary_ndims(&self) -> usize {
pub fn rotary_ndims(&self) -> usize {
(self.head_dim() as f64 * self.rope_pct) as usize
}
fn num_kv_groups(&self) -> usize {
pub fn num_kv_groups(&self) -> usize {
self.num_attention_heads / self.num_key_value_heads
}
}
#[derive(Debug)]
struct RotaryEmbedding {
pub(crate) struct RotaryEmbedding {
sin: Tensor,
cos: Tensor,
}
@ -66,7 +65,7 @@ fn rotate_half(xs: &Tensor) -> Result<Tensor> {
}
impl RotaryEmbedding {
fn new(dtype: DType, cfg: &Config, dev: &Device) -> Result<Self> {
pub(crate) fn new(dtype: DType, cfg: &Config, dev: &Device) -> Result<Self> {
let dim = cfg.rotary_ndims();
let max_seq_len = cfg.max_position_embeddings;
let inv_freq: Vec<_> = (0..dim)
@ -86,7 +85,7 @@ impl RotaryEmbedding {
})
}
fn apply_rotary_emb_qkv(
pub(crate) fn apply_rotary_emb_qkv(
&self,
q: &Tensor,
k: &Tensor,