mirror of
https://github.com/huggingface/candle.git
synced 2025-06-21 12:20:46 +00:00
Quantized version of StableLM. (#1058)
* Quantized version of StableLM. * Adapt the stable-lm example to support quantized models. * Use some separate hub repo. * Another repo name tweak.
This commit is contained in:
@ -1,4 +1,3 @@
|
||||
#![allow(unused)]
|
||||
use crate::models::with_tracing::{linear_no_bias, Linear};
|
||||
use candle::{DType, Device, Module, Result, Tensor, D};
|
||||
use candle_nn::{Activation, LayerNorm, VarBuilder};
|
||||
@ -41,21 +40,21 @@ impl Config {
|
||||
}
|
||||
}
|
||||
|
||||
fn head_dim(&self) -> usize {
|
||||
pub fn head_dim(&self) -> usize {
|
||||
self.hidden_size / self.num_attention_heads
|
||||
}
|
||||
|
||||
fn rotary_ndims(&self) -> usize {
|
||||
pub fn rotary_ndims(&self) -> usize {
|
||||
(self.head_dim() as f64 * self.rope_pct) as usize
|
||||
}
|
||||
|
||||
fn num_kv_groups(&self) -> usize {
|
||||
pub fn num_kv_groups(&self) -> usize {
|
||||
self.num_attention_heads / self.num_key_value_heads
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RotaryEmbedding {
|
||||
pub(crate) struct RotaryEmbedding {
|
||||
sin: Tensor,
|
||||
cos: Tensor,
|
||||
}
|
||||
@ -66,7 +65,7 @@ fn rotate_half(xs: &Tensor) -> Result<Tensor> {
|
||||
}
|
||||
|
||||
impl RotaryEmbedding {
|
||||
fn new(dtype: DType, cfg: &Config, dev: &Device) -> Result<Self> {
|
||||
pub(crate) fn new(dtype: DType, cfg: &Config, dev: &Device) -> Result<Self> {
|
||||
let dim = cfg.rotary_ndims();
|
||||
let max_seq_len = cfg.max_position_embeddings;
|
||||
let inv_freq: Vec<_> = (0..dim)
|
||||
@ -86,7 +85,7 @@ impl RotaryEmbedding {
|
||||
})
|
||||
}
|
||||
|
||||
fn apply_rotary_emb_qkv(
|
||||
pub(crate) fn apply_rotary_emb_qkv(
|
||||
&self,
|
||||
q: &Tensor,
|
||||
k: &Tensor,
|
||||
|
Reference in New Issue
Block a user