mirror of
https://github.com/huggingface/candle.git
synced 2025-06-18 03:28:50 +00:00
Split out the quantized file. (#456)
This commit is contained in:
82
candle-core/src/quantized/mod.rs
Normal file
82
candle-core/src/quantized/mod.rs
Normal file
@ -0,0 +1,82 @@
|
||||
use crate::Result;
|
||||
|
||||
pub mod ggml_file;
|
||||
pub mod k_quants;
|
||||
|
||||
pub use k_quants::GgmlType;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum GgmlDType {
|
||||
F32,
|
||||
F16,
|
||||
Q4_0,
|
||||
Q4_1,
|
||||
Q5_0,
|
||||
Q5_1,
|
||||
Q8_0,
|
||||
Q8_1,
|
||||
Q2K,
|
||||
Q3K,
|
||||
Q4K,
|
||||
Q5K,
|
||||
Q6K,
|
||||
Q8K,
|
||||
}
|
||||
|
||||
impl GgmlDType {
|
||||
pub(crate) fn from_u32(u: u32) -> Result<Self> {
|
||||
let dtype = match u {
|
||||
0 => Self::F32,
|
||||
1 => Self::F16,
|
||||
2 => Self::Q4_0,
|
||||
3 => Self::Q4_1,
|
||||
6 => Self::Q5_0,
|
||||
7 => Self::Q5_1,
|
||||
8 => Self::Q8_0,
|
||||
9 => Self::Q8_1,
|
||||
10 => Self::Q2K,
|
||||
11 => Self::Q3K,
|
||||
12 => Self::Q4K,
|
||||
13 => Self::Q5K,
|
||||
14 => Self::Q6K,
|
||||
15 => Self::Q8K,
|
||||
_ => crate::bail!("unknown dtype for tensor {u}"),
|
||||
};
|
||||
Ok(dtype)
|
||||
}
|
||||
|
||||
fn type_size(&self) -> usize {
|
||||
use k_quants::*;
|
||||
match self {
|
||||
Self::F32 => 4,
|
||||
Self::F16 => 2,
|
||||
Self::Q4_0 => std::mem::size_of::<BlockQ4_0>(),
|
||||
Self::Q4_1 => std::mem::size_of::<BlockQ4_1>(),
|
||||
Self::Q5_0 => std::mem::size_of::<BlockQ5_0>(),
|
||||
Self::Q5_1 => std::mem::size_of::<BlockQ5_1>(),
|
||||
// https://github.com/ggerganov/llama.cpp/blob/468ea24fb4633a0d681f7ac84089566c1c6190cb/ggml.c#L932
|
||||
Self::Q8_0 => std::mem::size_of::<BlockQ8_0>(),
|
||||
Self::Q8_1 => std::mem::size_of::<BlockQ8_1>(),
|
||||
Self::Q2K => std::mem::size_of::<BlockQ2K>(),
|
||||
Self::Q3K => std::mem::size_of::<BlockQ3K>(),
|
||||
Self::Q4K => std::mem::size_of::<BlockQ4K>(),
|
||||
Self::Q5K => std::mem::size_of::<BlockQ5K>(),
|
||||
Self::Q6K => std::mem::size_of::<BlockQ6K>(),
|
||||
Self::Q8K => std::mem::size_of::<BlockQ8K>(),
|
||||
}
|
||||
}
|
||||
|
||||
fn blck_size(&self) -> usize {
|
||||
match self {
|
||||
Self::F32 => 1,
|
||||
Self::F16 => 1,
|
||||
Self::Q4_0 => k_quants::QK4_0,
|
||||
Self::Q4_1 => k_quants::QK4_1,
|
||||
Self::Q5_0 => k_quants::QK5_0,
|
||||
Self::Q5_1 => k_quants::QK5_1,
|
||||
Self::Q8_0 => k_quants::QK8_0,
|
||||
Self::Q8_1 => k_quants::QK8_1,
|
||||
Self::Q2K | Self::Q3K | Self::Q4K | Self::Q5K | Self::Q6K | Self::Q8K => k_quants::QK_K,
|
||||
}
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user