mirror of
https://github.com/huggingface/candle.git
synced 2025-06-18 19:47:12 +00:00
Cleanup.
This commit is contained in:
@ -11,13 +11,7 @@ enum QuantizationMode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl QuantizationMode {
|
impl QuantizationMode {
|
||||||
fn quantize(
|
fn quantize(&self, name: &str, tensor: QTensor, dtype: GgmlDType) -> Result<QTensor> {
|
||||||
&self,
|
|
||||||
name: &str,
|
|
||||||
tensor: QTensor,
|
|
||||||
dtype: GgmlDType,
|
|
||||||
// default: fn(&Tensor) -> Result<QTensor>,
|
|
||||||
) -> Result<QTensor> {
|
|
||||||
match self {
|
match self {
|
||||||
Self::Llama => {
|
Self::Llama => {
|
||||||
// Same behavior as the llama.cpp quantization.
|
// Same behavior as the llama.cpp quantization.
|
||||||
@ -28,7 +22,6 @@ impl QuantizationMode {
|
|||||||
QTensor::quantize(&tensor, GgmlDType::Q6K)
|
QTensor::quantize(&tensor, GgmlDType::Q6K)
|
||||||
} else {
|
} else {
|
||||||
QTensor::quantize(&tensor, dtype)
|
QTensor::quantize(&tensor, dtype)
|
||||||
// default(&tensor)
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Ok(tensor)
|
Ok(tensor)
|
||||||
@ -270,22 +263,6 @@ fn run_quantize_safetensors(
|
|||||||
println!("tensors: {}", tensors.len());
|
println!("tensors: {}", tensors.len());
|
||||||
|
|
||||||
let dtype = q.dtype();
|
let dtype = q.dtype();
|
||||||
// let quantize_fn = match q {
|
|
||||||
// Quantization::Q4_0 => QTensor::quantize::<k_quants::BlockQ4_0>,
|
|
||||||
// Quantization::Q4_1 => QTensor::quantize::<k_quants::BlockQ4_1>,
|
|
||||||
// Quantization::Q5_0 => QTensor::quantize::<k_quants::BlockQ5_0>,
|
|
||||||
// Quantization::Q5_1 => QTensor::quantize::<k_quants::BlockQ5_1>,
|
|
||||||
// Quantization::Q8_0 => QTensor::quantize::<k_quants::BlockQ8_0>,
|
|
||||||
// Quantization::Q8_1 => QTensor::quantize::<k_quants::BlockQ8_1>,
|
|
||||||
// Quantization::Q2k => QTensor::quantize::<k_quants::BlockQ2K>,
|
|
||||||
// Quantization::Q3k => QTensor::quantize::<k_quants::BlockQ3K>,
|
|
||||||
// Quantization::Q4k => QTensor::quantize::<k_quants::BlockQ4K>,
|
|
||||||
// Quantization::Q5k => QTensor::quantize::<k_quants::BlockQ5K>,
|
|
||||||
// Quantization::Q6k => QTensor::quantize::<k_quants::BlockQ6K>,
|
|
||||||
// Quantization::Q8k => QTensor::quantize::<k_quants::BlockQ8K>,
|
|
||||||
// Quantization::F16 => QTensor::quantize::<half::f16>,
|
|
||||||
// Quantization::F32 => QTensor::quantize::<f32>,
|
|
||||||
// };
|
|
||||||
let block_size = dtype.block_size();
|
let block_size = dtype.block_size();
|
||||||
|
|
||||||
let qtensors = tensors
|
let qtensors = tensors
|
||||||
|
@ -131,7 +131,7 @@ impl MetalDevice {
|
|||||||
&self.device
|
&self.device
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn fence(&self) -> &metal::Fence {
|
pub(crate) fn fence(&self) -> &metal::Fence {
|
||||||
&self.fence
|
&self.fence
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,14 +29,12 @@ impl Device {
|
|||||||
fn qzeros(&self, elem_count: usize, dtype: GgmlDType) -> Result<QStorage> {
|
fn qzeros(&self, elem_count: usize, dtype: GgmlDType) -> Result<QStorage> {
|
||||||
match self {
|
match self {
|
||||||
Device::Cpu => {
|
Device::Cpu => {
|
||||||
let storage = dtype.zeros(elem_count);
|
let storage = dtype.cpu_zeros(elem_count);
|
||||||
Ok(QStorage::Cpu(storage))
|
Ok(QStorage::Cpu(storage))
|
||||||
}
|
}
|
||||||
#[cfg(feature = "metal")]
|
#[cfg(feature = "metal")]
|
||||||
Device::Metal(metal) => {
|
Device::Metal(metal) => {
|
||||||
let size = elem_count * dtype.type_size() / dtype.block_size();
|
let size = elem_count * dtype.type_size() / dtype.block_size();
|
||||||
// let cpu_storage = dtype.zeros(elem_count);
|
|
||||||
// assert_eq!(size, cpu_storage.size());
|
|
||||||
let buffer = metal.allocate_zeros(size)?;
|
let buffer = metal.allocate_zeros(size)?;
|
||||||
Ok(QStorage::Metal(metal::QMetalStorage::new(
|
Ok(QStorage::Metal(metal::QMetalStorage::new(
|
||||||
buffer,
|
buffer,
|
||||||
@ -182,7 +180,7 @@ impl GgmlDType {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// The block dtype
|
/// The block dtype
|
||||||
pub fn zeros(&self, elem_count: usize) -> Box<dyn QuantizedType> {
|
pub fn cpu_zeros(&self, elem_count: usize) -> Box<dyn QuantizedType> {
|
||||||
match self {
|
match self {
|
||||||
Self::F32 => Box::new(vec![f32::zeros(); elem_count]),
|
Self::F32 => Box::new(vec![f32::zeros(); elem_count]),
|
||||||
Self::F16 => Box::new(vec![f16::zeros(); elem_count]),
|
Self::F16 => Box::new(vec![f16::zeros(); elem_count]),
|
||||||
|
Reference in New Issue
Block a user