From 912a3d63b049958d8b3bbaf0535a3179c17fd333 Mon Sep 17 00:00:00 2001
From: Laurent Mazare
Date: Fri, 22 Sep 2023 21:36:56 +0100
Subject: [PATCH] Use the proper block size for quantizing models. (#933)

* Use the proper block size for quantizing models.

* Use the proper dimension.
---
 candle-core/examples/tensor-tools.rs   | 19 +++++++++++++++++--
 candle-examples/examples/phi/README.md |  2 +-
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/candle-core/examples/tensor-tools.rs b/candle-core/examples/tensor-tools.rs
index c3459004..c0d5a334 100644
--- a/candle-core/examples/tensor-tools.rs
+++ b/candle-core/examples/tensor-tools.rs
@@ -243,12 +243,27 @@ fn run_quantize_safetensors(
         Quantization::F16 => QTensor::quantize::<half::f16>,
         Quantization::F32 => QTensor::quantize::<f32>,
     };
+    let block_size = match q {
+        Quantization::Q4_0 => k_quants::QK4_0,
+        Quantization::Q4_1 => k_quants::QK4_1,
+        Quantization::Q5_0 => k_quants::QK5_0,
+        Quantization::Q5_1 => k_quants::QK5_1,
+        Quantization::Q8_0 => k_quants::QK8_0,
+        Quantization::Q8_1 => k_quants::QK8_1,
+        Quantization::Q2k
+        | Quantization::Q3k
+        | Quantization::Q4k
+        | Quantization::Q5k
+        | Quantization::Q6k
+        | Quantization::Q8k => k_quants::QK_K,
+        Quantization::F16 | Quantization::F32 => 1,
+    };
     let qtensors = tensors
         .into_par_iter()
         .map(|(name, tensor)| {
-            println!("  quantizing {name} {tensor:?}");
-            let should_quantize = tensor.rank() == 2 && tensor.dim(0)? % 256 == 0;
+            let should_quantize = tensor.rank() == 2 && tensor.dim(1)? % block_size == 0;
+            println!("  quantizing {name} {tensor:?} {should_quantize}");
             let tensor = if should_quantize {
                 quantize_fn(&tensor)?
             } else {
diff --git a/candle-examples/examples/phi/README.md b/candle-examples/examples/phi/README.md
index 8cf053bd..cdd32404 100644
--- a/candle-examples/examples/phi/README.md
+++ b/candle-examples/examples/phi/README.md
@@ -1,4 +1,4 @@
-# candle-starcoder: code generation model
+# candle-phi: 1.3b LLM with state of the art performance for <10b models.
 
 [phi-1.5](https://huggingface.co/microsoft/phi-1_5).
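
Note (not part of the patch): each GGML-style quantization format packs a fixed
number of weights per block along a tensor's rows, which is why the check moves
from a hard-coded `dim(0)? % 256` to `dim(1)? % block_size`. Below is a minimal
standalone Rust sketch of that divisibility rule; the constants mirror candle's
k_quants block sizes, and the helper name `can_quantize` is hypothetical.

// Sketch only: illustrates the dimension/block-size rule the patch adopts.
// These constants mirror candle's k_quants block sizes.
const QK4_0: usize = 32; // elements per Q4_0 block
const QK_K: usize = 256; // elements per k-quant (Q2K..Q8K) block

// Hypothetical helper: a 2D weight tensor can be quantized only if each row
// splits into a whole number of blocks, i.e. the last dimension divides
// evenly by the block size.
fn can_quantize(shape: &[usize], block_size: usize) -> bool {
    shape.len() == 2 && shape[1] % block_size == 0
}

fn main() {
    // Rows of 3200 elements: 100 Q4_0 blocks of 32 fit exactly, but 12
    // k-quant blocks of 256 would leave 128 elements over.
    assert!(can_quantize(&[768, 3200], QK4_0)); // 3200 % 32 == 0
    assert!(!can_quantize(&[768, 3200], QK_K)); // 3200 % 256 == 128
    // The old check (`dim(0)? % 256`) looked at the wrong axis and assumed a
    // single block size, so it could accept tensors that fail at quantize
    // time and reject tensors that would have quantized fine.
    println!("block-size checks passed");
}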