mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 18:48:51 +00:00
Quantization tests + fix some issues. (#616)
This commit is contained in:
@ -302,9 +302,9 @@ impl GgmlType for BlockQ4_1 {
|
|||||||
ys.d = f16::from_f32(d);
|
ys.d = f16::from_f32(d);
|
||||||
ys.m = f16::from_f32(min);
|
ys.m = f16::from_f32(min);
|
||||||
|
|
||||||
for (j, q) in ys.qs.iter_mut().enumerate() {
|
for (j, q) in ys.qs.iter_mut().take(qk / 2).enumerate() {
|
||||||
let x0 = (xs[i * qk + j] - min) * id;
|
let x0 = (xs[j] - min) * id;
|
||||||
let x1 = (xs[i * qk + qk / 2 + j] - min) * id;
|
let x1 = (xs[qk / 2 + j] - min) * id;
|
||||||
|
|
||||||
let xi0 = u8::min(15, (x0 + 0.5) as u8);
|
let xi0 = u8::min(15, (x0 + 0.5) as u8);
|
||||||
let xi1 = u8::min(15, (x1 + 0.5) as u8);
|
let xi1 = u8::min(15, (x1 + 0.5) as u8);
|
||||||
@ -496,9 +496,9 @@ impl GgmlType for BlockQ5_1 {
|
|||||||
ys.m = f16::from_f32(min);
|
ys.m = f16::from_f32(min);
|
||||||
|
|
||||||
let mut qh = 0u32;
|
let mut qh = 0u32;
|
||||||
for (j, q) in ys.qs.iter_mut().enumerate() {
|
for (j, q) in ys.qs.iter_mut().take(qk / 2).enumerate() {
|
||||||
let x0 = (xs[i * qk + j] - min) * id;
|
let x0 = (xs[j] - min) * id;
|
||||||
let x1 = (xs[i * qk + qk / 2 + j] - min) * id;
|
let x1 = (xs[qk / 2 + j] - min) * id;
|
||||||
|
|
||||||
let xi0 = (x0 + 0.5) as u8;
|
let xi0 = (x0 + 0.5) as u8;
|
||||||
let xi1 = (x1 + 0.5) as u8;
|
let xi1 = (x1 + 0.5) as u8;
|
||||||
|
@ -135,7 +135,100 @@ fn quantize_q4_0() -> Result<()> {
|
|||||||
|
|
||||||
//mirrored GGML unit test
|
//mirrored GGML unit test
|
||||||
ggml_quantization_error_test::<BlockQ4_0>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;
|
ggml_quantization_error_test::<BlockQ4_0>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn quantize_q4_1() -> Result<()> {
|
||||||
|
use k_quants::BlockQ4_1;
|
||||||
|
|
||||||
|
let src = (0..32 * 4).map(|v| v as f32).collect::<Vec<_>>();
|
||||||
|
let mut dst = vec![0f32; 32 * 4];
|
||||||
|
let mut quant = vec![BlockQ4_1::zeros(); 4];
|
||||||
|
BlockQ4_1::from_float(&src, &mut quant)?;
|
||||||
|
BlockQ4_1::to_float(&quant, dst.as_mut_slice())?;
|
||||||
|
assert_eq!(
|
||||||
|
round_vector(&dst),
|
||||||
|
&[
|
||||||
|
0.0, 0.0, 2.066, 2.066, 4.133, 4.133, 6.199, 6.199, 8.266, 8.266, 10.332, 10.332,
|
||||||
|
12.398, 12.398, 14.465, 14.465, 16.531, 16.531, 18.598, 18.598, 20.664, 20.664, 22.73,
|
||||||
|
22.73, 24.797, 24.797, 26.863, 26.863, 28.93, 28.93, 30.996, 30.996, 32.0, 32.0,
|
||||||
|
34.066, 34.066, 36.133, 36.133, 38.199, 38.199, 40.266, 40.266, 42.332, 42.332, 44.398,
|
||||||
|
44.398, 46.465, 46.465, 48.531, 48.531, 50.598, 50.598, 52.664, 52.664, 54.73, 54.73,
|
||||||
|
56.797, 56.797, 58.863, 58.863, 60.93, 60.93, 62.996, 62.996, 64.0, 64.0, 66.066,
|
||||||
|
66.066, 68.133, 68.133, 70.199, 70.199, 72.266, 72.266, 74.332, 74.332, 76.398, 76.398,
|
||||||
|
78.465, 78.465, 80.531, 80.531, 82.598, 82.598, 84.664, 84.664, 86.73, 86.73, 88.797,
|
||||||
|
88.797, 90.863, 90.863, 92.93, 92.93, 94.996, 94.996, 96.0, 96.0, 98.066, 98.066,
|
||||||
|
100.133, 100.133, 102.199, 102.199, 104.266, 104.266, 106.332, 106.332, 108.398,
|
||||||
|
108.398, 110.465, 110.465, 112.531, 112.531, 114.598, 114.598, 116.664, 116.664,
|
||||||
|
118.73, 118.73, 120.797, 120.797, 122.863, 122.863, 124.93, 124.93, 126.996, 126.996
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
//mirrored GGML unit test
|
||||||
|
ggml_quantization_error_test::<BlockQ4_1>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn quantize_q5_0() -> Result<()> {
|
||||||
|
use k_quants::BlockQ5_0;
|
||||||
|
|
||||||
|
let src = (0..32 * 4).map(|v| v as f32).collect::<Vec<_>>();
|
||||||
|
let mut dst = vec![0f32; 32 * 4];
|
||||||
|
let mut quant = vec![BlockQ5_0::zeros(); 4];
|
||||||
|
BlockQ5_0::from_float(&src, &mut quant)?;
|
||||||
|
BlockQ5_0::to_float(&quant, dst.as_mut_slice())?;
|
||||||
|
assert_eq!(
|
||||||
|
round_vector(&dst),
|
||||||
|
&[
|
||||||
|
-0.0, 1.938, 1.938, 3.875, 3.875, 5.813, 5.813, 7.75, 7.75, 9.688, 9.688, 11.625,
|
||||||
|
11.625, 13.563, 13.563, 15.5, 15.5, 17.438, 17.438, 19.375, 19.375, 21.313, 21.313,
|
||||||
|
23.25, 23.25, 25.188, 25.188, 27.125, 27.125, 29.063, 29.063, 31.0, 31.5, 31.5, 35.438,
|
||||||
|
35.438, 35.438, 35.438, 39.375, 39.375, 39.375, 39.375, 43.313, 43.313, 43.313, 43.313,
|
||||||
|
47.25, 47.25, 47.25, 47.25, 51.188, 51.188, 51.188, 51.188, 55.125, 55.125, 55.125,
|
||||||
|
55.125, 59.063, 59.063, 59.063, 59.063, 63.0, 63.0, 65.313, 65.313, 65.313, 65.313,
|
||||||
|
65.313, 71.25, 71.25, 71.25, 71.25, 71.25, 71.25, 77.188, 77.188, 77.188, 77.188,
|
||||||
|
77.188, 77.188, 83.125, 83.125, 83.125, 83.125, 83.125, 83.125, 89.063, 89.063, 89.063,
|
||||||
|
89.063, 89.063, 89.063, 95.0, 95.0, 95.0, 95.25, 95.25, 95.25, 95.25, 103.188, 103.188,
|
||||||
|
103.188, 103.188, 103.188, 103.188, 103.188, 103.188, 111.125, 111.125, 111.125,
|
||||||
|
111.125, 111.125, 111.125, 111.125, 111.125, 119.063, 119.063, 119.063, 119.063,
|
||||||
|
119.063, 119.063, 119.063, 119.063, 127.0, 127.0, 127.0, 127.0
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
//mirrored GGML unit test
|
||||||
|
ggml_quantization_error_test::<BlockQ5_0>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn quantize_q5_1() -> Result<()> {
|
||||||
|
use k_quants::BlockQ5_1;
|
||||||
|
|
||||||
|
let src = (0..32 * 4).map(|v| v as f32).collect::<Vec<_>>();
|
||||||
|
let mut dst = vec![0f32; 32 * 4];
|
||||||
|
let mut quant = vec![BlockQ5_1::zeros(); 4];
|
||||||
|
BlockQ5_1::from_float(&src, &mut quant)?;
|
||||||
|
BlockQ5_1::to_float(&quant, dst.as_mut_slice())?;
|
||||||
|
assert_eq!(
|
||||||
|
dst,
|
||||||
|
&[
|
||||||
|
0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
|
||||||
|
16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0,
|
||||||
|
16.0, 16.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0,
|
||||||
|
44.0, 45.0, 46.0, 47.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0,
|
||||||
|
48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0,
|
||||||
|
72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0,
|
||||||
|
80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 96.0, 97.0, 98.0, 99.0,
|
||||||
|
100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0, 111.0,
|
||||||
|
112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0,
|
||||||
|
112.0, 112.0, 112.0, 112.0
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
//mirrored GGML unit test
|
||||||
|
ggml_quantization_error_test::<BlockQ5_1>(0.014)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user