Fix for q5_1 quantization. (#617)

* Fix for q5_1 quantization. * Fix some typos.
2025-06-16 10:38:54 +00:00 · 2023-08-27 08:31:18 +01:00
parent fa0d75b18d
commit a8b39dd7b7
2 changed files with 28 additions and 49 deletions
--- a/candle-core/tests/quantized_tests.rs
+++ b/candle-core/tests/quantized_tests.rs
@ -132,8 +132,6 @@ fn quantize_q4_0() -> Result<()> {
            127.0, 127.0
        ]
    );
-
-    //mirrored GGML unit test
    ggml_quantization_error_test::<BlockQ4_0>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;
    Ok(())
 }
@ -164,8 +162,6 @@ fn quantize_q4_1() -> Result<()> {
            118.73, 118.73, 120.797, 120.797, 122.863, 122.863, 124.93, 124.93, 126.996, 126.996
        ]
    );
-
-    //mirrored GGML unit test
    ggml_quantization_error_test::<BlockQ4_1>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;
    Ok(())
 }
@ -196,8 +192,6 @@ fn quantize_q5_0() -> Result<()> {
            119.063, 119.063, 119.063, 119.063, 127.0, 127.0, 127.0, 127.0
        ]
    );
-
-    //mirrored GGML unit test
    ggml_quantization_error_test::<BlockQ5_0>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;
    Ok(())
 }
@ -215,20 +209,19 @@ fn quantize_q5_1() -> Result<()> {
        dst,
        &[
            0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
-            16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0,
-            16.0, 16.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0,
-            44.0, 45.0, 46.0, 47.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0,
-            48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0,
-            72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0,
-            80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 80.0, 96.0, 97.0, 98.0, 99.0,
+            16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0,
+            30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0,
+            44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0, 56.0, 57.0,
+            58.0, 59.0, 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0,
+            72.0, 73.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0,
+            86.0, 87.0, 88.0, 89.0, 90.0, 91.0, 92.0, 93.0, 94.0, 95.0, 96.0, 97.0, 98.0, 99.0,
            100.0, 101.0, 102.0, 103.0, 104.0, 105.0, 106.0, 107.0, 108.0, 109.0, 110.0, 111.0,
-            112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0, 112.0,
-            112.0, 112.0, 112.0, 112.0
+            112.0, 113.0, 114.0, 115.0, 116.0, 117.0, 118.0, 119.0, 120.0, 121.0, 122.0, 123.0,
+            124.0, 125.0, 126.0, 127.0
        ]
    );

-    //mirrored GGML unit test
-    ggml_quantization_error_test::<BlockQ5_1>(0.014)?;
+    ggml_quantization_error_test::<BlockQ5_1>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;
    Ok(())
 }

@ -338,7 +331,6 @@ fn quantize_q2k() -> Result<()> {
    let _quant_big = quantize_roundtrip::<BlockQ2K>(src_big.as_slice(), dst_big.as_mut_slice())?;
    compare_with_error(dst_big.as_slice(), src_big.as_slice(), 6.0);

-    //mirrored GGML unit test
    ggml_quantization_error_test::<BlockQ2K>(GGML_MAX_QUANTIZATION_TOTAL_ERROR_2BITS)?;
    Ok(())
 }
@ -366,7 +358,6 @@ fn quantize_q3k() -> Result<()> {
    let _quant_big = quantize_roundtrip::<BlockQ3K>(src_big.as_slice(), dst_big.as_mut_slice())?;
    compare_with_error(dst_big.as_slice(), src_big.as_slice(), 3.5);

-    //mirrored GGML unit test
    ggml_quantization_error_test::<BlockQ3K>(GGML_MAX_QUANTIZATION_TOTAL_ERROR_3BITS)?;
    Ok(())
 }
@ -394,7 +385,6 @@ fn quantize_q4k() -> Result<()> {
    let _quant_big = quantize_roundtrip::<BlockQ4K>(src_big.as_slice(), dst_big.as_mut_slice())?;
    compare_with_error(dst_big.as_slice(), src_big.as_slice(), 4.5);

-    //mirrored GGML unit test
    ggml_quantization_error_test::<BlockQ4K>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;
    Ok(())
 }
@ -422,7 +412,6 @@ fn quantize_q5k() -> Result<()> {
    let _quant_big = quantize_roundtrip::<BlockQ5K>(src_big.as_slice(), dst_big.as_mut_slice())?;
    compare_with_error(dst_big.as_slice(), src_big.as_slice(), 2.5);

-    //mirrored GGML unit test
    ggml_quantization_error_test::<BlockQ5K>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;

    Ok(())
@ -451,7 +440,6 @@ fn quantize_q6k() -> Result<()> {
    let _quant_big = quantize_roundtrip::<BlockQ6K>(src_big.as_slice(), dst_big.as_mut_slice())?;
    compare_with_error(dst_big.as_slice(), src_big.as_slice(), 2.0);

-    //mirrored GGML unit test
    ggml_quantization_error_test::<BlockQ6K>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;

    Ok(())
@ -480,36 +468,33 @@ fn quantize_q8k() -> Result<()> {
    let _quant_big = quantize_roundtrip::<BlockQ8K>(src_big.as_slice(), dst_big.as_mut_slice())?;
    compare_with_error(dst_big.as_slice(), src_big.as_slice(), 0.6);

-    //mirrored GGML unit test
    ggml_quantization_error_test::<BlockQ8K>(GGML_MAX_QUANTIZATION_TOTAL_ERROR)?;

    Ok(())
 }

 /// Very simple dot product implementation
-fn vec_dot_referenze(a: &[f32], b: &[f32]) -> f32 {
+fn vec_dot_reference(a: &[f32], b: &[f32]) -> f32 {
    a.iter().zip(b).map(|(a, b)| a * b).sum()
 }

 /// Returns the error achieved by the GGML matmul unit test.
-fn ggml_reference_matmul_error(quantiztation_tpye: GgmlDType) -> Result<f32> {
-    match quantiztation_tpye {
-        GgmlDType::F16 => Ok(0.000010),
-        GgmlDType::Q2K => Ok(0.004086),
-        GgmlDType::Q3K => Ok(0.016148),
-        GgmlDType::Q4K => Ok(0.002425),
-        GgmlDType::Q5K => Ok(0.000740),
-        GgmlDType::Q6K => Ok(0.000952),
-        GgmlDType::Q4_0 => Ok(0.001143),
-        GgmlDType::Q4_1 => Ok(0.007784),
-        GgmlDType::Q5_0 => Ok(0.001353),
-        GgmlDType::Q5_1 => Ok(0.001363),
-        GgmlDType::Q8_0 => Ok(0.000092),
-        _ => candle_core::bail!(
-            "No GGML results for quantization type {:?}",
-            quantiztation_tpye
-        ),
-    }
+fn ggml_reference_matmul_error(dtype: GgmlDType) -> Result<f32> {
+    let err = match dtype {
+        GgmlDType::F16 => 0.000010,
+        GgmlDType::Q2K => 0.004086,
+        GgmlDType::Q3K => 0.016148,
+        GgmlDType::Q4K => 0.002425,
+        GgmlDType::Q5K => 0.000740,
+        GgmlDType::Q6K => 0.000952,
+        GgmlDType::Q4_0 => 0.001143,
+        GgmlDType::Q4_1 => 0.007784,
+        GgmlDType::Q5_0 => 0.001353,
+        GgmlDType::Q5_1 => 0.001363,
+        GgmlDType::Q8_0 => 0.000092,
+        _ => candle_core::bail!("No GGML results for quantization type {dtype:?}",),
+    };
+    Ok(err)
 }

 /// Mirrors the GGML matmul unit test: https://github.com/ggerganov/llama.cpp/blob/master/tests/test-quantize-fns.cpp#L76-L91
@ -524,7 +509,7 @@ fn ggml_matmul_error_test<T: GgmlType>() -> Result<()> {
    T::VecDotType::from_float(&b, &mut b_quant)?;

    let result = T::vec_dot(length, &a_quant, &b_quant)?;
-    let reference_result = vec_dot_referenze(&a, &b);
+    let reference_result = vec_dot_reference(&a, &b);

    let error = (result - reference_result).abs() / length as f32;

@ -595,7 +580,6 @@ fn quantized_matmul_q2k() -> Result<()> {
    let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]);
    assert_eq!(dst, [0.916, 0.422, 0.215, 1.668]);

-    //mirrored GGML unit test
    ggml_matmul_error_test::<BlockQ2K>()?;

    Ok(())
@ -622,7 +606,6 @@ fn quantized_matmul_q3k() -> Result<()> {
    let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]);
    assert_eq!(dst, [1.029, 1.418, -0.314, 1.495]);

-    //mirrored GGML unit test
    ggml_matmul_error_test::<BlockQ3K>()?;

    Ok(())
@ -649,7 +632,6 @@ fn quantized_matmul_q4k() -> Result<()> {
    let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]);
    assert_eq!(dst, [1.125, 1.435, -0.201, 1.589]);

-    //mirrored GGML unit test
    ggml_matmul_error_test::<BlockQ4K>()?;

    Ok(())
@ -676,7 +658,6 @@ fn quantized_matmul_q5k() -> Result<()> {
    let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]);
    assert_eq!(dst, [1.192, 1.491, -0.18, 1.743]);

-    //mirrored GGML unit test
    //Expected: 0.000740408897
    ggml_matmul_error_test::<BlockQ5K>()?;

@ -704,8 +685,6 @@ fn quantized_matmul_q6k() -> Result<()> {
    let dst = round_vector(&[dst[0], dst[m * n / 3], dst[m * n * 2 / 3], dst[m * n - 1]]);
    assert_eq!(dst, [1.324, 1.49, -0.164, 1.741]);

-    //mirrored GGML unit test
    ggml_matmul_error_test::<BlockQ6K>()?;
-
    Ok(())
 }