AVX optimized q8k vecdot. (#1024)

Laurent Mazare
2023-10-03 12:10:58 +01:00
committed by GitHub
parent b4da19d1be
commit dac73edb34
3 changed files with 44 additions and 0 deletions


@@ -82,6 +82,9 @@ fn ggml_reference_matmul_error(dtype: GgmlDType) -> Result<f32> {
GgmlDType::Q5_0 => 0.001353,
GgmlDType::Q5_1 => 0.001363,
GgmlDType::Q8_0 => 0.000092,
// Not from the ggml repo.
GgmlDType::Q8K => 0.00065,
_ => candle::bail!("No GGML results for quantization type {dtype:?}",),
};
Ok(err)
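
The Q8K threshold added above is the reference error bound that the quantized-matmul tests compare against. For orientation, here is a minimal scalar sketch of the q8k·q8k dot product this bound guards, using a simplified ggml-style `BlockQ8K` layout (the real block also carries per-sub-block sums; the function name is illustrative, not candle's actual API):

```rust
// Illustrative only: a simplified ggml-style Q8K block; the real BlockQ8K also
// carries per-sub-block sums (`bsums`). 256 signed 8-bit quants share one f32 scale.
const QK_K: usize = 256;

pub struct BlockQ8K {
    pub d: f32,         // per-block scale
    pub qs: [i8; QK_K], // quantized values
}

/// Scalar reference: the dot product of two quantized rows is the integer dot
/// product of their quants, scaled by both block scales.
pub fn vec_dot_q8k_q8k_scalar(xs: &[BlockQ8K], ys: &[BlockQ8K]) -> f32 {
    xs.iter()
        .zip(ys.iter())
        .map(|(x, y)| {
            let sum_i: i32 = x
                .qs
                .iter()
                .zip(y.qs.iter())
                .map(|(&a, &b)| a as i32 * b as i32)
                .sum();
            sum_i as f32 * x.d * y.d
        })
        .sum()
}
```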
@@ -181,3 +184,9 @@ fn quantized_matmul_q6k() -> Result<()> {
ggml_matmul_error_test::<candle::quantized::k_quants::BlockQ6K>()?;
Ok(())
}
#[wasm_bindgen_test]
fn quantized_matmul_q8k() -> Result<()> {
ggml_matmul_error_test::<candle::quantized::k_quants::BlockQ8K>()?;
Ok(())
}
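
The hunks in this excerpt only cover the test side: the reference error bound and a wasm-bindgen test that exercises `BlockQ8K` through `ggml_matmul_error_test`. The AVX kernel the commit title refers to lives in the SIMD code and is not shown here; as a rough, non-authoritative sketch, the inner i8 dot product can be vectorized with AVX2 along these lines (same simplified block layout as in the scalar sketch above; names are illustrative):

```rust
const QK_K: usize = 256;

pub struct BlockQ8K {
    pub d: f32,         // per-block scale (same simplified layout as above)
    pub qs: [i8; QK_K], // quantized values
}

/// AVX2 sketch: widen 16 i8 quants at a time to i16, multiply and pairwise-add
/// into i32 lanes with `_mm256_madd_epi16`, then scale each block's integer sum
/// by both block scales and accumulate in f32.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub unsafe fn vec_dot_q8k_q8k_avx2(xs: &[BlockQ8K], ys: &[BlockQ8K]) -> f32 {
    use std::arch::x86_64::*;

    let mut acc = _mm256_setzero_ps();
    for (x, y) in xs.iter().zip(ys.iter()) {
        let mut sumi = _mm256_setzero_si256();
        for j in (0..QK_K).step_by(16) {
            // Load 16 signed bytes from each row and sign-extend to 16 x i16.
            let xv = _mm_loadu_si128(x.qs.as_ptr().add(j) as *const __m128i);
            let yv = _mm_loadu_si128(y.qs.as_ptr().add(j) as *const __m128i);
            let x16 = _mm256_cvtepi8_epi16(xv);
            let y16 = _mm256_cvtepi8_epi16(yv);
            // i16 products, adjacent pairs summed into 8 x i32 lanes.
            sumi = _mm256_add_epi32(sumi, _mm256_madd_epi16(x16, y16));
        }
        // Apply both block scales to the block's integer sum.
        let scale = _mm256_set1_ps(x.d * y.d);
        acc = _mm256_add_ps(acc, _mm256_mul_ps(_mm256_cvtepi32_ps(sumi), scale));
    }
    // Horizontal sum of the 8 accumulator lanes.
    let mut buf = [0f32; 8];
    _mm256_storeu_ps(buf.as_mut_ptr(), acc);
    buf.iter().sum()
}
```

In practice such a path is typically compiled behind a target-feature gate and selected at runtime (for example via `is_x86_feature_detected!("avx2")`), with a scalar loop like the earlier sketch as the fallback.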