AVX optimized q8k vecdot. (#1024)

Laurent Mazare
2023-10-03 12:10:58 +01:00
committed by GitHub
parent b4da19d1be
commit dac73edb34
3 changed files with 44 additions and 0 deletions


@@ -82,6 +82,9 @@ fn ggml_reference_matmul_error(dtype: GgmlDType) -> Result<f32> {
GgmlDType::Q5_0 => 0.001353,
GgmlDType::Q5_1 => 0.001363,
GgmlDType::Q8_0 => 0.000092,
// Not from the ggml repo.
GgmlDType::Q8K => 0.00065,
_ => candle::bail!("No GGML results for quantization type {dtype:?}",),
};
Ok(err)
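
The Q8K threshold added above is the reference error bound that the quantized-matmul tests compare against. For orientation, here is a minimal scalar sketch of the q8k·q8k dot product this bound guards, using a simplified ggml-style `BlockQ8K` layout (the real block also carries per-sub-block sums; the function name is illustrative, not candle's actual API):

```rust
// Illustrative only: a simplified ggml-style Q8K block; the real BlockQ8K also
// carries per-sub-block sums (`bsums`). 256 signed 8-bit quants share one f32 scale.
const QK_K: usize = 256;

pub struct BlockQ8K {
    pub d: f32,         // per-block scale
    pub qs: [i8; QK_K], // quantized values
}

/// Scalar reference: the dot product of two quantized rows is the integer dot
/// product of their quants, scaled by both block scales.
pub fn vec_dot_q8k_q8k_scalar(xs: &[BlockQ8K], ys: &[BlockQ8K]) -> f32 {
    xs.iter()
        .zip(ys.iter())
        .map(|(x, y)| {
            let sum_i: i32 = x
                .qs
                .iter()
                .zip(y.qs.iter())
                .map(|(&a, &b)| a as i32 * b as i32)
                .sum();
            sum_i as f32 * x.d * y.d
        })
        .sum()
}
```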
@@ -181,3 +184,9 @@ fn quantized_matmul_q6k() -> Result<()> {
ggml_matmul_error_test::<candle::quantized::k_quants::BlockQ6K>()?;
Ok(())
}
#[wasm_bindgen_test]
fn quantized_matmul_q8k() -> Result<()> {
ggml_matmul_error_test::<candle::quantized::k_quants::BlockQ8K>()?;
Ok(())
}
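
The hunks in this excerpt only cover the test side: the reference error bound and a wasm-bindgen test that exercises `BlockQ8K` through `ggml_matmul_error_test`. The AVX kernel the commit title refers to lives in the SIMD code and is not shown here; as a rough, non-authoritative sketch, the inner i8 dot product can be vectorized with AVX2 along these lines (same simplified block layout as in the scalar sketch above; names are illustrative):

```rust
const QK_K: usize = 256;

pub struct BlockQ8K {
    pub d: f32,         // per-block scale (same simplified layout as above)
    pub qs: [i8; QK_K], // quantized values
}

/// AVX2 sketch: widen 16 i8 quants at a time to i16, multiply and pairwise-add
/// into i32 lanes with `_mm256_madd_epi16`, then scale each block's integer sum
/// by both block scales and accumulate in f32.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub unsafe fn vec_dot_q8k_q8k_avx2(xs: &[BlockQ8K], ys: &[BlockQ8K]) -> f32 {
    use std::arch::x86_64::*;

    let mut acc = _mm256_setzero_ps();
    for (x, y) in xs.iter().zip(ys.iter()) {
        let mut sumi = _mm256_setzero_si256();
        for j in (0..QK_K).step_by(16) {
            // Load 16 signed bytes from each row and sign-extend to 16 x i16.
            let xv = _mm_loadu_si128(x.qs.as_ptr().add(j) as *const __m128i);
            let yv = _mm_loadu_si128(y.qs.as_ptr().add(j) as *const __m128i);
            let x16 = _mm256_cvtepi8_epi16(xv);
            let y16 = _mm256_cvtepi8_epi16(yv);
            // i16 products, adjacent pairs summed into 8 x i32 lanes.
            sumi = _mm256_add_epi32(sumi, _mm256_madd_epi16(x16, y16));
        }
        // Apply both block scales to the block's integer sum.
        let scale = _mm256_set1_ps(x.d * y.d);
        acc = _mm256_add_ps(acc, _mm256_mul_ps(_mm256_cvtepi32_ps(sumi), scale));
    }
    // Horizontal sum of the 8 accumulator lanes.
    let mut buf = [0f32; 8];
    _mm256_storeu_ps(buf.as_mut_ptr(), acc);
    buf.iter().sum()
}
```

In practice such a path is typically compiled behind a target-feature gate and selected at runtime (for example via `is_x86_feature_detected!("avx2")`), with a scalar loop like the earlier sketch as the fallback.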