Sketch a simd128 optimized q4k vecdot. (#977)

* Sketch a simd128 optimized q4k vecdot.

* Simdify.

* More quantization optimizations.

* Again more simdification.

* Simdify the splitting loop.
This commit is contained in:
Laurent Mazare
2023-09-27 20:19:38 +01:00
committed by GitHub
parent 667f01c173
commit 9cb110c44c
3 changed files with 103 additions and 1 deletions

View File

@@ -133,6 +133,12 @@ fn quantized_matmul_q40() -> Result<()> {
Ok(())
}
/// Test registered through `#[wasm_bindgen_test]` that runs the shared
/// `ggml_matmul_error_test` helper for the `BlockQ4K` k-quant block type.
///
/// The helper is instantiated with `candle::quantized::k_quants::BlockQ4K`;
/// any error it returns is propagated to the test runner via `?`, otherwise
/// the test finishes with `Ok(())`.
// NOTE(review): presumably the helper compares a quantized matmul against a
// reference result within some error tolerance — confirm against its
// definition, which is not visible in this hunk.
#[wasm_bindgen_test]
fn quantized_matmul_q4k() -> Result<()> {
ggml_matmul_error_test::<candle::quantized::k_quants::BlockQ4K>()?;
Ok(())
}
#[wasm_bindgen_test]
fn quantized_matmul_q80() -> Result<()> {
ggml_matmul_error_test::<candle::quantized::k_quants::BlockQ8_0>()?;