Optimizing decode matmul (Phi at 28tok/s on M3).

Adding a benchmark to help check matmul performance.
2025-06-15 10:26:33 +00:00 · 2023-12-20 09:54:19 +01:00
parent 03641293ee
commit 9b5e4843a6
4 changed files with 66 additions and 5 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,6 +32,7 @@ accelerate-src = { version = "0.3.2" }
 anyhow = { version = "1", features = ["backtrace"] }
 byteorder = "1.4.3"
 clap = { version = "4.2.4", features = ["derive"] }
+criterion = { version = "0.5.1", default-features=false }
 cudarc = { version = "0.9.14", features = ["f16"] }
 gemm = { version = "0.16.6", features = ["wasm-simd128-enable"] }
 hf-hub = "0.3.0"