Optimizing decode matmul (Phi at 28tok/s on M3).

Adding a benchmark to help check matmul performance.
This commit is contained in:
Nicolas Patry
2023-12-20 09:54:19 +01:00
parent 03641293ee
commit 9b5e4843a6
4 changed files with 66 additions and 5 deletions

View File

@@ -32,6 +32,7 @@ accelerate-src = { version = "0.3.2" }
anyhow = { version = "1", features = ["backtrace"] }
byteorder = "1.4.3"
clap = { version = "4.2.4", features = ["derive"] }
# Pulled in with its default feature set turned off.
# NOTE(review): presumably this backs the matmul benchmark mentioned in the
# commit message — confirm against the bench targets.
criterion = { version = "0.5.1", default-features = false }
cudarc = { version = "0.9.14", features = ["f16"] }
gemm = { version = "0.16.6", features = ["wasm-simd128-enable"] }
hf-hub = "0.3.0"