mirror of
https://github.com/huggingface/candle.git
synced 2025-06-15 10:26:33 +00:00
Optimizing decode matmul (Phi at 28tok/s on M3).
Adding some benchmark in order to help checking out matmul performance.
This commit is contained in:
@ -32,6 +32,7 @@ accelerate-src = { version = "0.3.2" }
|
||||
anyhow = { version = "1", features = ["backtrace"] }
|
||||
byteorder = "1.4.3"
|
||||
clap = { version = "4.2.4", features = ["derive"] }
|
||||
criterion = { version = "0.5.1", default-features=false }
|
||||
cudarc = { version = "0.9.14", features = ["f16"] }
|
||||
gemm = { version = "0.16.6", features = ["wasm-simd128-enable"] }
|
||||
hf-hub = "0.3.0"
|
||||
|
Reference in New Issue
Block a user