Add mkl support for matrix multiply. (#86)

* Fix some rebase issues.
* Use mkl instead.
* Use mkl in bert.
* Add the optional mkl feature.
* Conditional compilation based on the mkl feature.
* Add more mkl support.
2025-06-15 18:28:24 +00:00 · 2023-07-06 11:05:05 +01:00
parent cd230d26fe
commit c297a50960
9 changed files with 118 additions and 3 deletions
--- a/candle-core/Cargo.toml
+++ b/candle-core/Cargo.toml
@@ -11,6 +11,7 @@ license = "MIT/Apache-2.0"
 readme = "README.md"

 [dependencies]
+blas = { version = "0.22.0", optional = true }
 byteorder = "1.4.3"
 candle-kernels = { path = "../candle-kernels", optional = true }
 cudarc = { version = "0.9.9", optional = true, features = ["f16"] }
@@ -18,6 +19,7 @@ cudarc = { version = "0.9.9", optional = true, features = ["f16"] }
 # https://github.com/sarah-ek/gemm/pull/8 is available.
 gemm = { git = "https://github.com/LaurentMazare/gemm.git", branch = "f16-vectorize-pack" }
 half = { version = "2.3.1", features = ["num-traits"] }
+intel-mkl-src = {version="0.8.1", optional=true, features = ["mkl-dynamic-lp64-iomp"]}
 memmap2 = "0.7.1"
 num-traits = "0.2.15"
 num_cpus = "1.15.0"
@@ -31,3 +33,4 @@ anyhow = { version = "1", features = ["backtrace"] }
 [features]
 default = ["cuda"]
 cuda = ["dep:cudarc", "dep:candle-kernels"]
+mkl = ["dep:blas", "dep:intel-mkl-src"]