Add mkl support for matrix multiply. (#86)

* Fix some rebase issues.

* Use mkl instead.

* Use mkl in bert.

* Add the optional mkl feature.

* Conditional compilation based on the mkl feature.

* Add more mkl support.
This commit is contained in:
Laurent Mazare
2023-07-06 11:05:05 +01:00
committed by GitHub
parent cd230d26fe
commit c297a50960
9 changed files with 118 additions and 3 deletions

View File

@@ -11,6 +11,7 @@ license = "MIT/Apache-2.0"
readme = "README.md"
[dependencies]
blas = { version = "0.22.0", optional = true }
byteorder = "1.4.3"
candle-kernels = { path = "../candle-kernels", optional = true }
cudarc = { version = "0.9.9", optional = true, features = ["f16"] }
@@ -18,6 +19,7 @@ cudarc = { version = "0.9.9", optional = true, features = ["f16"] }
# https://github.com/sarah-ek/gemm/pull/8 is available.
gemm = { git = "https://github.com/LaurentMazare/gemm.git", branch = "f16-vectorize-pack" }
half = { version = "2.3.1", features = ["num-traits"] }
intel-mkl-src = { version = "0.8.1", optional = true, features = ["mkl-dynamic-lp64-iomp"] }
memmap2 = "0.7.1"
num-traits = "0.2.15"
num_cpus = "1.15.0"
@@ -31,3 +33,4 @@ anyhow = { version = "1", features = ["backtrace"] }
[features]
# CUDA support is part of the default feature set, so it is enabled unless a
# consumer builds with default features turned off.
default = ["cuda"]
# Activates the optional `cudarc` and `candle-kernels` dependencies.
cuda = ["dep:cudarc", "dep:candle-kernels"]
# Activates the optional `blas` and `intel-mkl-src` dependencies, which provide
# the MKL-backed matrix multiply. Not enabled by default.
mkl = ["dep:blas", "dep:intel-mkl-src"]