diff --git a/Cargo.toml b/Cargo.toml index cd597eb4..44bc5412 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -51,7 +51,7 @@ half = { version = "2.5.0", features = ["num-traits", "use-intrinsics", "rand_di hound = "3.5.1" image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] } imageproc = { version = "0.24.0", default-features = false } -intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] } +intel-mkl-src = { version = "0.8.1" } libc = { version = "0.2.147" } log = "0.4" memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] } diff --git a/candle-core/Cargo.toml b/candle-core/Cargo.toml index d5d5bde0..66e456ed 100644 --- a/candle-core/Cargo.toml +++ b/candle-core/Cargo.toml @@ -45,7 +45,8 @@ criterion = { workspace = true } default = [] cuda = ["cudarc", "dep:candle-kernels", "dep:ug-cuda"] cudnn = ["cuda", "cudarc/cudnn"] -mkl = ["dep:libc", "dep:intel-mkl-src"] +_mkl = ["dep:libc", "dep:intel-mkl-src"] +mkl = ["_mkl", "intel-mkl-src?/mkl-static-lp64-iomp"] accelerate = ["dep:libc", "dep:accelerate-src"] metal = ["dep:metal", "dep:candle-metal-kernels", "dep:ug-metal"] diff --git a/candle-core/benches/benchmarks/mod.rs b/candle-core/benches/benchmarks/mod.rs index 721b292d..66597ae1 100644 --- a/candle-core/benches/benchmarks/mod.rs +++ b/candle-core/benches/benchmarks/mod.rs @@ -39,7 +39,7 @@ impl BenchDevice for Device { Device::Cpu => { let cpu_type = if cfg!(feature = "accelerate") { "accelerate" - } else if cfg!(feature = "mkl") { + } else if cfg!(feature = "_mkl") { "mkl" } else { "cpu" diff --git a/candle-core/examples/basics.rs b/candle-core/examples/basics.rs index fe15187b..5cce0c6b 100644 --- a/candle-core/examples/basics.rs +++ b/candle-core/examples/basics.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-core/examples/cuda_basics.rs b/candle-core/examples/cuda_basics.rs index 9af1b006..c09365b6 100644 --- a/candle-core/examples/cuda_basics.rs +++ b/candle-core/examples/cuda_basics.rs @@ -1,7 +1,7 @@ #[cfg(feature = "accelerate")] extern crate accelerate_src; -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; use anyhow::Result; diff --git a/candle-core/examples/cuda_sum_benchmark.rs b/candle-core/examples/cuda_sum_benchmark.rs index d6d182e8..f7cf97ec 100644 --- a/candle-core/examples/cuda_sum_benchmark.rs +++ b/candle-core/examples/cuda_sum_benchmark.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-core/examples/metal_basics.rs b/candle-core/examples/metal_basics.rs index f9ff81ad..44fe98da 100644 --- a/candle-core/examples/metal_basics.rs +++ b/candle-core/examples/metal_basics.rs @@ -1,7 +1,7 @@ #[cfg(feature = "accelerate")] extern crate accelerate_src; -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; use anyhow::Result; diff --git a/candle-core/src/cpu_backend/mod.rs b/candle-core/src/cpu_backend/mod.rs index 612359f4..008ff887 100644 --- a/candle-core/src/cpu_backend/mod.rs +++ b/candle-core/src/cpu_backend/mod.rs @@ -1246,7 +1246,7 @@ impl MatMul { impl Map2 for MatMul { const OP: &'static str = "mat_mul"; - #[cfg(all(not(feature = "mkl"), not(feature = "accelerate")))] + #[cfg(all(not(feature = "_mkl"), not(feature = "accelerate")))] fn f( &self, lhs: &[T], @@ -1411,7 +1411,7 @@ impl Map2 for MatMul { Ok(dst) } - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] fn f( &self, lhs: &[T], diff --git a/candle-core/src/lib.rs b/candle-core/src/lib.rs index 16dc8e02..acdd3461 100644 --- a/candle-core/src/lib.rs +++ b/candle-core/src/lib.rs @@ -68,7 +68,7 @@ mod indexer; pub mod layout; #[cfg(feature = "metal")] pub mod metal_backend; -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] mod mkl; pub mod npy; pub mod op; @@ -118,7 +118,7 @@ pub use metal_backend::{MetalDevice, MetalError, MetalStorage}; #[cfg(not(feature = "metal"))] pub use dummy_metal_backend::{MetalDevice, MetalError, MetalStorage}; -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-core/src/op.rs b/candle-core/src/op.rs index c5fc3fc4..501b7843 100644 --- a/candle-core/src/op.rs +++ b/candle-core/src/op.rs @@ -294,16 +294,16 @@ macro_rules! bin_op { $e(v1, v2) } - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] const F32_VEC: bool = true; - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] const F64_VEC: bool = true; - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] #[inline(always)] fn f32_vec(xs1: &[f32], xs2: &[f32], ys: &mut [f32]) { crate::mkl::$f32_vec(xs1, xs2, ys) } - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] #[inline(always)] fn f64_vec(xs1: &[f64], xs2: &[f64], ys: &mut [f64]) { crate::mkl::$f64_vec(xs1, xs2, ys) @@ -418,16 +418,16 @@ macro_rules! unary_op { todo!("no unary function for i64") } - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] const F32_VEC: bool = true; - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] const F64_VEC: bool = true; - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] #[inline(always)] fn f32_vec(xs: &[f32], ys: &mut [f32]) { crate::mkl::$f32_vec(xs, ys) } - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] #[inline(always)] fn f64_vec(xs: &[f64], ys: &mut [f64]) { crate::mkl::$f64_vec(xs, ys) @@ -518,19 +518,19 @@ impl UnaryOpT for Gelu { } const KERNEL: &'static str = "ugelu"; - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] const F32_VEC: bool = true; - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] #[inline(always)] fn f32_vec(xs: &[f32], ys: &mut [f32]) { crate::mkl::vs_gelu(xs, ys) } - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] const F64_VEC: bool = true; - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] #[inline(always)] fn f64_vec(xs: &[f64], ys: &mut [f64]) { crate::mkl::vd_gelu(xs, ys) @@ -625,19 +625,19 @@ impl UnaryOpT for Silu { } const KERNEL: &'static str = "usilu"; - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] const F32_VEC: bool = true; - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] #[inline(always)] fn f32_vec(xs: &[f32], ys: &mut [f32]) { crate::mkl::vs_silu(xs, ys) } - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] const F64_VEC: bool = true; - #[cfg(feature = "mkl")] + #[cfg(feature = "_mkl")] #[inline(always)] fn f64_vec(xs: &[f64], ys: &mut [f64]) { crate::mkl::vd_silu(xs, ys) diff --git a/candle-core/src/utils.rs b/candle-core/src/utils.rs index aa4d2705..9e0a9026 100644 --- a/candle-core/src/utils.rs +++ b/candle-core/src/utils.rs @@ -17,7 +17,7 @@ pub fn has_accelerate() -> bool { } pub fn has_mkl() -> bool { - cfg!(feature = "mkl") + cfg!(feature = "_mkl") } pub fn cuda_is_available() -> bool { diff --git a/candle-nn/Cargo.toml b/candle-nn/Cargo.toml index e62f4c32..c3f3cba0 100644 --- a/candle-nn/Cargo.toml +++ b/candle-nn/Cargo.toml @@ -33,7 +33,8 @@ criterion = { workspace = true } default = [] accelerate = ["dep:accelerate-src", "candle/accelerate"] cuda = ["candle/cuda"] -mkl = ["dep:intel-mkl-src", "candle/mkl"] +mkl = ["candle/mkl"] +_mkl = ["dep:intel-mkl-src", "candle/_mkl"] metal = ["candle/metal", "dep:candle-metal-kernels", "dep:metal"] [[bench]] diff --git a/candle-nn/benches/benchmarks/mod.rs b/candle-nn/benches/benchmarks/mod.rs index a34d8884..3620cc04 100644 --- a/candle-nn/benches/benchmarks/mod.rs +++ b/candle-nn/benches/benchmarks/mod.rs @@ -34,7 +34,7 @@ impl BenchDevice for Device { Device::Cpu => { let cpu_type = if cfg!(feature = "accelerate") { "accelerate" - } else if cfg!(feature = "mkl") { + } else if cfg!(feature = "_mkl") { "mkl" } else { "cpu" diff --git a/candle-nn/examples/basic_optimizer.rs b/candle-nn/examples/basic_optimizer.rs index 810f7a7a..d0d23ae1 100644 --- a/candle-nn/examples/basic_optimizer.rs +++ b/candle-nn/examples/basic_optimizer.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-nn/examples/cpu_benchmarks.rs b/candle-nn/examples/cpu_benchmarks.rs index 430316b8..b3d5bcca 100644 --- a/candle-nn/examples/cpu_benchmarks.rs +++ b/candle-nn/examples/cpu_benchmarks.rs @@ -1,5 +1,5 @@ /// This example contains some simple benchmarks so that it's easy to run them in perf etc. -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-nn/tests/batch_norm.rs b/candle-nn/tests/batch_norm.rs index 8ce49c92..3d3905b3 100644 --- a/candle-nn/tests/batch_norm.rs +++ b/candle-nn/tests/batch_norm.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-nn/tests/group_norm.rs b/candle-nn/tests/group_norm.rs index 8145a220..c5fde03c 100644 --- a/candle-nn/tests/group_norm.rs +++ b/candle-nn/tests/group_norm.rs @@ -18,7 +18,7 @@ t = torch.tensor( print(group_norm(t, num_groups=2)) print(group_norm(t, num_groups=3)) */ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-nn/tests/kv_cache.rs b/candle-nn/tests/kv_cache.rs index b8d2ec48..42a34ad3 100644 --- a/candle-nn/tests/kv_cache.rs +++ b/candle-nn/tests/kv_cache.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-nn/tests/layer_norm.rs b/candle-nn/tests/layer_norm.rs index 30f598b3..51be5af7 100644 --- a/candle-nn/tests/layer_norm.rs +++ b/candle-nn/tests/layer_norm.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-nn/tests/loss.rs b/candle-nn/tests/loss.rs index ccfc029f..964b58ee 100644 --- a/candle-nn/tests/loss.rs +++ b/candle-nn/tests/loss.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-nn/tests/ops.rs b/candle-nn/tests/ops.rs index 6c66f39f..eea01a0a 100644 --- a/candle-nn/tests/ops.rs +++ b/candle-nn/tests/ops.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-nn/tests/optim.rs b/candle-nn/tests/optim.rs index 4eb14ed8..ee2faa5e 100644 --- a/candle-nn/tests/optim.rs +++ b/candle-nn/tests/optim.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")] diff --git a/candle-nn/tests/rnn.rs b/candle-nn/tests/rnn.rs index 498c9188..058a99b7 100644 --- a/candle-nn/tests/rnn.rs +++ b/candle-nn/tests/rnn.rs @@ -1,4 +1,4 @@ -#[cfg(feature = "mkl")] +#[cfg(feature = "_mkl")] extern crate intel_mkl_src; #[cfg(feature = "accelerate")]