Compare commits

...

2 Commits

Author SHA1 Message Date
2c0f6b008e Fixing order. 2025-03-28 11:43:33 +01:00
9862cd3ba2 Splitting the features to enable different mkl linking. 2025-03-28 10:13:13 +01:00
23 changed files with 42 additions and 40 deletions

View File

@ -51,7 +51,7 @@ half = { version = "2.5.0", features = ["num-traits", "use-intrinsics", "rand_di
hound = "3.5.1" hound = "3.5.1"
image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] } image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] }
imageproc = { version = "0.24.0", default-features = false } imageproc = { version = "0.24.0", default-features = false }
intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] } intel-mkl-src = { version = "0.8.1" }
libc = { version = "0.2.147" } libc = { version = "0.2.147" }
log = "0.4" log = "0.4"
memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] } memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] }

View File

@ -45,7 +45,8 @@ criterion = { workspace = true }
default = [] default = []
cuda = ["cudarc", "dep:candle-kernels", "dep:ug-cuda"] cuda = ["cudarc", "dep:candle-kernels", "dep:ug-cuda"]
cudnn = ["cuda", "cudarc/cudnn"] cudnn = ["cuda", "cudarc/cudnn"]
mkl = ["dep:libc", "dep:intel-mkl-src"] _mkl = ["dep:libc", "dep:intel-mkl-src"]
mkl = ["_mkl", "intel-mkl-src?/mkl-static-lp64-iomp"]
accelerate = ["dep:libc", "dep:accelerate-src"] accelerate = ["dep:libc", "dep:accelerate-src"]
metal = ["dep:metal", "dep:candle-metal-kernels", "dep:ug-metal"] metal = ["dep:metal", "dep:candle-metal-kernels", "dep:ug-metal"]

View File

@ -39,7 +39,7 @@ impl BenchDevice for Device {
Device::Cpu => { Device::Cpu => {
let cpu_type = if cfg!(feature = "accelerate") { let cpu_type = if cfg!(feature = "accelerate") {
"accelerate" "accelerate"
} else if cfg!(feature = "mkl") { } else if cfg!(feature = "_mkl") {
"mkl" "mkl"
} else { } else {
"cpu" "cpu"

View File

@ -1,4 +1,4 @@
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -1,7 +1,7 @@
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]
extern crate accelerate_src; extern crate accelerate_src;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
use anyhow::Result; use anyhow::Result;

View File

@ -1,4 +1,4 @@
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -1,7 +1,7 @@
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]
extern crate accelerate_src; extern crate accelerate_src;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
use anyhow::Result; use anyhow::Result;

View File

@ -1246,7 +1246,7 @@ impl MatMul {
impl Map2 for MatMul { impl Map2 for MatMul {
const OP: &'static str = "mat_mul"; const OP: &'static str = "mat_mul";
#[cfg(all(not(feature = "mkl"), not(feature = "accelerate")))] #[cfg(all(not(feature = "_mkl"), not(feature = "accelerate")))]
fn f<T: 'static + WithDType + num_traits::Num + Copy>( fn f<T: 'static + WithDType + num_traits::Num + Copy>(
&self, &self,
lhs: &[T], lhs: &[T],
@ -1411,7 +1411,7 @@ impl Map2 for MatMul {
Ok(dst) Ok(dst)
} }
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
fn f<T: 'static + WithDType + num_traits::Num + Copy>( fn f<T: 'static + WithDType + num_traits::Num + Copy>(
&self, &self,
lhs: &[T], lhs: &[T],

View File

@ -68,7 +68,7 @@ mod indexer;
pub mod layout; pub mod layout;
#[cfg(feature = "metal")] #[cfg(feature = "metal")]
pub mod metal_backend; pub mod metal_backend;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
mod mkl; mod mkl;
pub mod npy; pub mod npy;
pub mod op; pub mod op;
@ -118,7 +118,7 @@ pub use metal_backend::{MetalDevice, MetalError, MetalStorage};
#[cfg(not(feature = "metal"))] #[cfg(not(feature = "metal"))]
pub use dummy_metal_backend::{MetalDevice, MetalError, MetalStorage}; pub use dummy_metal_backend::{MetalDevice, MetalError, MetalStorage};
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -294,16 +294,16 @@ macro_rules! bin_op {
$e(v1, v2) $e(v1, v2)
} }
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
const F32_VEC: bool = true; const F32_VEC: bool = true;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
const F64_VEC: bool = true; const F64_VEC: bool = true;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
#[inline(always)] #[inline(always)]
fn f32_vec(xs1: &[f32], xs2: &[f32], ys: &mut [f32]) { fn f32_vec(xs1: &[f32], xs2: &[f32], ys: &mut [f32]) {
crate::mkl::$f32_vec(xs1, xs2, ys) crate::mkl::$f32_vec(xs1, xs2, ys)
} }
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
#[inline(always)] #[inline(always)]
fn f64_vec(xs1: &[f64], xs2: &[f64], ys: &mut [f64]) { fn f64_vec(xs1: &[f64], xs2: &[f64], ys: &mut [f64]) {
crate::mkl::$f64_vec(xs1, xs2, ys) crate::mkl::$f64_vec(xs1, xs2, ys)
@ -418,16 +418,16 @@ macro_rules! unary_op {
todo!("no unary function for i64") todo!("no unary function for i64")
} }
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
const F32_VEC: bool = true; const F32_VEC: bool = true;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
const F64_VEC: bool = true; const F64_VEC: bool = true;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
#[inline(always)] #[inline(always)]
fn f32_vec(xs: &[f32], ys: &mut [f32]) { fn f32_vec(xs: &[f32], ys: &mut [f32]) {
crate::mkl::$f32_vec(xs, ys) crate::mkl::$f32_vec(xs, ys)
} }
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
#[inline(always)] #[inline(always)]
fn f64_vec(xs: &[f64], ys: &mut [f64]) { fn f64_vec(xs: &[f64], ys: &mut [f64]) {
crate::mkl::$f64_vec(xs, ys) crate::mkl::$f64_vec(xs, ys)
@ -518,19 +518,19 @@ impl UnaryOpT for Gelu {
} }
const KERNEL: &'static str = "ugelu"; const KERNEL: &'static str = "ugelu";
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
const F32_VEC: bool = true; const F32_VEC: bool = true;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
#[inline(always)] #[inline(always)]
fn f32_vec(xs: &[f32], ys: &mut [f32]) { fn f32_vec(xs: &[f32], ys: &mut [f32]) {
crate::mkl::vs_gelu(xs, ys) crate::mkl::vs_gelu(xs, ys)
} }
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
const F64_VEC: bool = true; const F64_VEC: bool = true;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
#[inline(always)] #[inline(always)]
fn f64_vec(xs: &[f64], ys: &mut [f64]) { fn f64_vec(xs: &[f64], ys: &mut [f64]) {
crate::mkl::vd_gelu(xs, ys) crate::mkl::vd_gelu(xs, ys)
@ -625,19 +625,19 @@ impl UnaryOpT for Silu {
} }
const KERNEL: &'static str = "usilu"; const KERNEL: &'static str = "usilu";
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
const F32_VEC: bool = true; const F32_VEC: bool = true;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
#[inline(always)] #[inline(always)]
fn f32_vec(xs: &[f32], ys: &mut [f32]) { fn f32_vec(xs: &[f32], ys: &mut [f32]) {
crate::mkl::vs_silu(xs, ys) crate::mkl::vs_silu(xs, ys)
} }
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
const F64_VEC: bool = true; const F64_VEC: bool = true;
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
#[inline(always)] #[inline(always)]
fn f64_vec(xs: &[f64], ys: &mut [f64]) { fn f64_vec(xs: &[f64], ys: &mut [f64]) {
crate::mkl::vd_silu(xs, ys) crate::mkl::vd_silu(xs, ys)

View File

@ -17,7 +17,7 @@ pub fn has_accelerate() -> bool {
} }
pub fn has_mkl() -> bool { pub fn has_mkl() -> bool {
cfg!(feature = "mkl") cfg!(feature = "_mkl")
} }
pub fn cuda_is_available() -> bool { pub fn cuda_is_available() -> bool {

View File

@ -33,7 +33,8 @@ criterion = { workspace = true }
default = [] default = []
accelerate = ["dep:accelerate-src", "candle/accelerate"] accelerate = ["dep:accelerate-src", "candle/accelerate"]
cuda = ["candle/cuda"] cuda = ["candle/cuda"]
mkl = ["dep:intel-mkl-src", "candle/mkl"] _mkl = ["dep:intel-mkl-src", "candle/_mkl"]
mkl = ["candle/mkl"]
metal = ["candle/metal", "dep:candle-metal-kernels", "dep:metal"] metal = ["candle/metal", "dep:candle-metal-kernels", "dep:metal"]
[[bench]] [[bench]]

View File

@ -34,7 +34,7 @@ impl BenchDevice for Device {
Device::Cpu => { Device::Cpu => {
let cpu_type = if cfg!(feature = "accelerate") { let cpu_type = if cfg!(feature = "accelerate") {
"accelerate" "accelerate"
} else if cfg!(feature = "mkl") { } else if cfg!(feature = "_mkl") {
"mkl" "mkl"
} else { } else {
"cpu" "cpu"

View File

@ -1,4 +1,4 @@
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -1,5 +1,5 @@
/// This example contains some simple benchmarks so that it's easy to run them in perf etc. /// This example contains some simple benchmarks so that it's easy to run them in perf etc.
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -1,4 +1,4 @@
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -18,7 +18,7 @@ t = torch.tensor(
print(group_norm(t, num_groups=2)) print(group_norm(t, num_groups=2))
print(group_norm(t, num_groups=3)) print(group_norm(t, num_groups=3))
*/ */
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -1,4 +1,4 @@
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -1,4 +1,4 @@
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -1,4 +1,4 @@
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -1,4 +1,4 @@
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -1,4 +1,4 @@
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]

View File

@ -1,4 +1,4 @@
#[cfg(feature = "mkl")] #[cfg(feature = "_mkl")]
extern crate intel_mkl_src; extern crate intel_mkl_src;
#[cfg(feature = "accelerate")] #[cfg(feature = "accelerate")]