mirror of
https://github.com/huggingface/candle.git
synced 2025-06-17 02:58:50 +00:00
Compare commits
2 Commits
0.9.0-alph
...
mkl_link_f
Author | SHA1 | Date | |
---|---|---|---|
2c0f6b008e | |||
9862cd3ba2 |
@ -51,7 +51,7 @@ half = { version = "2.5.0", features = ["num-traits", "use-intrinsics", "rand_di
|
|||||||
hound = "3.5.1"
|
hound = "3.5.1"
|
||||||
image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] }
|
image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] }
|
||||||
imageproc = { version = "0.24.0", default-features = false }
|
imageproc = { version = "0.24.0", default-features = false }
|
||||||
intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] }
|
intel-mkl-src = { version = "0.8.1" }
|
||||||
libc = { version = "0.2.147" }
|
libc = { version = "0.2.147" }
|
||||||
log = "0.4"
|
log = "0.4"
|
||||||
memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] }
|
memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] }
|
||||||
|
@ -45,7 +45,8 @@ criterion = { workspace = true }
|
|||||||
default = []
|
default = []
|
||||||
cuda = ["cudarc", "dep:candle-kernels", "dep:ug-cuda"]
|
cuda = ["cudarc", "dep:candle-kernels", "dep:ug-cuda"]
|
||||||
cudnn = ["cuda", "cudarc/cudnn"]
|
cudnn = ["cuda", "cudarc/cudnn"]
|
||||||
mkl = ["dep:libc", "dep:intel-mkl-src"]
|
_mkl = ["dep:libc", "dep:intel-mkl-src"]
|
||||||
|
mkl = ["_mkl", "intel-mkl-src?/mkl-static-lp64-iomp"]
|
||||||
accelerate = ["dep:libc", "dep:accelerate-src"]
|
accelerate = ["dep:libc", "dep:accelerate-src"]
|
||||||
metal = ["dep:metal", "dep:candle-metal-kernels", "dep:ug-metal"]
|
metal = ["dep:metal", "dep:candle-metal-kernels", "dep:ug-metal"]
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ impl BenchDevice for Device {
|
|||||||
Device::Cpu => {
|
Device::Cpu => {
|
||||||
let cpu_type = if cfg!(feature = "accelerate") {
|
let cpu_type = if cfg!(feature = "accelerate") {
|
||||||
"accelerate"
|
"accelerate"
|
||||||
} else if cfg!(feature = "mkl") {
|
} else if cfg!(feature = "_mkl") {
|
||||||
"mkl"
|
"mkl"
|
||||||
} else {
|
} else {
|
||||||
"cpu"
|
"cpu"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
extern crate accelerate_src;
|
extern crate accelerate_src;
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
extern crate accelerate_src;
|
extern crate accelerate_src;
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
@ -1246,7 +1246,7 @@ impl MatMul {
|
|||||||
impl Map2 for MatMul {
|
impl Map2 for MatMul {
|
||||||
const OP: &'static str = "mat_mul";
|
const OP: &'static str = "mat_mul";
|
||||||
|
|
||||||
#[cfg(all(not(feature = "mkl"), not(feature = "accelerate")))]
|
#[cfg(all(not(feature = "_mkl"), not(feature = "accelerate")))]
|
||||||
fn f<T: 'static + WithDType + num_traits::Num + Copy>(
|
fn f<T: 'static + WithDType + num_traits::Num + Copy>(
|
||||||
&self,
|
&self,
|
||||||
lhs: &[T],
|
lhs: &[T],
|
||||||
@ -1411,7 +1411,7 @@ impl Map2 for MatMul {
|
|||||||
Ok(dst)
|
Ok(dst)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
fn f<T: 'static + WithDType + num_traits::Num + Copy>(
|
fn f<T: 'static + WithDType + num_traits::Num + Copy>(
|
||||||
&self,
|
&self,
|
||||||
lhs: &[T],
|
lhs: &[T],
|
||||||
|
@ -68,7 +68,7 @@ mod indexer;
|
|||||||
pub mod layout;
|
pub mod layout;
|
||||||
#[cfg(feature = "metal")]
|
#[cfg(feature = "metal")]
|
||||||
pub mod metal_backend;
|
pub mod metal_backend;
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
mod mkl;
|
mod mkl;
|
||||||
pub mod npy;
|
pub mod npy;
|
||||||
pub mod op;
|
pub mod op;
|
||||||
@ -118,7 +118,7 @@ pub use metal_backend::{MetalDevice, MetalError, MetalStorage};
|
|||||||
#[cfg(not(feature = "metal"))]
|
#[cfg(not(feature = "metal"))]
|
||||||
pub use dummy_metal_backend::{MetalDevice, MetalError, MetalStorage};
|
pub use dummy_metal_backend::{MetalDevice, MetalError, MetalStorage};
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -294,16 +294,16 @@ macro_rules! bin_op {
|
|||||||
$e(v1, v2)
|
$e(v1, v2)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
const F32_VEC: bool = true;
|
const F32_VEC: bool = true;
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
const F64_VEC: bool = true;
|
const F64_VEC: bool = true;
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn f32_vec(xs1: &[f32], xs2: &[f32], ys: &mut [f32]) {
|
fn f32_vec(xs1: &[f32], xs2: &[f32], ys: &mut [f32]) {
|
||||||
crate::mkl::$f32_vec(xs1, xs2, ys)
|
crate::mkl::$f32_vec(xs1, xs2, ys)
|
||||||
}
|
}
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn f64_vec(xs1: &[f64], xs2: &[f64], ys: &mut [f64]) {
|
fn f64_vec(xs1: &[f64], xs2: &[f64], ys: &mut [f64]) {
|
||||||
crate::mkl::$f64_vec(xs1, xs2, ys)
|
crate::mkl::$f64_vec(xs1, xs2, ys)
|
||||||
@ -418,16 +418,16 @@ macro_rules! unary_op {
|
|||||||
todo!("no unary function for i64")
|
todo!("no unary function for i64")
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
const F32_VEC: bool = true;
|
const F32_VEC: bool = true;
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
const F64_VEC: bool = true;
|
const F64_VEC: bool = true;
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn f32_vec(xs: &[f32], ys: &mut [f32]) {
|
fn f32_vec(xs: &[f32], ys: &mut [f32]) {
|
||||||
crate::mkl::$f32_vec(xs, ys)
|
crate::mkl::$f32_vec(xs, ys)
|
||||||
}
|
}
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn f64_vec(xs: &[f64], ys: &mut [f64]) {
|
fn f64_vec(xs: &[f64], ys: &mut [f64]) {
|
||||||
crate::mkl::$f64_vec(xs, ys)
|
crate::mkl::$f64_vec(xs, ys)
|
||||||
@ -518,19 +518,19 @@ impl UnaryOpT for Gelu {
|
|||||||
}
|
}
|
||||||
const KERNEL: &'static str = "ugelu";
|
const KERNEL: &'static str = "ugelu";
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
const F32_VEC: bool = true;
|
const F32_VEC: bool = true;
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn f32_vec(xs: &[f32], ys: &mut [f32]) {
|
fn f32_vec(xs: &[f32], ys: &mut [f32]) {
|
||||||
crate::mkl::vs_gelu(xs, ys)
|
crate::mkl::vs_gelu(xs, ys)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
const F64_VEC: bool = true;
|
const F64_VEC: bool = true;
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn f64_vec(xs: &[f64], ys: &mut [f64]) {
|
fn f64_vec(xs: &[f64], ys: &mut [f64]) {
|
||||||
crate::mkl::vd_gelu(xs, ys)
|
crate::mkl::vd_gelu(xs, ys)
|
||||||
@ -625,19 +625,19 @@ impl UnaryOpT for Silu {
|
|||||||
}
|
}
|
||||||
const KERNEL: &'static str = "usilu";
|
const KERNEL: &'static str = "usilu";
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
const F32_VEC: bool = true;
|
const F32_VEC: bool = true;
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn f32_vec(xs: &[f32], ys: &mut [f32]) {
|
fn f32_vec(xs: &[f32], ys: &mut [f32]) {
|
||||||
crate::mkl::vs_silu(xs, ys)
|
crate::mkl::vs_silu(xs, ys)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
const F64_VEC: bool = true;
|
const F64_VEC: bool = true;
|
||||||
|
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn f64_vec(xs: &[f64], ys: &mut [f64]) {
|
fn f64_vec(xs: &[f64], ys: &mut [f64]) {
|
||||||
crate::mkl::vd_silu(xs, ys)
|
crate::mkl::vd_silu(xs, ys)
|
||||||
|
@ -17,7 +17,7 @@ pub fn has_accelerate() -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn has_mkl() -> bool {
|
pub fn has_mkl() -> bool {
|
||||||
cfg!(feature = "mkl")
|
cfg!(feature = "_mkl")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn cuda_is_available() -> bool {
|
pub fn cuda_is_available() -> bool {
|
||||||
|
@ -33,7 +33,8 @@ criterion = { workspace = true }
|
|||||||
default = []
|
default = []
|
||||||
accelerate = ["dep:accelerate-src", "candle/accelerate"]
|
accelerate = ["dep:accelerate-src", "candle/accelerate"]
|
||||||
cuda = ["candle/cuda"]
|
cuda = ["candle/cuda"]
|
||||||
mkl = ["dep:intel-mkl-src", "candle/mkl"]
|
_mkl = ["dep:intel-mkl-src", "candle/_mkl"]
|
||||||
|
mkl = ["candle/mkl"]
|
||||||
metal = ["candle/metal", "dep:candle-metal-kernels", "dep:metal"]
|
metal = ["candle/metal", "dep:candle-metal-kernels", "dep:metal"]
|
||||||
|
|
||||||
[[bench]]
|
[[bench]]
|
||||||
|
@ -34,7 +34,7 @@ impl BenchDevice for Device {
|
|||||||
Device::Cpu => {
|
Device::Cpu => {
|
||||||
let cpu_type = if cfg!(feature = "accelerate") {
|
let cpu_type = if cfg!(feature = "accelerate") {
|
||||||
"accelerate"
|
"accelerate"
|
||||||
} else if cfg!(feature = "mkl") {
|
} else if cfg!(feature = "_mkl") {
|
||||||
"mkl"
|
"mkl"
|
||||||
} else {
|
} else {
|
||||||
"cpu"
|
"cpu"
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/// This example contains some simple benchmarks so that it's easy to run them in perf etc.
|
/// This example contains some simple benchmarks so that it's easy to run them in perf etc.
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -18,7 +18,7 @@ t = torch.tensor(
|
|||||||
print(group_norm(t, num_groups=2))
|
print(group_norm(t, num_groups=2))
|
||||||
print(group_norm(t, num_groups=3))
|
print(group_norm(t, num_groups=3))
|
||||||
*/
|
*/
|
||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(feature = "mkl")]
|
#[cfg(feature = "_mkl")]
|
||||||
extern crate intel_mkl_src;
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
#[cfg(feature = "accelerate")]
|
||||||
|
Reference in New Issue
Block a user