mirror of
https://github.com/huggingface/candle.git
synced 2025-06-20 12:06:35 +00:00
Compare commits
1 Commits
mkl_link_f
...
cudarc_fre
Author | SHA1 | Date | |
---|---|---|---|
ec6d7ca773 |
@ -43,7 +43,7 @@ candle-onnx = { path = "./candle-onnx", version = "0.8.4" }
|
|||||||
candle-transformers = { path = "./candle-transformers", version = "0.8.4" }
|
candle-transformers = { path = "./candle-transformers", version = "0.8.4" }
|
||||||
clap = { version = "4.2.4", features = ["derive"] }
|
clap = { version = "4.2.4", features = ["derive"] }
|
||||||
criterion = { version = "0.5.1", default-features=false }
|
criterion = { version = "0.5.1", default-features=false }
|
||||||
cudarc = { version = "0.13.5", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
|
cudarc = { version = "0.13.5", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16"], default-features=false }
|
||||||
fancy-regex = "0.13.0"
|
fancy-regex = "0.13.0"
|
||||||
gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
|
gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
|
||||||
hf-hub = "0.4.1"
|
hf-hub = "0.4.1"
|
||||||
|
@ -43,8 +43,9 @@ criterion = { workspace = true }
|
|||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
cuda = ["cudarc", "dep:candle-kernels", "dep:ug-cuda"]
|
_cuda = ["dep:cudarc", "dep:candle-kernels", "dep:ug-cuda"]
|
||||||
cudnn = ["cuda", "cudarc/cudnn"]
|
# cuda = ["_cuda", "cudarc?/cuda-version-from-build-system", "cudarc?/dynamic-linking"]
|
||||||
|
cudnn = ["_cuda", "cudarc?/cudnn"]
|
||||||
_mkl = ["dep:libc", "dep:intel-mkl-src"]
|
_mkl = ["dep:libc", "dep:intel-mkl-src"]
|
||||||
mkl = ["_mkl", "intel-mkl-src?/mkl-static-lp64-iomp"]
|
mkl = ["_mkl", "intel-mkl-src?/mkl-static-lp64-iomp"]
|
||||||
accelerate = ["dep:libc", "dep:accelerate-src"]
|
accelerate = ["dep:libc", "dep:accelerate-src"]
|
||||||
|
@ -20,9 +20,9 @@ impl BenchDevice for Device {
|
|||||||
match self {
|
match self {
|
||||||
Device::Cpu => Ok(()),
|
Device::Cpu => Ok(()),
|
||||||
Device::Cuda(device) => {
|
Device::Cuda(device) => {
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
return Ok(device.synchronize()?);
|
return Ok(device.synchronize()?);
|
||||||
#[cfg(not(feature = "cuda"))]
|
#[cfg(not(feature = "_cuda"))]
|
||||||
panic!("Cuda device without cuda feature enabled: {:?}", device)
|
panic!("Cuda device without cuda feature enabled: {:?}", device)
|
||||||
}
|
}
|
||||||
Device::Metal(device) => {
|
Device::Metal(device) => {
|
||||||
@ -61,7 +61,7 @@ impl BenchDeviceHandler {
|
|||||||
let mut devices = Vec::new();
|
let mut devices = Vec::new();
|
||||||
if cfg!(feature = "metal") {
|
if cfg!(feature = "metal") {
|
||||||
devices.push(Device::new_metal(0)?);
|
devices.push(Device::new_metal(0)?);
|
||||||
} else if cfg!(feature = "cuda") {
|
} else if cfg!(feature = "_cuda") {
|
||||||
devices.push(Device::new_cuda(0)?);
|
devices.push(Device::new_cuda(0)?);
|
||||||
}
|
}
|
||||||
devices.push(Device::Cpu);
|
devices.push(Device::Cpu);
|
||||||
|
@ -378,7 +378,7 @@ impl Tensor {
|
|||||||
|
|
||||||
pub struct UgIOp1 {
|
pub struct UgIOp1 {
|
||||||
name: &'static str,
|
name: &'static str,
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
func: cudarc::driver::CudaFunction,
|
func: cudarc::driver::CudaFunction,
|
||||||
#[cfg(feature = "metal")]
|
#[cfg(feature = "metal")]
|
||||||
func: metal::ComputePipelineState,
|
func: metal::ComputePipelineState,
|
||||||
@ -392,7 +392,7 @@ impl UgIOp1 {
|
|||||||
kernel: ug::lang::ssa::Kernel,
|
kernel: ug::lang::ssa::Kernel,
|
||||||
device: &crate::Device,
|
device: &crate::Device,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
{
|
{
|
||||||
let device = device.as_cuda_device()?;
|
let device = device.as_cuda_device()?;
|
||||||
let func = device.compile(name, kernel)?;
|
let func = device.compile(name, kernel)?;
|
||||||
@ -404,7 +404,7 @@ impl UgIOp1 {
|
|||||||
let func = device.compile(name, kernel)?;
|
let func = device.compile(name, kernel)?;
|
||||||
Ok(Self { name, func })
|
Ok(Self { name, func })
|
||||||
}
|
}
|
||||||
#[cfg(not(any(feature = "cuda", feature = "metal")))]
|
#[cfg(not(any(feature = "_cuda", feature = "metal")))]
|
||||||
{
|
{
|
||||||
Ok(Self { name })
|
Ok(Self { name })
|
||||||
}
|
}
|
||||||
@ -456,7 +456,7 @@ impl InplaceOp1 for UgIOp1 {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
fn cuda_fwd(&self, sto: &mut CudaStorage, layout: &Layout) -> Result<()> {
|
fn cuda_fwd(&self, sto: &mut CudaStorage, layout: &Layout) -> Result<()> {
|
||||||
use crate::cuda_backend::WrapErr;
|
use crate::cuda_backend::WrapErr;
|
||||||
use cudarc::driver::LaunchAsync;
|
use cudarc::driver::LaunchAsync;
|
||||||
|
@ -55,7 +55,7 @@ pub mod conv;
|
|||||||
mod convert;
|
mod convert;
|
||||||
pub mod cpu;
|
pub mod cpu;
|
||||||
pub mod cpu_backend;
|
pub mod cpu_backend;
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
pub mod cuda_backend;
|
pub mod cuda_backend;
|
||||||
mod custom_op;
|
mod custom_op;
|
||||||
mod device;
|
mod device;
|
||||||
@ -104,10 +104,10 @@ pub use strided_index::{StridedBlocks, StridedIndex};
|
|||||||
pub use tensor::{Tensor, TensorId};
|
pub use tensor::{Tensor, TensorId};
|
||||||
pub use variable::Var;
|
pub use variable::Var;
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
pub use cuda_backend as cuda;
|
pub use cuda_backend as cuda;
|
||||||
|
|
||||||
#[cfg(not(feature = "cuda"))]
|
#[cfg(not(feature = "_cuda"))]
|
||||||
pub use dummy_cuda_backend as cuda;
|
pub use dummy_cuda_backend as cuda;
|
||||||
|
|
||||||
pub use cuda::{CudaDevice, CudaStorage};
|
pub use cuda::{CudaDevice, CudaStorage};
|
||||||
|
@ -16,9 +16,9 @@ pub mod metal;
|
|||||||
mod metal {
|
mod metal {
|
||||||
pub use super::dummy_metal::*;
|
pub use super::dummy_metal::*;
|
||||||
}
|
}
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
pub mod cuda;
|
pub mod cuda;
|
||||||
#[cfg(not(feature = "cuda"))]
|
#[cfg(not(feature = "_cuda"))]
|
||||||
mod cuda {
|
mod cuda {
|
||||||
pub use super::dummy_cuda::*;
|
pub use super::dummy_cuda::*;
|
||||||
}
|
}
|
||||||
|
@ -52,7 +52,7 @@ impl ArgSort {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
mod cuda {
|
mod cuda {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::cuda_backend::cudarc::driver::{
|
use crate::cuda_backend::cudarc::driver::{
|
||||||
@ -118,7 +118,7 @@ impl crate::CustomOp1 for ArgSort {
|
|||||||
Ok((sort_indexes, layout.shape().into()))
|
Ok((sort_indexes, layout.shape().into()))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
fn cuda_fwd(
|
fn cuda_fwd(
|
||||||
&self,
|
&self,
|
||||||
storage: &crate::CudaStorage,
|
storage: &crate::CudaStorage,
|
||||||
|
@ -10,7 +10,7 @@ macro_rules! test_device {
|
|||||||
$fn_name(&Device::Cpu)
|
$fn_name(&Device::Cpu)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
#[test]
|
#[test]
|
||||||
fn $test_cuda() -> Result<()> {
|
fn $test_cuda() -> Result<()> {
|
||||||
$fn_name(&Device::new_cuda(0)?)
|
$fn_name(&Device::new_cuda(0)?)
|
||||||
|
@ -21,7 +21,7 @@ pub fn has_mkl() -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn cuda_is_available() -> bool {
|
pub fn cuda_is_available() -> bool {
|
||||||
cfg!(feature = "cuda")
|
cfg!(feature = "_cuda")
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn metal_is_available() -> bool {
|
pub fn metal_is_available() -> bool {
|
||||||
|
@ -144,7 +144,7 @@ fn inplace_op1() -> Result<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(any(feature = "cuda", feature = "metal"))]
|
#[cfg(any(feature = "_cuda", feature = "metal"))]
|
||||||
#[allow(clippy::approx_constant)]
|
#[allow(clippy::approx_constant)]
|
||||||
#[test]
|
#[test]
|
||||||
fn ug_op() -> Result<()> {
|
fn ug_op() -> Result<()> {
|
||||||
|
@ -11,7 +11,7 @@ license = "MIT OR Apache-2.0"
|
|||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
candle = { path = "../candle-core", features = ["cuda"], package = "candle-core", version = "0.8.4" }
|
candle = { path = "../candle-core", features = ["_cuda"], package = "candle-core", version = "0.8.4" }
|
||||||
half = { version = "2.3.1", features = ["num-traits"] }
|
half = { version = "2.3.1", features = ["num-traits"] }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
@ -21,4 +21,4 @@ anyhow = { version = "1", features = ["backtrace"] }
|
|||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
anyhow = { version = "1", features = ["backtrace"] }
|
anyhow = { version = "1", features = ["backtrace"] }
|
||||||
candle-nn = { path = "../candle-nn", features = ["cuda"] }
|
candle-nn = { path = "../candle-nn", features = ["_cuda"] }
|
||||||
|
@ -32,6 +32,7 @@ criterion = { workspace = true }
|
|||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
accelerate = ["dep:accelerate-src", "candle/accelerate"]
|
accelerate = ["dep:accelerate-src", "candle/accelerate"]
|
||||||
|
_cuda = ["candle/_cuda"]
|
||||||
cuda = ["candle/cuda"]
|
cuda = ["candle/cuda"]
|
||||||
_mkl = ["dep:intel-mkl-src", "candle/_mkl"]
|
_mkl = ["dep:intel-mkl-src", "candle/_mkl"]
|
||||||
mkl = ["candle/mkl"]
|
mkl = ["candle/mkl"]
|
||||||
|
@ -15,9 +15,9 @@ impl BenchDevice for Device {
|
|||||||
match self {
|
match self {
|
||||||
Device::Cpu => Ok(()),
|
Device::Cpu => Ok(()),
|
||||||
Device::Cuda(device) => {
|
Device::Cuda(device) => {
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
return Ok(device.synchronize()?);
|
return Ok(device.synchronize()?);
|
||||||
#[cfg(not(feature = "cuda"))]
|
#[cfg(not(feature = "_cuda"))]
|
||||||
panic!("Cuda device without cuda feature enabled: {:?}", device)
|
panic!("Cuda device without cuda feature enabled: {:?}", device)
|
||||||
}
|
}
|
||||||
Device::Metal(device) => {
|
Device::Metal(device) => {
|
||||||
@ -56,7 +56,7 @@ impl BenchDeviceHandler {
|
|||||||
let mut devices = Vec::new();
|
let mut devices = Vec::new();
|
||||||
if cfg!(feature = "metal") {
|
if cfg!(feature = "metal") {
|
||||||
devices.push(Device::new_metal(0)?);
|
devices.push(Device::new_metal(0)?);
|
||||||
} else if cfg!(feature = "cuda") {
|
} else if cfg!(feature = "_cuda") {
|
||||||
devices.push(Device::new_cuda(0)?);
|
devices.push(Device::new_cuda(0)?);
|
||||||
}
|
}
|
||||||
devices.push(Device::Cpu);
|
devices.push(Device::Cpu);
|
||||||
|
@ -82,7 +82,7 @@ impl candle::CustomOp1 for Sigmoid {
|
|||||||
Ok((storage, layout.shape().clone()))
|
Ok((storage, layout.shape().clone()))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
fn cuda_fwd(
|
fn cuda_fwd(
|
||||||
&self,
|
&self,
|
||||||
storage: &candle::CudaStorage,
|
storage: &candle::CudaStorage,
|
||||||
@ -333,7 +333,7 @@ impl candle::CustomOp1 for SoftmaxLastDim {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
fn cuda_fwd(
|
fn cuda_fwd(
|
||||||
&self,
|
&self,
|
||||||
storage: &candle::CudaStorage,
|
storage: &candle::CudaStorage,
|
||||||
@ -507,7 +507,7 @@ impl candle::CustomOp2 for RmsNorm {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
fn cuda_fwd(
|
fn cuda_fwd(
|
||||||
&self,
|
&self,
|
||||||
s1: &candle::CudaStorage,
|
s1: &candle::CudaStorage,
|
||||||
@ -740,7 +740,7 @@ impl candle::CustomOp3 for LayerNorm {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
fn cuda_fwd(
|
fn cuda_fwd(
|
||||||
&self,
|
&self,
|
||||||
s1: &candle::CudaStorage,
|
s1: &candle::CudaStorage,
|
||||||
|
@ -77,7 +77,7 @@ impl candle::CustomOp3 for RotaryEmbI {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
fn cuda_fwd(
|
fn cuda_fwd(
|
||||||
&self,
|
&self,
|
||||||
s1: &candle::CudaStorage,
|
s1: &candle::CudaStorage,
|
||||||
@ -322,7 +322,7 @@ impl candle::CustomOp3 for RotaryEmb {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
fn cuda_fwd(
|
fn cuda_fwd(
|
||||||
&self,
|
&self,
|
||||||
s1: &candle::CudaStorage,
|
s1: &candle::CudaStorage,
|
||||||
@ -576,7 +576,7 @@ impl candle::CustomOp3 for RotaryEmbThd {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "cuda")]
|
#[cfg(feature = "_cuda")]
|
||||||
fn cuda_fwd(
|
fn cuda_fwd(
|
||||||
&self,
|
&self,
|
||||||
s1: &candle::CudaStorage,
|
s1: &candle::CudaStorage,
|
||||||
|
@ -28,6 +28,7 @@ tracing = { workspace = true }
|
|||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
accelerate = ["dep:accelerate-src", "candle/accelerate", "candle-nn/accelerate"]
|
accelerate = ["dep:accelerate-src", "candle/accelerate", "candle-nn/accelerate"]
|
||||||
|
_cuda = ["candle/_cuda", "candle-nn/_cuda"]
|
||||||
cuda = ["candle/cuda", "candle-nn/cuda"]
|
cuda = ["candle/cuda", "candle-nn/cuda"]
|
||||||
flash-attn = ["cuda", "dep:candle-flash-attn"]
|
flash-attn = ["cuda", "dep:candle-flash-attn"]
|
||||||
mkl = ["dep:intel-mkl-src", "candle/mkl", "candle-nn/mkl"]
|
mkl = ["dep:intel-mkl-src", "candle/mkl", "candle-nn/mkl"]
|
||||||
|
Reference in New Issue
Block a user