From ec6d7ca7738f4052b6613edc8f4d2bb6866a7539 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Sat, 29 Mar 2025 09:27:53 +0100 Subject: [PATCH] Cudarc static-linking enabled. --- Cargo.toml | 2 +- candle-core/Cargo.toml | 7 ++++--- candle-core/benches/benchmarks/mod.rs | 6 +++--- candle-core/src/custom_op.rs | 8 ++++---- candle-core/src/lib.rs | 6 +++--- candle-core/src/quantized/mod.rs | 4 ++-- candle-core/src/sort.rs | 4 ++-- candle-core/src/test_utils.rs | 2 +- candle-core/src/utils.rs | 2 +- candle-core/tests/custom_op_tests.rs | 2 +- candle-flash-attn/Cargo.toml | 4 ++-- candle-nn/Cargo.toml | 1 + candle-nn/benches/benchmarks/mod.rs | 6 +++--- candle-nn/src/ops.rs | 8 ++++---- candle-nn/src/rotary_emb.rs | 6 +++--- candle-transformers/Cargo.toml | 1 + 16 files changed, 36 insertions(+), 33 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 44bc5412..ce075ace 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,7 +43,7 @@ candle-onnx = { path = "./candle-onnx", version = "0.8.4" } candle-transformers = { path = "./candle-transformers", version = "0.8.4" } clap = { version = "4.2.4", features = ["derive"] } criterion = { version = "0.5.1", default-features=false } -cudarc = { version = "0.13.5", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false } +cudarc = { version = "0.13.5", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16"], default-features=false } fancy-regex = "0.13.0" gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] } hf-hub = "0.4.1" diff --git a/candle-core/Cargo.toml b/candle-core/Cargo.toml index 66e456ed..27198a46 100644 --- a/candle-core/Cargo.toml +++ b/candle-core/Cargo.toml @@ -15,7 +15,7 @@ byteorder = { workspace = true } candle-kernels = { workspace = true, optional = true } candle-metal-kernels = { workspace = true, optional = true } metal = { workspace = true, optional = true } -cudarc = { 
workspace = true, optional = true } +cudarc = { workspace = true, optional = true} gemm = { workspace = true } half = { workspace = true } intel-mkl-src = { workspace = true, optional = true } @@ -43,8 +43,9 @@ criterion = { workspace = true } [features] default = [] -cuda = ["cudarc", "dep:candle-kernels", "dep:ug-cuda"] -cudnn = ["cuda", "cudarc/cudnn"] +_cuda = ["dep:cudarc", "dep:candle-kernels", "dep:ug-cuda"] +cuda = ["_cuda", "cudarc?/cuda-version-from-build-system", "cudarc?/dynamic-linking"] +cudnn = ["_cuda", "cudarc?/cudnn"] _mkl = ["dep:libc", "dep:intel-mkl-src"] mkl = ["_mkl", "intel-mkl-src?/mkl-static-lp64-iomp"] accelerate = ["dep:libc", "dep:accelerate-src"] diff --git a/candle-core/benches/benchmarks/mod.rs b/candle-core/benches/benchmarks/mod.rs index 66597ae1..cc3a8557 100644 --- a/candle-core/benches/benchmarks/mod.rs +++ b/candle-core/benches/benchmarks/mod.rs @@ -20,9 +20,9 @@ impl BenchDevice for Device { match self { Device::Cpu => Ok(()), Device::Cuda(device) => { - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] return Ok(device.synchronize()?); - #[cfg(not(feature = "cuda"))] + #[cfg(not(feature = "_cuda"))] panic!("Cuda device without cuda feature enabled: {:?}", device) } Device::Metal(device) => { @@ -61,7 +61,7 @@ impl BenchDeviceHandler { let mut devices = Vec::new(); if cfg!(feature = "metal") { devices.push(Device::new_metal(0)?); - } else if cfg!(feature = "cuda") { + } else if cfg!(feature = "_cuda") { devices.push(Device::new_cuda(0)?); } devices.push(Device::Cpu); diff --git a/candle-core/src/custom_op.rs b/candle-core/src/custom_op.rs index 18d4786e..5e7ddfff 100644 --- a/candle-core/src/custom_op.rs +++ b/candle-core/src/custom_op.rs @@ -378,7 +378,7 @@ impl Tensor { pub struct UgIOp1 { name: &'static str, - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] func: cudarc::driver::CudaFunction, #[cfg(feature = "metal")] func: metal::ComputePipelineState, @@ -392,7 +392,7 @@ impl UgIOp1 { kernel:
ug::lang::ssa::Kernel, device: &crate::Device, ) -> Result { - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] { let device = device.as_cuda_device()?; let func = device.compile(name, kernel)?; @@ -404,7 +404,7 @@ impl UgIOp1 { let func = device.compile(name, kernel)?; Ok(Self { name, func }) } - #[cfg(not(any(feature = "cuda", feature = "metal")))] + #[cfg(not(any(feature = "_cuda", feature = "metal")))] { Ok(Self { name }) } @@ -456,7 +456,7 @@ impl InplaceOp1 for UgIOp1 { Ok(()) } - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] fn cuda_fwd(&self, sto: &mut CudaStorage, layout: &Layout) -> Result<()> { use crate::cuda_backend::WrapErr; use cudarc::driver::LaunchAsync; diff --git a/candle-core/src/lib.rs b/candle-core/src/lib.rs index acdd3461..ccf58f84 100644 --- a/candle-core/src/lib.rs +++ b/candle-core/src/lib.rs @@ -55,7 +55,7 @@ pub mod conv; mod convert; pub mod cpu; pub mod cpu_backend; -#[cfg(feature = "cuda")] +#[cfg(feature = "_cuda")] pub mod cuda_backend; mod custom_op; mod device; @@ -104,10 +104,10 @@ pub use strided_index::{StridedBlocks, StridedIndex}; pub use tensor::{Tensor, TensorId}; pub use variable::Var; -#[cfg(feature = "cuda")] +#[cfg(feature = "_cuda")] pub use cuda_backend as cuda; -#[cfg(not(feature = "cuda"))] +#[cfg(not(feature = "_cuda"))] pub use dummy_cuda_backend as cuda; pub use cuda::{CudaDevice, CudaStorage}; diff --git a/candle-core/src/quantized/mod.rs b/candle-core/src/quantized/mod.rs index 802c5691..cbfdbd7c 100644 --- a/candle-core/src/quantized/mod.rs +++ b/candle-core/src/quantized/mod.rs @@ -16,9 +16,9 @@ pub mod metal; mod metal { pub use super::dummy_metal::*; } -#[cfg(feature = "cuda")] +#[cfg(feature = "_cuda")] pub mod cuda; -#[cfg(not(feature = "cuda"))] +#[cfg(not(feature = "_cuda"))] mod cuda { pub use super::dummy_cuda::*; } diff --git a/candle-core/src/sort.rs b/candle-core/src/sort.rs index 0ebb1835..c993f055 100644 --- a/candle-core/src/sort.rs +++ b/candle-core/src/sort.rs @@ -52,7 +52,7 @@ 
impl ArgSort { } } -#[cfg(feature = "cuda")] +#[cfg(feature = "_cuda")] mod cuda { use super::*; use crate::cuda_backend::cudarc::driver::{ @@ -118,7 +118,7 @@ impl crate::CustomOp1 for ArgSort { Ok((sort_indexes, layout.shape().into())) } - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] fn cuda_fwd( &self, storage: &crate::CudaStorage, diff --git a/candle-core/src/test_utils.rs b/candle-core/src/test_utils.rs index 3b8fb904..18822602 100644 --- a/candle-core/src/test_utils.rs +++ b/candle-core/src/test_utils.rs @@ -10,7 +10,7 @@ macro_rules! test_device { $fn_name(&Device::Cpu) } - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] #[test] fn $test_cuda() -> Result<()> { $fn_name(&Device::new_cuda(0)?) diff --git a/candle-core/src/utils.rs b/candle-core/src/utils.rs index 9e0a9026..12da6f51 100644 --- a/candle-core/src/utils.rs +++ b/candle-core/src/utils.rs @@ -21,7 +21,7 @@ pub fn has_mkl() -> bool { } pub fn cuda_is_available() -> bool { - cfg!(feature = "cuda") + cfg!(feature = "_cuda") } pub fn metal_is_available() -> bool { diff --git a/candle-core/tests/custom_op_tests.rs b/candle-core/tests/custom_op_tests.rs index 3fc45971..35174649 100644 --- a/candle-core/tests/custom_op_tests.rs +++ b/candle-core/tests/custom_op_tests.rs @@ -144,7 +144,7 @@ fn inplace_op1() -> Result<()> { Ok(()) } -#[cfg(any(feature = "cuda", feature = "metal"))] +#[cfg(any(feature = "_cuda", feature = "metal"))] #[allow(clippy::approx_constant)] #[test] fn ug_op() -> Result<()> { diff --git a/candle-flash-attn/Cargo.toml b/candle-flash-attn/Cargo.toml index f9c65fe9..10e6f652 100644 --- a/candle-flash-attn/Cargo.toml +++ b/candle-flash-attn/Cargo.toml @@ -11,7 +11,7 @@ license = "MIT OR Apache-2.0" readme = "README.md" [dependencies] -candle = { path = "../candle-core", features = ["cuda"], package = "candle-core", version = "0.8.4" } +candle = { path = "../candle-core", features = ["_cuda"], package = "candle-core", version = "0.8.4" } half = { version = "2.3.1", features = 
["num-traits"] } [build-dependencies] @@ -21,4 +21,4 @@ anyhow = { version = "1", features = ["backtrace"] } [dev-dependencies] anyhow = { version = "1", features = ["backtrace"] } -candle-nn = { path = "../candle-nn", features = ["cuda"] } +candle-nn = { path = "../candle-nn", features = ["_cuda"] } diff --git a/candle-nn/Cargo.toml b/candle-nn/Cargo.toml index dc19529a..cb6be0cd 100644 --- a/candle-nn/Cargo.toml +++ b/candle-nn/Cargo.toml @@ -32,6 +32,7 @@ criterion = { workspace = true } [features] default = [] accelerate = ["dep:accelerate-src", "candle/accelerate"] +_cuda = ["candle/_cuda"] cuda = ["candle/cuda"] _mkl = ["dep:intel-mkl-src", "candle/_mkl"] mkl = ["candle/mkl"] diff --git a/candle-nn/benches/benchmarks/mod.rs b/candle-nn/benches/benchmarks/mod.rs index 3620cc04..15b220e4 100644 --- a/candle-nn/benches/benchmarks/mod.rs +++ b/candle-nn/benches/benchmarks/mod.rs @@ -15,9 +15,9 @@ impl BenchDevice for Device { match self { Device::Cpu => Ok(()), Device::Cuda(device) => { - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] return Ok(device.synchronize()?); - #[cfg(not(feature = "cuda"))] + #[cfg(not(feature = "_cuda"))] panic!("Cuda device without cuda feature enabled: {:?}", device) } Device::Metal(device) => { @@ -56,7 +56,7 @@ impl BenchDeviceHandler { let mut devices = Vec::new(); if cfg!(feature = "metal") { devices.push(Device::new_metal(0)?); - } else if cfg!(feature = "cuda") { + } else if cfg!(feature = "_cuda") { devices.push(Device::new_cuda(0)?); } devices.push(Device::Cpu); diff --git a/candle-nn/src/ops.rs b/candle-nn/src/ops.rs index d7f88a0b..182d6802 100644 --- a/candle-nn/src/ops.rs +++ b/candle-nn/src/ops.rs @@ -82,7 +82,7 @@ impl candle::CustomOp1 for Sigmoid { Ok((storage, layout.shape().clone())) } - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] fn cuda_fwd( &self, storage: &candle::CudaStorage, @@ -333,7 +333,7 @@ impl candle::CustomOp1 for SoftmaxLastDim { } } - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] 
fn cuda_fwd( &self, storage: &candle::CudaStorage, @@ -507,7 +507,7 @@ impl candle::CustomOp2 for RmsNorm { } } - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] fn cuda_fwd( &self, s1: &candle::CudaStorage, @@ -740,7 +740,7 @@ impl candle::CustomOp3 for LayerNorm { } } - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] fn cuda_fwd( &self, s1: &candle::CudaStorage, diff --git a/candle-nn/src/rotary_emb.rs b/candle-nn/src/rotary_emb.rs index 0191bd7e..8e89d978 100644 --- a/candle-nn/src/rotary_emb.rs +++ b/candle-nn/src/rotary_emb.rs @@ -77,7 +77,7 @@ impl candle::CustomOp3 for RotaryEmbI { } } - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] fn cuda_fwd( &self, s1: &candle::CudaStorage, @@ -322,7 +322,7 @@ impl candle::CustomOp3 for RotaryEmb { } } - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] fn cuda_fwd( &self, s1: &candle::CudaStorage, @@ -576,7 +576,7 @@ impl candle::CustomOp3 for RotaryEmbThd { } } - #[cfg(feature = "cuda")] + #[cfg(feature = "_cuda")] fn cuda_fwd( &self, s1: &candle::CudaStorage, diff --git a/candle-transformers/Cargo.toml b/candle-transformers/Cargo.toml index 6589b4b1..40a8868e 100644 --- a/candle-transformers/Cargo.toml +++ b/candle-transformers/Cargo.toml @@ -28,6 +28,7 @@ tracing = { workspace = true } [features] default = [] accelerate = ["dep:accelerate-src", "candle/accelerate", "candle-nn/accelerate"] +_cuda = ["candle/_cuda", "candle-nn/_cuda"] cuda = ["candle/cuda", "candle-nn/cuda"] flash-attn = ["cuda", "dep:candle-flash-attn"] mkl = ["dep:intel-mkl-src", "candle/mkl", "candle-nn/mkl"]