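Enable cudarc static linking: drop `cuda-version-from-build-system` and `dynamic-linking` from the workspace cudarc features and gate CUDA code on a new internal `_cuda` feature.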

This commit is contained in:
Nicolas Patry
2025-03-29 09:27:53 +01:00
parent 2c0f6b008e
commit ec6d7ca773
16 changed files with 36 additions and 33 deletions

View File

@ -43,7 +43,7 @@ candle-onnx = { path = "./candle-onnx", version = "0.8.4" }
candle-transformers = { path = "./candle-transformers", version = "0.8.4" }
clap = { version = "4.2.4", features = ["derive"] }
criterion = { version = "0.5.1", default-features=false }
cudarc = { version = "0.13.5", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
cudarc = { version = "0.13.5", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16"], default-features=false }
fancy-regex = "0.13.0"
gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
hf-hub = "0.4.1"

View File

@ -43,8 +43,9 @@ criterion = { workspace = true }
[features]
default = []
cuda = ["cudarc", "dep:candle-kernels", "dep:ug-cuda"]
cudnn = ["cuda", "cudarc/cudnn"]
_cuda = ["dep:cudarc", "dep:candle-kernels", "dep:ug-cuda"]
# cuda = ["_cuda", "cudarc?/cuda-version-from-build-system", "cudarc?/dynamic-linking"]
cudnn = ["_cuda", "cudarc?/cudnn"]
_mkl = ["dep:libc", "dep:intel-mkl-src"]
mkl = ["_mkl", "intel-mkl-src?/mkl-static-lp64-iomp"]
accelerate = ["dep:libc", "dep:accelerate-src"]

View File

@ -20,9 +20,9 @@ impl BenchDevice for Device {
match self {
Device::Cpu => Ok(()),
Device::Cuda(device) => {
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
return Ok(device.synchronize()?);
#[cfg(not(feature = "cuda"))]
#[cfg(not(feature = "_cuda"))]
panic!("Cuda device without cuda feature enabled: {:?}", device)
}
Device::Metal(device) => {
@ -61,7 +61,7 @@ impl BenchDeviceHandler {
let mut devices = Vec::new();
if cfg!(feature = "metal") {
devices.push(Device::new_metal(0)?);
} else if cfg!(feature = "cuda") {
} else if cfg!(feature = "_cuda") {
devices.push(Device::new_cuda(0)?);
}
devices.push(Device::Cpu);

View File

@ -378,7 +378,7 @@ impl Tensor {
pub struct UgIOp1 {
name: &'static str,
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
func: cudarc::driver::CudaFunction,
#[cfg(feature = "metal")]
func: metal::ComputePipelineState,
@ -392,7 +392,7 @@ impl UgIOp1 {
kernel: ug::lang::ssa::Kernel,
device: &crate::Device,
) -> Result<Self> {
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
{
let device = device.as_cuda_device()?;
let func = device.compile(name, kernel)?;
@ -404,7 +404,7 @@ impl UgIOp1 {
let func = device.compile(name, kernel)?;
Ok(Self { name, func })
}
#[cfg(not(any(feature = "cuda", feature = "metal")))]
#[cfg(not(any(feature = "_cuda", feature = "metal")))]
{
Ok(Self { name })
}
@ -456,7 +456,7 @@ impl InplaceOp1 for UgIOp1 {
Ok(())
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
fn cuda_fwd(&self, sto: &mut CudaStorage, layout: &Layout) -> Result<()> {
use crate::cuda_backend::WrapErr;
use cudarc::driver::LaunchAsync;

View File

@ -55,7 +55,7 @@ pub mod conv;
mod convert;
pub mod cpu;
pub mod cpu_backend;
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
pub mod cuda_backend;
mod custom_op;
mod device;
@ -104,10 +104,10 @@ pub use strided_index::{StridedBlocks, StridedIndex};
pub use tensor::{Tensor, TensorId};
pub use variable::Var;
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
pub use cuda_backend as cuda;
#[cfg(not(feature = "cuda"))]
#[cfg(not(feature = "_cuda"))]
pub use dummy_cuda_backend as cuda;
pub use cuda::{CudaDevice, CudaStorage};

View File

@ -16,9 +16,9 @@ pub mod metal;
mod metal {
pub use super::dummy_metal::*;
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
pub mod cuda;
#[cfg(not(feature = "cuda"))]
#[cfg(not(feature = "_cuda"))]
mod cuda {
pub use super::dummy_cuda::*;
}

View File

@ -52,7 +52,7 @@ impl ArgSort {
}
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
mod cuda {
use super::*;
use crate::cuda_backend::cudarc::driver::{
@ -118,7 +118,7 @@ impl crate::CustomOp1 for ArgSort {
Ok((sort_indexes, layout.shape().into()))
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
fn cuda_fwd(
&self,
storage: &crate::CudaStorage,

View File

@ -10,7 +10,7 @@ macro_rules! test_device {
$fn_name(&Device::Cpu)
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
#[test]
fn $test_cuda() -> Result<()> {
$fn_name(&Device::new_cuda(0)?)

View File

@ -21,7 +21,7 @@ pub fn has_mkl() -> bool {
}
pub fn cuda_is_available() -> bool {
cfg!(feature = "cuda")
cfg!(feature = "_cuda")
}
pub fn metal_is_available() -> bool {

View File

@ -144,7 +144,7 @@ fn inplace_op1() -> Result<()> {
Ok(())
}
#[cfg(any(feature = "cuda", feature = "metal"))]
#[cfg(any(feature = "_cuda", feature = "metal"))]
#[allow(clippy::approx_constant)]
#[test]
fn ug_op() -> Result<()> {

View File

@ -11,7 +11,7 @@ license = "MIT OR Apache-2.0"
readme = "README.md"
[dependencies]
candle = { path = "../candle-core", features = ["cuda"], package = "candle-core", version = "0.8.4" }
candle = { path = "../candle-core", features = ["_cuda"], package = "candle-core", version = "0.8.4" }
half = { version = "2.3.1", features = ["num-traits"] }
[build-dependencies]
@ -21,4 +21,4 @@ anyhow = { version = "1", features = ["backtrace"] }
[dev-dependencies]
anyhow = { version = "1", features = ["backtrace"] }
candle-nn = { path = "../candle-nn", features = ["cuda"] }
candle-nn = { path = "../candle-nn", features = ["_cuda"] }

View File

@ -32,6 +32,7 @@ criterion = { workspace = true }
[features]
default = []
accelerate = ["dep:accelerate-src", "candle/accelerate"]
_cuda = ["candle/_cuda"]
cuda = ["candle/cuda"]
_mkl = ["dep:intel-mkl-src", "candle/_mkl"]
mkl = ["candle/mkl"]

View File

@ -15,9 +15,9 @@ impl BenchDevice for Device {
match self {
Device::Cpu => Ok(()),
Device::Cuda(device) => {
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
return Ok(device.synchronize()?);
#[cfg(not(feature = "cuda"))]
#[cfg(not(feature = "_cuda"))]
panic!("Cuda device without cuda feature enabled: {:?}", device)
}
Device::Metal(device) => {
@ -56,7 +56,7 @@ impl BenchDeviceHandler {
let mut devices = Vec::new();
if cfg!(feature = "metal") {
devices.push(Device::new_metal(0)?);
} else if cfg!(feature = "cuda") {
} else if cfg!(feature = "_cuda") {
devices.push(Device::new_cuda(0)?);
}
devices.push(Device::Cpu);

View File

@ -82,7 +82,7 @@ impl candle::CustomOp1 for Sigmoid {
Ok((storage, layout.shape().clone()))
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
fn cuda_fwd(
&self,
storage: &candle::CudaStorage,
@ -333,7 +333,7 @@ impl candle::CustomOp1 for SoftmaxLastDim {
}
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
fn cuda_fwd(
&self,
storage: &candle::CudaStorage,
@ -507,7 +507,7 @@ impl candle::CustomOp2 for RmsNorm {
}
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
fn cuda_fwd(
&self,
s1: &candle::CudaStorage,
@ -740,7 +740,7 @@ impl candle::CustomOp3 for LayerNorm {
}
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
fn cuda_fwd(
&self,
s1: &candle::CudaStorage,

View File

@ -77,7 +77,7 @@ impl candle::CustomOp3 for RotaryEmbI {
}
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
fn cuda_fwd(
&self,
s1: &candle::CudaStorage,
@ -322,7 +322,7 @@ impl candle::CustomOp3 for RotaryEmb {
}
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
fn cuda_fwd(
&self,
s1: &candle::CudaStorage,
@ -576,7 +576,7 @@ impl candle::CustomOp3 for RotaryEmbThd {
}
}
#[cfg(feature = "cuda")]
#[cfg(feature = "_cuda")]
fn cuda_fwd(
&self,
s1: &candle::CudaStorage,

View File

@ -28,6 +28,7 @@ tracing = { workspace = true }
[features]
default = []
accelerate = ["dep:accelerate-src", "candle/accelerate", "candle-nn/accelerate"]
_cuda = ["candle/_cuda", "candle-nn/_cuda"]
cuda = ["candle/cuda", "candle-nn/cuda"]
flash-attn = ["cuda", "dep:candle-flash-attn"]
mkl = ["dep:intel-mkl-src", "candle/mkl", "candle-nn/mkl"]