mirror of
https://github.com/huggingface/candle.git
synced 2025-06-15 10:26:33 +00:00
Cudarc static-linking enabled.
This commit is contained in:
@ -43,7 +43,7 @@ candle-onnx = { path = "./candle-onnx", version = "0.8.4" }
|
||||
candle-transformers = { path = "./candle-transformers", version = "0.8.4" }
|
||||
clap = { version = "4.2.4", features = ["derive"] }
|
||||
criterion = { version = "0.5.1", default-features=false }
|
||||
cudarc = { version = "0.13.5", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
|
||||
cudarc = { version = "0.13.5", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16"], default-features=false }
|
||||
fancy-regex = "0.13.0"
|
||||
gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
|
||||
hf-hub = "0.4.1"
|
||||
|
@ -43,8 +43,9 @@ criterion = { workspace = true }
|
||||
|
||||
[features]
|
||||
default = []
|
||||
cuda = ["cudarc", "dep:candle-kernels", "dep:ug-cuda"]
|
||||
cudnn = ["cuda", "cudarc/cudnn"]
|
||||
_cuda = ["dep:cudarc", "dep:candle-kernels", "dep:ug-cuda"]
|
||||
# cuda = ["_cuda", "cudarc?/cuda-version-from-build-system", "cudarc?/dynamic-linking"]
|
||||
cudnn = ["_cuda", "cudarc?/cudnn"]
|
||||
_mkl = ["dep:libc", "dep:intel-mkl-src"]
|
||||
mkl = ["_mkl", "intel-mkl-src?/mkl-static-lp64-iomp"]
|
||||
accelerate = ["dep:libc", "dep:accelerate-src"]
|
||||
|
@ -20,9 +20,9 @@ impl BenchDevice for Device {
|
||||
match self {
|
||||
Device::Cpu => Ok(()),
|
||||
Device::Cuda(device) => {
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
return Ok(device.synchronize()?);
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
#[cfg(not(feature = "_cuda"))]
|
||||
panic!("Cuda device without cuda feature enabled: {:?}", device)
|
||||
}
|
||||
Device::Metal(device) => {
|
||||
@ -61,7 +61,7 @@ impl BenchDeviceHandler {
|
||||
let mut devices = Vec::new();
|
||||
if cfg!(feature = "metal") {
|
||||
devices.push(Device::new_metal(0)?);
|
||||
} else if cfg!(feature = "cuda") {
|
||||
} else if cfg!(feature = "_cuda") {
|
||||
devices.push(Device::new_cuda(0)?);
|
||||
}
|
||||
devices.push(Device::Cpu);
|
||||
|
@ -378,7 +378,7 @@ impl Tensor {
|
||||
|
||||
pub struct UgIOp1 {
|
||||
name: &'static str,
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
func: cudarc::driver::CudaFunction,
|
||||
#[cfg(feature = "metal")]
|
||||
func: metal::ComputePipelineState,
|
||||
@ -392,7 +392,7 @@ impl UgIOp1 {
|
||||
kernel: ug::lang::ssa::Kernel,
|
||||
device: &crate::Device,
|
||||
) -> Result<Self> {
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
{
|
||||
let device = device.as_cuda_device()?;
|
||||
let func = device.compile(name, kernel)?;
|
||||
@ -404,7 +404,7 @@ impl UgIOp1 {
|
||||
let func = device.compile(name, kernel)?;
|
||||
Ok(Self { name, func })
|
||||
}
|
||||
#[cfg(not(any(feature = "cuda", feature = "metal")))]
|
||||
#[cfg(not(any(feature = "_cuda", feature = "metal")))]
|
||||
{
|
||||
Ok(Self { name })
|
||||
}
|
||||
@ -456,7 +456,7 @@ impl InplaceOp1 for UgIOp1 {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
fn cuda_fwd(&self, sto: &mut CudaStorage, layout: &Layout) -> Result<()> {
|
||||
use crate::cuda_backend::WrapErr;
|
||||
use cudarc::driver::LaunchAsync;
|
||||
|
@ -55,7 +55,7 @@ pub mod conv;
|
||||
mod convert;
|
||||
pub mod cpu;
|
||||
pub mod cpu_backend;
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
pub mod cuda_backend;
|
||||
mod custom_op;
|
||||
mod device;
|
||||
@ -104,10 +104,10 @@ pub use strided_index::{StridedBlocks, StridedIndex};
|
||||
pub use tensor::{Tensor, TensorId};
|
||||
pub use variable::Var;
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
pub use cuda_backend as cuda;
|
||||
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
#[cfg(not(feature = "_cuda"))]
|
||||
pub use dummy_cuda_backend as cuda;
|
||||
|
||||
pub use cuda::{CudaDevice, CudaStorage};
|
||||
|
@ -16,9 +16,9 @@ pub mod metal;
|
||||
mod metal {
|
||||
pub use super::dummy_metal::*;
|
||||
}
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
pub mod cuda;
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
#[cfg(not(feature = "_cuda"))]
|
||||
mod cuda {
|
||||
pub use super::dummy_cuda::*;
|
||||
}
|
||||
|
@ -52,7 +52,7 @@ impl ArgSort {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
mod cuda {
|
||||
use super::*;
|
||||
use crate::cuda_backend::cudarc::driver::{
|
||||
@ -118,7 +118,7 @@ impl crate::CustomOp1 for ArgSort {
|
||||
Ok((sort_indexes, layout.shape().into()))
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
fn cuda_fwd(
|
||||
&self,
|
||||
storage: &crate::CudaStorage,
|
||||
|
@ -10,7 +10,7 @@ macro_rules! test_device {
|
||||
$fn_name(&Device::Cpu)
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
#[test]
|
||||
fn $test_cuda() -> Result<()> {
|
||||
$fn_name(&Device::new_cuda(0)?)
|
||||
|
@ -21,7 +21,7 @@ pub fn has_mkl() -> bool {
|
||||
}
|
||||
|
||||
pub fn cuda_is_available() -> bool {
|
||||
cfg!(feature = "cuda")
|
||||
cfg!(feature = "_cuda")
|
||||
}
|
||||
|
||||
pub fn metal_is_available() -> bool {
|
||||
|
@ -144,7 +144,7 @@ fn inplace_op1() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(any(feature = "cuda", feature = "metal"))]
|
||||
#[cfg(any(feature = "_cuda", feature = "metal"))]
|
||||
#[allow(clippy::approx_constant)]
|
||||
#[test]
|
||||
fn ug_op() -> Result<()> {
|
||||
|
@ -11,7 +11,7 @@ license = "MIT OR Apache-2.0"
|
||||
readme = "README.md"
|
||||
|
||||
[dependencies]
|
||||
candle = { path = "../candle-core", features = ["cuda"], package = "candle-core", version = "0.8.4" }
|
||||
candle = { path = "../candle-core", features = ["_cuda"], package = "candle-core", version = "0.8.4" }
|
||||
half = { version = "2.3.1", features = ["num-traits"] }
|
||||
|
||||
[build-dependencies]
|
||||
@ -21,4 +21,4 @@ anyhow = { version = "1", features = ["backtrace"] }
|
||||
|
||||
[dev-dependencies]
|
||||
anyhow = { version = "1", features = ["backtrace"] }
|
||||
candle-nn = { path = "../candle-nn", features = ["cuda"] }
|
||||
candle-nn = { path = "../candle-nn", features = ["_cuda"] }
|
||||
|
@ -32,6 +32,7 @@ criterion = { workspace = true }
|
||||
[features]
|
||||
default = []
|
||||
accelerate = ["dep:accelerate-src", "candle/accelerate"]
|
||||
_cuda = ["candle/_cuda"]
|
||||
cuda = ["candle/cuda"]
|
||||
_mkl = ["dep:intel-mkl-src", "candle/_mkl"]
|
||||
mkl = ["candle/mkl"]
|
||||
|
@ -15,9 +15,9 @@ impl BenchDevice for Device {
|
||||
match self {
|
||||
Device::Cpu => Ok(()),
|
||||
Device::Cuda(device) => {
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
return Ok(device.synchronize()?);
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
#[cfg(not(feature = "_cuda"))]
|
||||
panic!("Cuda device without cuda feature enabled: {:?}", device)
|
||||
}
|
||||
Device::Metal(device) => {
|
||||
@ -56,7 +56,7 @@ impl BenchDeviceHandler {
|
||||
let mut devices = Vec::new();
|
||||
if cfg!(feature = "metal") {
|
||||
devices.push(Device::new_metal(0)?);
|
||||
} else if cfg!(feature = "cuda") {
|
||||
} else if cfg!(feature = "_cuda") {
|
||||
devices.push(Device::new_cuda(0)?);
|
||||
}
|
||||
devices.push(Device::Cpu);
|
||||
|
@ -82,7 +82,7 @@ impl candle::CustomOp1 for Sigmoid {
|
||||
Ok((storage, layout.shape().clone()))
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
fn cuda_fwd(
|
||||
&self,
|
||||
storage: &candle::CudaStorage,
|
||||
@ -333,7 +333,7 @@ impl candle::CustomOp1 for SoftmaxLastDim {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
fn cuda_fwd(
|
||||
&self,
|
||||
storage: &candle::CudaStorage,
|
||||
@ -507,7 +507,7 @@ impl candle::CustomOp2 for RmsNorm {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
fn cuda_fwd(
|
||||
&self,
|
||||
s1: &candle::CudaStorage,
|
||||
@ -740,7 +740,7 @@ impl candle::CustomOp3 for LayerNorm {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
fn cuda_fwd(
|
||||
&self,
|
||||
s1: &candle::CudaStorage,
|
||||
|
@ -77,7 +77,7 @@ impl candle::CustomOp3 for RotaryEmbI {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
fn cuda_fwd(
|
||||
&self,
|
||||
s1: &candle::CudaStorage,
|
||||
@ -322,7 +322,7 @@ impl candle::CustomOp3 for RotaryEmb {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
fn cuda_fwd(
|
||||
&self,
|
||||
s1: &candle::CudaStorage,
|
||||
@ -576,7 +576,7 @@ impl candle::CustomOp3 for RotaryEmbThd {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
#[cfg(feature = "_cuda")]
|
||||
fn cuda_fwd(
|
||||
&self,
|
||||
s1: &candle::CudaStorage,
|
||||
|
@ -28,6 +28,7 @@ tracing = { workspace = true }
|
||||
[features]
|
||||
default = []
|
||||
accelerate = ["dep:accelerate-src", "candle/accelerate", "candle-nn/accelerate"]
|
||||
_cuda = ["candle/_cuda", "candle-nn/_cuda"]
|
||||
cuda = ["candle/cuda", "candle-nn/cuda"]
|
||||
flash-attn = ["cuda", "dep:candle-flash-attn"]
|
||||
mkl = ["dep:intel-mkl-src", "candle/mkl", "candle-nn/mkl"]
|
||||
|
Reference in New Issue
Block a user