//! Implementation of the Cuda backend when Cuda support has not been compiled in.
//!
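//! A minimal sketch of what callers see in this configuration (assuming the
//! crate's public `Device::new_cuda` constructor): requesting a CUDA device
//! fails at runtime with `Error::NotCompiledWithCudaSupport` rather than at
//! compile time.
//!
//! ```ignore
//! use candle_core::Device;
//!
//! // The stub backend below is what makes this return an `Err` instead of
//! // failing to compile or link.
//! let device = Device::new_cuda(0);
//! assert!(device.is_err());
//! ```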

#![allow(dead_code)]
use crate::op::{BinaryOpT, CmpOp, ReduceOp, UnaryOpT};
use crate::{CpuStorage, DType, Error, Layout, Result, Shape};

#[derive(Debug, Clone)]
pub struct CudaDevice;

#[derive(Debug)]
pub struct CudaStorage;
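
// Convention for the stubs below: anything returning a `Result` reports
// `Err(Error::NotCompiledWithCudaSupport)`, while infallible accessors such as
// `dtype` or `location` have nothing sensible to return and panic via `fail!`.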
macro_rules! fail {
    () => {
        unimplemented!("cuda support has not been enabled, add `cuda` feature to enable.")
    };
}

impl CudaDevice {
    pub fn new_with_stream(_: usize) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }
}

impl crate::backend::BackendStorage for CudaStorage {
    type Device = CudaDevice;

    fn try_clone(&self, _: &Layout) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn dtype(&self) -> DType {
        fail!()
    }

    fn device(&self) -> &Self::Device {
        fail!()
    }

    fn const_set(&mut self, _: crate::scalar::Scalar, _: &Layout) -> Result<()> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn to_cpu_storage(&self) -> Result<CpuStorage> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn affine(&self, _: &Layout, _: f64, _: f64) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn powf(&self, _: &Layout, _: f64) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn elu(&self, _: &Layout, _: f64) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn reduce_op(&self, _: ReduceOp, _: &Layout, _: &[usize]) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn cmp(&self, _: CmpOp, _: &Self, _: &Layout, _: &Layout) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn to_dtype(&self, _: &Layout, _: DType) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn unary_impl<B: UnaryOpT>(&self, _: &Layout) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn binary_impl<B: BinaryOpT>(&self, _: &Self, _: &Layout, _: &Layout) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn where_cond(&self, _: &Layout, _: &Self, _: &Layout, _: &Self, _: &Layout) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn conv1d(
        &self,
        _: &Layout,
        _: &Self,
        _: &Layout,
        _: &crate::conv::ParamsConv1D,
    ) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn conv_transpose1d(
        &self,
        _: &Layout,
        _: &Self,
        _: &Layout,
        _: &crate::conv::ParamsConvTranspose1D,
    ) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn conv2d(
        &self,
        _: &Layout,
        _: &Self,
        _: &Layout,
        _: &crate::conv::ParamsConv2D,
    ) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn conv_transpose2d(
        &self,
        _l: &Layout,
        _kernel: &Self,
        _kernel_l: &Layout,
        _params: &crate::conv::ParamsConvTranspose2D,
    ) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn index_select(&self, _: &Self, _: &Layout, _: &Layout, _: usize) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn gather(&self, _: &Layout, _: &Self, _: &Layout, _: usize) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

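    // The in-place scatter variants take `&mut self` and return `Result<()>`,
    // mutating the destination storage directly instead of producing a new one.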
    fn scatter_set(
        &mut self,
        _: &Layout,
        _: &Self,
        _: &Layout,
        _: &Self,
        _: &Layout,
        _: usize,
    ) -> Result<()> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn scatter_add_set(
        &mut self,
        _: &Layout,
        _: &Self,
        _: &Layout,
        _: &Self,
        _: &Layout,
        _: usize,
    ) -> Result<()> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn index_add(
        &self,
        _: &Layout,
        _: &Self,
        _: &Layout,
        _: &Self,
        _: &Layout,
        _: usize,
    ) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

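    // The 4-tuple packs the GEMM problem size (presumably `(batch, m, n, k)`,
    // matching the other backends).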
    fn matmul(
        &self,
        _: &Self,
        _: (usize, usize, usize, usize),
        _: &Layout,
        _: &Layout,
    ) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn copy_strided_src(&self, _: &mut Self, _: usize, _: &Layout) -> Result<()> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn copy2d(
        &self,
        _: &mut Self,
        _: usize,
        _: usize,
        _: usize,
        _: usize,
        _: usize,
        _: usize,
    ) -> Result<()> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn avg_pool2d(&self, _: &Layout, _: (usize, usize), _: (usize, usize)) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn max_pool2d(&self, _: &Layout, _: (usize, usize), _: (usize, usize)) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn upsample_nearest1d(&self, _: &Layout, _: usize) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn upsample_nearest2d(&self, _: &Layout, _: usize, _: usize) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }
}

impl crate::backend::BackendDevice for CudaDevice {
    type Storage = CudaStorage;

    fn new(_: usize) -> Result<Self> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn set_seed(&self, _: u64) -> Result<()> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn location(&self) -> crate::DeviceLocation {
        fail!()
    }

    fn same_device(&self, _: &Self) -> bool {
        fail!()
    }

    fn zeros_impl(&self, _shape: &Shape, _dtype: DType) -> Result<Self::Storage> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    unsafe fn alloc_uninit(&self, _shape: &Shape, _dtype: DType) -> Result<Self::Storage> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn storage_from_slice<T: crate::WithDType>(&self, _: &[T]) -> Result<Self::Storage> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn storage_from_cpu_storage(&self, _: &CpuStorage) -> Result<Self::Storage> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn storage_from_cpu_storage_owned(&self, _: CpuStorage) -> Result<Self::Storage> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn rand_uniform(&self, _: &Shape, _: DType, _: f64, _: f64) -> Result<Self::Storage> {
        Err(Error::NotCompiledWithCudaSupport)
    }

    fn rand_normal(&self, _: &Shape, _: DType, _: f64, _: f64) -> Result<Self::Storage> {
        Err(Error::NotCompiledWithCudaSupport)
    }

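    // Note: unlike the methods above, synchronizing is a harmless no-op rather
    // than an error, presumably so device-agnostic code can call it
    // unconditionally.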
    fn synchronize(&self) -> Result<()> {
        Ok(())
    }
}

/// This bool controls whether reduced precision reductions (e.g., with fp16 accumulation type) are
/// allowed with f16 GEMMs.
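///
/// A hedged usage sketch (assuming this module is re-exported as
/// `candle_core::cuda_backend` when the `cuda` feature is off):
///
/// ```ignore
/// use candle_core::cuda_backend;
///
/// // Without CUDA the setters are no-ops and the getters are fixed to `true`,
/// // so feature-gated callers keep compiling and running unchanged.
/// cuda_backend::set_gemm_reduced_precision_f16(false);
/// assert!(cuda_backend::gemm_reduced_precision_f16());
/// ```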
pub fn gemm_reduced_precision_f16() -> bool {
    true
}

/// This bool controls whether reduced precision reductions (e.g., with fp16 accumulation type) are
/// allowed with f16 GEMMs.
pub fn set_gemm_reduced_precision_f16(_: bool) {}

/// This bool controls whether reduced precision reductions (e.g., with fp16 accumulation type) are
/// allowed with bf16 GEMMs.
pub fn gemm_reduced_precision_bf16() -> bool {
    true
}

/// This bool controls whether reduced precision reductions (e.g., with fp16 accumulation type) are
/// allowed with bf16 GEMMs.
pub fn set_gemm_reduced_precision_bf16(_: bool) {}

/// This bool controls whether reduced precision reductions (e.g., with tf32 accumulation type) are
/// allowed with f32 GEMMs.
pub fn gemm_reduced_precision_f32() -> bool {
    true
}

/// This bool controls whether reduced precision reductions (e.g., with tf32 accumulation type) are
/// allowed with f32 GEMMs.
pub fn set_gemm_reduced_precision_f32(_b: bool) {}