mirror of
https://github.com/huggingface/candle.git
synced 2025-06-19 11:56:45 +00:00

Changelog:
* Add the CUDA dequantize f16 kernels.
* Expose the CUDA kernels.
* Add some testing + fix.
* Test the other cases too.
* A few more tests.
* Add an environment variable to enable the dequantize-f16 + matmul behavior.

55 lines
1.3 KiB
Rust
#![allow(unused)]
|
|
use super::GgmlDType;
|
|
use crate::{CudaDevice, CudaStorage, Error, Result};
|
|
|
|
pub struct QCudaStorage {
|
|
dtype: GgmlDType,
|
|
device: CudaDevice,
|
|
}
|
|
|
|
impl QCudaStorage {
|
|
pub fn zeros(_: &CudaDevice, _: usize, _: GgmlDType) -> Result<Self> {
|
|
Err(Error::NotCompiledWithCudaSupport)
|
|
}
|
|
|
|
pub fn dtype(&self) -> GgmlDType {
|
|
self.dtype
|
|
}
|
|
|
|
pub fn device(&self) -> &CudaDevice {
|
|
&self.device
|
|
}
|
|
|
|
pub fn dequantize(&self, _elem_count: usize) -> Result<CudaStorage> {
|
|
Err(Error::NotCompiledWithCudaSupport)
|
|
}
|
|
|
|
pub fn dequantize_f16(&self, _elem_count: usize) -> Result<CudaStorage> {
|
|
Err(Error::NotCompiledWithCudaSupport)
|
|
}
|
|
|
|
pub fn quantize(&mut self, _src: &CudaStorage) -> Result<()> {
|
|
Err(Error::NotCompiledWithCudaSupport)
|
|
}
|
|
|
|
pub fn storage_size_in_bytes(&self) -> usize {
|
|
0
|
|
}
|
|
|
|
pub fn fwd(
|
|
&self,
|
|
_self_shape: &crate::Shape,
|
|
_storage: &CudaStorage,
|
|
_layout: &crate::Layout,
|
|
) -> Result<(CudaStorage, crate::Shape)> {
|
|
Err(Error::NotCompiledWithCudaSupport)
|
|
}
|
|
}
|
|
|
|
pub fn load_quantized<T: super::GgmlType + Send + Sync + 'static>(
|
|
_device: &CudaDevice,
|
|
_data: &[T],
|
|
) -> Result<super::QStorage> {
|
|
Err(Error::NotCompiledWithCudaSupport)
|
|
}
|