mirror of
https://github.com/huggingface/candle.git
synced 2025-06-21 12:20:46 +00:00

* Boilerplate for the quantized cuda support.
* More basic cuda support.
* More cuda quantization (quantize on cpu for now).
* Add the dequantization bit.
* Start adding some dedicated cuda kernels from llama.cpp.
* Move the kernel code.
* Start interfacing with the kernel.
* Tweak the kernel launch params.
* Bugfix for quantized metal.
* Fix some clippy lints.
* Tweak the launch parameters.
* Tweak cuda basics to perform a quantized matmul.
* Perform the dequantization on the cpu + use cublas for matmul.
* Add the dequantization kernel.
* Test the qmatmul.
* More kernels.
* Matmul-vec kernel.
* Add a couple kernels.
* More dequantization kernels.
51 lines
1.2 KiB
Rust
51 lines
1.2 KiB
Rust
// The stub functions in this file deliberately ignore their arguments, so the
// `unused` lint group is silenced for the whole file.
#![allow(unused)]
|
|
use super::GgmlDType;
|
|
use crate::{Error, MetalDevice, MetalStorage, Result};
|
|
|
|
pub struct QMetalStorage {
|
|
dtype: GgmlDType,
|
|
device: MetalDevice,
|
|
}
|
|
|
|
impl QMetalStorage {
|
|
pub fn zeros(_: &MetalDevice, _: usize, _: GgmlDType) -> Result<Self> {
|
|
Err(Error::NotCompiledWithMetalSupport)
|
|
}
|
|
|
|
pub fn dtype(&self) -> GgmlDType {
|
|
self.dtype
|
|
}
|
|
|
|
pub fn device(&self) -> &MetalDevice {
|
|
&self.device
|
|
}
|
|
|
|
pub fn dequantize(&self, _elem_count: usize) -> Result<MetalStorage> {
|
|
Err(Error::NotCompiledWithMetalSupport)
|
|
}
|
|
|
|
pub fn quantize(&mut self, _src: &MetalStorage) -> Result<()> {
|
|
Err(Error::NotCompiledWithMetalSupport)
|
|
}
|
|
|
|
pub fn storage_size_in_bytes(&self) -> usize {
|
|
0
|
|
}
|
|
|
|
pub fn fwd(
|
|
&self,
|
|
_self_shape: &crate::Shape,
|
|
_storage: &MetalStorage,
|
|
_layout: &crate::Layout,
|
|
) -> Result<(MetalStorage, crate::Shape)> {
|
|
Err(Error::NotCompiledWithMetalSupport)
|
|
}
|
|
}
|
|
|
|
pub fn load_quantized<T: super::GgmlType + Send + Sync + 'static>(
|
|
_device: &MetalDevice,
|
|
_data: &[T],
|
|
) -> Result<super::QStorage> {
|
|
Err(Error::NotCompiledWithMetalSupport)
|
|
}
|