Add the cuda dequantize f16 kernels. (#2137)

* Add the cuda dequantize f16 kernels.
* Expose the cuda kernels.
* Add some testing + fix.
* Test the other cases too.
* A few more tests.
* Add an environment variable to enable the dequantize f16 + matmul behavior.
2025-06-19 19:58:35 +00:00 · 2024-04-28 20:05:05 +02:00
parent c68ed8963f
commit eb26e2467e
5 changed files with 317 additions and 55 deletions
--- a/candle-core/src/quantized/dummy_cuda.rs
+++ b/candle-core/src/quantized/dummy_cuda.rs
@@ -24,6 +24,10 @@ impl QCudaStorage {
        Err(Error::NotCompiledWithCudaSupport)
    }

+    pub fn dequantize_f16(&self, _elem_count: usize) -> Result<CudaStorage> {
+        Err(Error::NotCompiledWithCudaSupport)
+    }
+
    pub fn quantize(&mut self, _src: &CudaStorage) -> Result<()> {
        Err(Error::NotCompiledWithCudaSupport)
    }