From ab12425bffc5f7de20553ea54e73933fbb522d9d Mon Sep 17 00:00:00 2001
From: laurent
Date: Thu, 26 Sep 2024 10:14:53 +0200
Subject: [PATCH] Another tweak.

---
 candle-core/src/quantized/cuda.rs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/candle-core/src/quantized/cuda.rs b/candle-core/src/quantized/cuda.rs
index 525cf8c8..a66b7e00 100644
--- a/candle-core/src/quantized/cuda.rs
+++ b/candle-core/src/quantized/cuda.rs
@@ -445,13 +445,16 @@ impl QCudaStorage {
             }
             _ => crate::bail!("only f32 can be quantized"),
         };
-        let src_len = pad_for_alloc(src.len());
+        let src_len = src.len();
         let src = crate::Storage::Cpu(crate::CpuStorage::F32(src));
         let mut qcpu_storage = crate::Device::Cpu.qzeros(src_len, self.dtype)?;
         qcpu_storage.quantize(&src)?;
         let data = qcpu_storage.data()?;
-        let data = self.device.htod_sync_copy(data.as_ref()).w()?;
-        self.data = data;
+        let mut dst = self.device.alloc_zeros::<u8>(pad_for_alloc(src_len)).w()?;
+        self.device
+            .htod_sync_copy_into(data.as_ref(), &mut dst.slice_mut(..src_len))
+            .w()?;
+        self.data = dst;
         Ok(())
     }
 