This commit is contained in:
laurent
2024-09-26 00:05:17 +02:00
parent 46acac5a64
commit 43a8cbe244

View File

@ -34,10 +34,13 @@ fn ceil_div(p: usize, q: usize) -> usize {
} }
fn pad(p: usize, q: usize) -> usize { fn pad(p: usize, q: usize) -> usize {
ceil_div(p, q) * q
}
fn pad_for_alloc(p: usize) -> usize {
// Overallocate by q rather than just padding by q as this should pad the last row // Overallocate by q rather than just padding by q as this should pad the last row
// and we don't have enough information here to know how many elements to add :( // and we don't have enough information here to know how many elements to add :(
// ceil_div(p, q) * q p + MATRIX_ROW_PADDING
p + q
} }
fn quantize_q8_1( fn quantize_q8_1(
@ -442,7 +445,7 @@ impl QCudaStorage {
} }
_ => crate::bail!("only f32 can be quantized"), _ => crate::bail!("only f32 can be quantized"),
}; };
let src_len = pad(src.len(), MATRIX_ROW_PADDING); let src_len = pad_for_alloc(src.len());
let src = crate::Storage::Cpu(crate::CpuStorage::F32(src)); let src = crate::Storage::Cpu(crate::CpuStorage::F32(src));
let mut qcpu_storage = crate::Device::Cpu.qzeros(src_len, self.dtype)?; let mut qcpu_storage = crate::Device::Cpu.qzeros(src_len, self.dtype)?;
qcpu_storage.quantize(&src)?; qcpu_storage.quantize(&src)?;