diff --git a/candle-core/src/quantized/ggml_file.rs b/candle-core/src/quantized/ggml_file.rs index e4ad35cd..1dd3d9c0 100644 --- a/candle-core/src/quantized/ggml_file.rs +++ b/candle-core/src/quantized/ggml_file.rs @@ -135,7 +135,13 @@ pub fn qtensor_from_ggml( dims: Vec, ) -> Result { let tensor_elems = dims.iter().product::(); - let size_in_bytes = tensor_elems * ggml_dtype.type_size() / ggml_dtype.blck_size(); + let blck_size = ggml_dtype.blck_size(); + if tensor_elems % blck_size != 0 { + crate::bail!( + "the number of elements {tensor_elems} is not divisible by the block size {blck_size}" + ) + } + let size_in_bytes = tensor_elems / blck_size * ggml_dtype.type_size(); match ggml_dtype { GgmlDType::F32 => from_raw_data::(raw_data, size_in_bytes, dims), diff --git a/candle-core/src/quantized/gguf_file.rs b/candle-core/src/quantized/gguf_file.rs index b3993034..3a5f2030 100644 --- a/candle-core/src/quantized/gguf_file.rs +++ b/candle-core/src/quantized/gguf_file.rs @@ -59,8 +59,13 @@ impl TensorInfo { tensor_data_offset: u64, ) -> Result { let tensor_elems = self.shape.elem_count(); - let size_in_bytes = - tensor_elems * self.ggml_dtype.type_size() / self.ggml_dtype.blck_size(); + let blck_size = self.ggml_dtype.blck_size(); + if tensor_elems % blck_size != 0 { + crate::bail!( + "the number of elements {tensor_elems} is not divisible by the block size {blck_size}" + ) + } + let size_in_bytes = tensor_elems / blck_size * self.ggml_dtype.type_size(); let mut raw_data = vec![0u8; size_in_bytes]; reader.seek(std::io::SeekFrom::Start(tensor_data_offset + self.offset))?; reader.read_exact(&mut raw_data)?;