Improve the quantized whisper setup. (#1018)

* Improve the quantized whisper setup.

* Fix the config file paths.

* Use the standard matmul where possible.
This commit is contained in:
Laurent Mazare
2023-10-02 17:17:46 +01:00
committed by GitHub
parent e04c789230
commit 089fc3b584
8 changed files with 66 additions and 49 deletions

View File

@ -206,7 +206,7 @@ impl Benchmark for QMatMul {
fn preprocess() -> Result<Self::PreProcessData> {
let zeros = vec![candle::quantized::k_quants::BlockQ4_0::zeros(); 4096 * 11008 / 32];
let mm = candle::quantized::QTensor::new(zeros, (4096, 11008))?;
let mm = candle::quantized::QMatMul::from_qtensor(mm);
let mm = candle::quantized::QMatMul::from_qtensor(mm)?;
let arg = Tensor::randn(0f32, 1., (128, 11008), &Device::Cpu)?;
Ok((mm, arg))
}