diff --git a/candle-core/src/quantized/mod.rs b/candle-core/src/quantized/mod.rs index f7abcd93..47307f2e 100644 --- a/candle-core/src/quantized/mod.rs +++ b/candle-core/src/quantized/mod.rs @@ -398,7 +398,7 @@ impl QMatMul { _ => DEQUANTIZE_ALL.with(|b| *b), }; let t = if dequantize { - let tensor = qtensor.dequantize(&Device::Cpu)?; + let tensor = qtensor.dequantize(&qtensor.device())?; Self::Tensor(tensor) } else { Self::QTensor(qtensor)