Removing the fences speeds everything up and *is* correct this time...

This commit is contained in:
Nicolas Patry
2024-01-05 19:26:30 +01:00
parent 61ad8d91cc
commit c8c603ce96
4 changed files with 70 additions and 71 deletions

View File

@@ -250,7 +250,7 @@ fn main() -> Result<()> {
let vb =
candle_transformers::quantized_var_builder::VarBuilder::from_gguf(filename, &device)?;
let model = QMistral::new(&config, vb)?;
-(Model::Quantized(model), Device::Cpu)
+(Model::Quantized(model), device)
} else {
let dtype = if device.is_cuda() {
DType::BF16