Fixing quantized llama demo on metal. (#1703)

2025-06-20 04:00:28 +00:00 · 2024-02-13 16:28:56 +01:00
parent ad73e93da2
commit c1b418586c
4 changed files with 34 additions and 13 deletions
--- a/candle-core/src/quantized/ggml_file.rs
+++ b/candle-core/src/quantized/ggml_file.rs
@@ -233,6 +233,7 @@ pub struct Content {
    pub hparams: HParams,
    pub vocab: Vocab,
    pub tensors: HashMap<String, super::QTensor>,
+    pub device: Device,
 }

 impl Content {
@@ -252,11 +253,13 @@ impl Content {
            let (name, tensor) = read_one_tensor(reader, magic, device)?;
            tensors.insert(name, tensor);
        }
+        let device = device.clone();
        Ok(Self {
            magic,
            hparams,
            vocab,
            tensors,
+            device,
        })
    }