Add a flag to force running the quantized model on CPUs. (#1778)

* Add a flag to force running the quantized model on CPUs.

* Add encodec to the readme.
Authored by Laurent Mazare on 2024-02-28 14:58:42 +01:00; committed by GitHub.
parent 60ee5cfd4d
commit 57267cd536
2 changed files with 10 additions and 2 deletions

View File

@@ -216,6 +216,10 @@ struct Args {
#[arg(long)]
split_prompt: bool,
+    /// Run on CPU rather than GPU even if a GPU is available.
+    #[arg(long)]
+    cpu: bool,
/// Penalty to be applied for repeating tokens, 1. means no penalty.
#[arg(long, default_value_t = 1.1)]
repeat_penalty: f32,
@@ -365,7 +369,7 @@ fn main() -> anyhow::Result<()> {
let model_path = args.model()?;
let mut file = std::fs::File::open(&model_path)?;
let start = std::time::Instant::now();
-    let device = candle_examples::device(false)?;
+    let device = candle_examples::device(args.cpu)?;
let mut model = match model_path.extension().and_then(|v| v.to_str()) {
Some("gguf") => {