Tweak the kv-cache flag.

2025-06-17 11:08:52 +00:00 · 2023-06-29 22:16:40 +01:00
parent ae3f202f3b
commit f6152e74b6
1 changed file with 4 additions and 4 deletions
--- a/candle-core/examples/llama/main.rs
+++ b/candle-core/examples/llama/main.rs
@@ -448,9 +448,9 @@ struct Args {
    #[arg(long, default_value_t = 100)]
    sample_len: usize,

-    /// Enable the key-value cache.
-    #[arg(long, default_value_t = true)]
-    use_kv_cache: bool,
+    /// Disable the key-value cache.
+    #[arg(long)]
+    no_kv_cache: bool,
 }

 #[tokio::main]
@@ -464,7 +464,7 @@ async fn main() -> Result<()> {
        Device::new_cuda(0)?
    };
    let config = Config::config_7b();
-    let cache = Cache::new(args.use_kv_cache, &config, &device);
+    let cache = Cache::new(!args.no_kv_cache, &config, &device);
    let start = std::time::Instant::now();
    let (llama, tokenizer_filename) = match args.npy {
        Some(npy) => {