diff --git a/candle-core/examples/llama/main.rs b/candle-core/examples/llama/main.rs
index 2ec2a9da..fac1e14f 100644
--- a/candle-core/examples/llama/main.rs
+++ b/candle-core/examples/llama/main.rs
@@ -448,9 +448,9 @@ struct Args {
     #[arg(long, default_value_t = 100)]
     sample_len: usize,
 
-    /// Enable the key-value cache.
-    #[arg(long, default_value_t = true)]
-    use_kv_cache: bool,
+    /// Disable the key-value cache.
+    #[arg(long)]
+    no_kv_cache: bool,
 }
 
 #[tokio::main]
@@ -464,7 +464,7 @@ async fn main() -> Result<()> {
         Device::new_cuda(0)?
     };
     let config = Config::config_7b();
-    let cache = Cache::new(args.use_kv_cache, &config, &device);
+    let cache = Cache::new(!args.no_kv_cache, &config, &device);
     let start = std::time::Instant::now();
     let (llama, tokenizer_filename) = match args.npy {
         Some(npy) => {