From 84250bf52f58528cf59dca3b82effd9f07a13cc7 Mon Sep 17 00:00:00 2001
From: optman
Date: Sat, 6 Jan 2024 18:43:01 +0800
Subject: [PATCH] fix index_pos bug when kv cache is disabled. (#1517)

* fix index_pos bug when kv cache is disabled

* Tweak the fix.

---------

Co-authored-by: laurent
---
 candle-examples/examples/llama/main.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/candle-examples/examples/llama/main.rs b/candle-examples/examples/llama/main.rs
index 46f474bb..251c184b 100644
--- a/candle-examples/examples/llama/main.rs
+++ b/candle-examples/examples/llama/main.rs
@@ -165,14 +165,14 @@ fn main() -> Result<()> {
     let mut index_pos = 0;
     let mut token_generated = 0;
     for index in 0..args.sample_len {
-        let context_size = if cache.use_kv_cache && index > 0 {
-            1
+        let (context_size, context_index) = if cache.use_kv_cache && index > 0 {
+            (1, index_pos)
         } else {
-            tokens.len()
+            (tokens.len(), 0)
         };
         let ctxt = &tokens[tokens.len().saturating_sub(context_size)..];
         let input = Tensor::new(ctxt, &device)?.unsqueeze(0)?;
-        let logits = llama.forward(&input, index_pos)?;
+        let logits = llama.forward(&input, context_index)?;
         let logits = logits.squeeze(0)?;
         let logits = if args.repeat_penalty == 1. {
             logits
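
For context, a minimal, dependency-free sketch of the decode-loop position logic the hunk above fixes: with the KV cache enabled only the newest token is fed and the forward pass must be told its absolute offset (index_pos), while with the cache disabled the whole context is re-fed every step, so the offset must start at 0. The Model::forward stub and the dummy token values below are illustrative stand-ins, not candle's llama API; only the (context_size, context_index) selection mirrors the patched example.

// Sketch only: `Model::forward` is a stub standing in for candle's `llama.forward`,
// and the token values are dummies.
struct Model;

impl Model {
    // Stand-in forward pass: `index_pos` is the positional offset of the first
    // token in `ctxt`.
    fn forward(&self, ctxt: &[u32], index_pos: usize) {
        println!("forward: {} token(s) starting at position {}", ctxt.len(), index_pos);
    }
}

fn main() {
    let model = Model;
    let sample_len = 3;

    for use_kv_cache in [true, false] {
        println!("use_kv_cache = {use_kv_cache}");
        let mut tokens: Vec<u32> = vec![10, 11, 12]; // pretend prompt
        let mut index_pos = 0;
        for index in 0..sample_len {
            // With the cache, only the newest token is fed and the model needs its
            // absolute position (`index_pos`). Without the cache, the whole context
            // is re-fed every step, so positions must start from 0.
            let (context_size, context_index) = if use_kv_cache && index > 0 {
                (1, index_pos)
            } else {
                (tokens.len(), 0)
            };
            let ctxt = &tokens[tokens.len().saturating_sub(context_size)..];
            model.forward(ctxt, context_index);
            index_pos += ctxt.len();
            tokens.push(42); // pretend we sampled a new token
        }
    }
}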