mirror of
https://github.com/huggingface/candle.git
synced 2025-06-14 09:57:10 +00:00
Make tensor contiguous before the repeat_kv calls to avoid strided copies (#2953)
This commit is contained in:
@ -217,6 +217,10 @@ impl AttentionWeights {
|
||||
}
|
||||
let (k, v) = self.kv_cache.append(&k.contiguous()?, &v.contiguous()?)?;
|
||||
|
||||
// Make the k and v tensors contiguous before the repeat_kv calls to avoid strided copies
|
||||
let k = k.contiguous()?;
|
||||
let v = v.contiguous()?;
|
||||
|
||||
let k = repeat_kv(k, self.num_kv_groups)?.contiguous()?;
|
||||
let v = repeat_kv(v, self.num_kv_groups)?.contiguous()?;
|
||||
|
||||
|
Reference in New Issue
Block a user