mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
Make tensor contiguous before the repeat_kv calls to avoid strided copies (#2953)
This commit is contained in:
@ -217,6 +217,10 @@ impl AttentionWeights {
|
|||||||
}
|
}
|
||||||
let (k, v) = self.kv_cache.append(&k.contiguous()?, &v.contiguous()?)?;
|
let (k, v) = self.kv_cache.append(&k.contiguous()?, &v.contiguous()?)?;
|
||||||
|
|
||||||
|
// Make tensor contiguous to avoid some strided copies
|
||||||
|
let k = k.contiguous()?;
|
||||||
|
let v = v.contiguous()?;
|
||||||
|
|
||||||
let k = repeat_kv(k, self.num_kv_groups)?.contiguous()?;
|
let k = repeat_kv(k, self.num_kv_groups)?.contiguous()?;
|
||||||
let v = repeat_kv(v, self.num_kv_groups)?.contiguous()?;
|
let v = repeat_kv(v, self.num_kv_groups)?.contiguous()?;
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user