Improve the handling of matmul with squeezed layouts. (#1998)
* Improve the handling of matmul with squeezed layouts.
* Fix for the cuda backend.
* Revert the temporary fix.
@@ -535,7 +535,7 @@ impl Module for AttentionBlock {
-        // TODO: revert the call to force_contiguous once the three matmul kernels have been
-        // adapted to handle layout with some dims set to 1.
-        let xs = attention_probs.matmul(&value_states.force_contiguous()?)?;
+        let xs = attention_probs.matmul(&value_states)?;
         let xs = xs.to_dtype(in_dtype)?;
         let xs = xs.transpose(1, 2)?.contiguous()?;
         let xs = xs.flatten_from(D::Minus2)?;
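For context, a minimal sketch (not part of this commit) of the kind of call the change enables: a batched matmul whose operands have dims of size 1 and a transposed layout, which previously had to be routed through force_contiguous() first. The shapes and variable names are illustrative assumptions, not taken from candle's tests.

// Sketch, assuming candle-core as the dependency.
use candle_core::{Device, Result, Tensor};

fn main() -> Result<()> {
    let dev = Device::Cpu;
    // (1, 1, 4, 8): the leading dims of size 1 give a "squeezed" layout.
    let probs = Tensor::randn(0f32, 1f32, (1, 1, 4, 8), &dev)?;
    // (1, 1, 8, 16) built via transpose, so its layout is not plainly contiguous.
    let value = Tensor::randn(0f32, 1f32, (1, 1, 16, 8), &dev)?.transpose(2, 3)?;

    // Previously: probs.matmul(&value.force_contiguous()?)?
    // With the improved handling, the squeezed/transposed layout is accepted directly.
    let out = probs.matmul(&value)?;
    println!("{:?}", out.dims()); // [1, 1, 4, 16]
    Ok(())
}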