mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
Optimize the batched matmul for the cpu backend. (#2884)
This commit is contained in:
@ -1289,6 +1289,15 @@ impl Map2 for MatMul {
|
||||
} else {
|
||||
Parallelism::None
|
||||
};
|
||||
let (b, m, n, k) = if b_skip == 0 && a_skip == m * k {
|
||||
// a_skip and c_skip are not recomputed for the merged shape, but b is now 1,
|
||||
// so `step` is always 0 and the skips are only ever multiplied by zero.
|
||||
(1, b * m, n, k)
|
||||
} else if a_skip == 0 && b_skip == n * k {
|
||||
(1, m, b * n, k)
|
||||
} else {
|
||||
(b, m, n, k)
|
||||
};
|
||||
for step in 0..b {
|
||||
let lhs_p = &lhs[step * a_skip..];
|
||||
let rhs_p = &rhs[step * b_skip..];
|
||||
|
Reference in New Issue
Block a user