Faster matmul when we can fall back to gemv.

2025-06-19 03:54:56 +00:00 · 2023-08-04 22:44:30 +01:00
parent f7b2a0391d
commit 3fa3623135
3 changed files with 14 additions and 5 deletions
--- a/candle-examples/examples/llama2-c/weights.rs
+++ b/candle-examples/examples/llama2-c/weights.rs
@@ -111,6 +111,7 @@ impl TransformerWeights {
        // matrix column major rather than row major. This ends up speeding up text generation from
        // 120 token/s to 220 token/s on a Ryzen 2600X.
        let tr = device.is_cpu() && !candle::utils::has_mkl();
+        let tr = false;
        let tr = |x: Tensor| if tr { x.t()?.contiguous()?.t() } else { Ok(x) };
        let mut ws = std::collections::HashMap::new();
        let mut insert = |name: &str, t: Tensor| {