Fix the matmul layout for accelerate & mkl. (#2011)
* Fix the matmul layout for accelerate & mkl.
* Reduce the required precision for pow (because of accelerate).
* And a fix for the gelu f16 test.
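For context on the layout fix: the first hunk below exercises matmul where the right-hand side comes from `.t()` and is therefore non-contiguous, so the accelerate/mkl paths must translate its strides into the proper BLAS layout flags. A minimal standalone sketch of that scenario, built only from calls that appear in the diff (the `candle_core` import path and CPU device are assumptions of mine):

use candle_core::{DType, Device, IndexOp, Result, Tensor};

fn main() -> Result<()> {
    let device = Device::Cpu;
    let seq_len = 8usize;
    // (1, seq_len, 16) input; indexing the last position yields a (1, 16) view.
    let a = Tensor::zeros((1, seq_len, 16), DType::F32, &device)?;
    let x = a.i((.., seq_len - 1, ..))?;
    // Transposing swaps the strides of the (32, 16) tensor, so the resulting
    // (16, 32) tensor is not contiguous.
    let w = Tensor::zeros((32, 16), DType::F32, &device)?.t()?;
    assert!(!w.is_contiguous());
    // The accelerate/mkl matmul must map these strides onto the right layout.
    let y = x.matmul(&w)?;
    assert_eq!(y.dims(), &[1, 32]);
    Ok(())
}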
@@ -73,20 +73,7 @@ fn squeeze_mm(device: &Device) -> Result<()> {
     let seq_len = 8_usize;
     let a = Tensor::zeros((1, seq_len, 16), DType::F32, device)?;
     let x = a.i((.., seq_len - 1, ..))?;
-    println!(
-        "x shape:{:?}, stride:{:?}, is_contiguous:{}",
-        x.shape(),
-        x.stride(),
-        x.is_contiguous()
-    );
-
     let w = Tensor::zeros((32, 16), DType::F32, device)?.t()?;
-    println!(
-        "w shape:{:?}, stride:{:?}, is_contiguous:{}",
-        w.shape(),
-        w.stride(),
-        w.is_contiguous()
-    );
     let x = x.matmul(&w)?;
     assert_eq!(x.dims(), &[1, 32]);
     Ok(())
@@ -107,13 +107,8 @@ fn unary_op(device: &Device) -> Result<()> {
         ]
     );
     let t_f16 = tensor.to_dtype(DType::F16)?.gelu()?.to_dtype(DType::F32)?;
-    assert_eq!(
-        test_utils::to_vec2_round(&t_f16, 2)?,
-        [
-            [-0.0, 0.84, 4.0, -0.05, 0.35],
-            [2.69, -0.07, -0.11, 1.73, 2.79]
-        ],
-    );
+    let max_diff = (tensor.gelu()? - t_f16)?.flatten_all()?.max(0)?;
+    assert!(max_diff.to_vec0::<f32>()? < 5e-3);
     assert_eq!(
         test_utils::to_vec2_round(&tensor.gelu_erf()?, 4)?,
         [
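Rather than pinning rounded f16 outputs, the updated test bounds the worst-case deviation from the f32 result. A sketch of the same tolerance pattern in isolation (the input values and the added `.abs()` are my own; the upstream test compares the signed difference directly):

use candle_core::{DType, Device, Result, Tensor};

fn main() -> Result<()> {
    let device = Device::Cpu;
    let tensor = Tensor::new(&[[-1.0f32, 0.0, 1.0], [2.0, 3.0, 4.0]], &device)?;
    // Compute gelu in f16, then widen back to f32 for comparison.
    let t_f16 = tensor.to_dtype(DType::F16)?.gelu()?.to_dtype(DType::F32)?;
    // Bound the max absolute deviation instead of matching rounded values.
    let max_diff = (tensor.gelu()? - t_f16)?.abs()?.flatten_all()?.max(0)?;
    assert!(max_diff.to_vec0::<f32>()? < 5e-3);
    Ok(())
}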
@@ -1255,8 +1250,8 @@ fn pow() -> Result<()> {
     let rhs = (&lhs - 2.)?;
     let res = lhs.pow(&rhs)?;
     assert_eq!(
-        test_utils::to_vec2_round(&res, 4)?,
-        [[1.0, 1.0, 3.0], [16.0, 125.0, 1296.0001]]
+        test_utils::to_vec2_round(&res, 3)?,
+        [[1.0, 1.0, 3.0], [16.0, 125.0, 1296.0]]
     );
     Ok(())
 }
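On the reduced pow precision: elementwise pow is commonly evaluated as exp(y * ln(x)), so an exact power like 6^4 = 1296 can come back as 1296.0001 in f32, which is presumably why the assertion now rounds to 3 decimals instead of 4. A plain-Rust illustration of that rounding effect (exact outputs are platform-dependent):

fn main() {
    let (x, y) = (6.0f32, 4.0f32);
    // Direct powf usually hits the exact value for small integer powers.
    let direct = x.powf(y);
    // The exp/ln route accumulates rounding error, e.g. 1296.0001 vs 1296.0.
    let via_exp_ln = (y * x.ln()).exp();
    println!("powf: {direct}, exp(y*ln(x)): {via_exp_ln}");
}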