Efficient implementation of Tensor::ones() for metal (#2512)

* WIP: hopefully better const impl * with GPU * More tests on * Reverting primitive for * Incorporating review changes - added elem count check in kernel, using for call strategy * rustfmt ran
2025-06-16 18:48:51 +00:00 · 2024-10-01 22:41:59 +05:30
parent def4c6cdee
commit a2bcc227df
5 changed files with 194 additions and 4 deletions
--- a/candle-core/tests/tensor_tests.rs
+++ b/candle-core/tests/tensor_tests.rs
@ -29,6 +29,36 @@ fn ones(device: &Device) -> Result<()> {
        Tensor::ones((2, 3), DType::F64, device)?.to_vec2::<f64>()?,
        [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
    );
+    assert_eq!(
+        Tensor::ones((2, 3), DType::F16, device)?.to_vec2::<half::f16>()?,
+        [
+            [
+                half::f16::from_f32(1.0),
+                half::f16::from_f32(1.0),
+                half::f16::from_f32(1.0)
+            ],
+            [
+                half::f16::from_f32(1.0),
+                half::f16::from_f32(1.0),
+                half::f16::from_f32(1.0)
+            ]
+        ],
+    );
+    assert_eq!(
+        Tensor::ones((2, 3), DType::BF16, device)?.to_vec2::<half::bf16>()?,
+        [
+            [
+                half::bf16::from_f32(1.0),
+                half::bf16::from_f32(1.0),
+                half::bf16::from_f32(1.0)
+            ],
+            [
+                half::bf16::from_f32(1.0),
+                half::bf16::from_f32(1.0),
+                half::bf16::from_f32(1.0)
+            ]
+        ],
+    );
    Ok(())
 }