working bfloat matmul

2025-06-21 20:22:49 +00:00 · 2024-03-18 14:38:40 +01:00
parent 0c09d10f32
commit c974dee369
6 changed files with 32 additions and 16 deletions
--- a/candle-core/benches/bench_main.rs
+++ b/candle-core/benches/bench_main.rs
@@ -2,8 +2,8 @@ mod benchmarks;

 use criterion::criterion_main;
 criterion_main!(
-    benchmarks::affine::benches,
+    //benchmarks::affine::benches,
    benchmarks::matmul::benches,
-    benchmarks::random::benches,
-    benchmarks::where_cond::benches
+    //benchmarks::random::benches,
+    //benchmarks::where_cond::benches
 );
--- a/candle-core/benches/benchmarks/matmul.rs
+++ b/candle-core/benches/benchmarks/matmul.rs
@@ -13,11 +13,11 @@ fn run_bench(c: &mut Criterion, device: &Device) {
    let n = 2048;
    let k = 2048;

-    let dtype = DType::F32;
+    let dtype = DType::BF16;
    let lhs = Tensor::zeros((b, m, k), dtype, device).unwrap();
    let rhs = Tensor::zeros((b, n, k), dtype, device).unwrap();

-    let flops = b * m * n * k;
+    let flops = b * m * n * k * dtype.size_in_bytes();

    let mut group = c.benchmark_group(device.bench_name("matmul"));
    group.throughput(Throughput::Bytes(flops as u64));
--- a/candle-core/src/metal_backend.rs
+++ b/candle-core/src/metal_backend.rs
@@ -1254,6 +1254,7 @@ impl BackendStorage for MetalStorage {
        let name = match self.dtype {
            DType::F32 => "sgemm",
            DType::F16 => "hgemm",
+            DType::BF16 => "bgemm",
            dtype => {
                return Err(MetalError::Message(format!("matmul doesn't support {dtype:?}")).into())
            }