mirror of
https://github.com/huggingface/candle.git
synced 2025-06-21 20:22:49 +00:00
working bfloat matmul
This commit is contained in:
@@ -2,8 +2,8 @@ mod benchmarks;
 
 use criterion::criterion_main;
 criterion_main!(
-    benchmarks::affine::benches,
+    //benchmarks::affine::benches,
     benchmarks::matmul::benches,
-    benchmarks::random::benches,
-    benchmarks::where_cond::benches
+    //benchmarks::random::benches,
+    //benchmarks::where_cond::benches
 );
@@ -13,11 +13,11 @@ fn run_bench(c: &mut Criterion, device: &Device) {
     let n = 2048;
     let k = 2048;
 
-    let dtype = DType::F32;
+    let dtype = DType::BF16;
     let lhs = Tensor::zeros((b, m, k), dtype, device).unwrap();
     let rhs = Tensor::zeros((b, n, k), dtype, device).unwrap();
 
-    let flops = b * m * n * k;
+    let flops = b * m * n * k * dtype.size_in_bytes();
 
     let mut group = c.benchmark_group(device.bench_name("matmul"));
     group.throughput(Throughput::Bytes(flops as u64));
@@ -1254,6 +1254,7 @@ impl BackendStorage for MetalStorage {
         let name = match self.dtype {
             DType::F32 => "sgemm",
             DType::F16 => "hgemm",
+            DType::BF16 => "bgemm",
             dtype => {
                 return Err(MetalError::Message(format!("matmul doesn't support {dtype:?}")).into())
             }
Reference in New Issue
Block a user