Metal: Activate bfloat affine and add benchmark (#1543)

* Use cfg to seperate benchmark results based on features

* Add bfloat affine and benchmarks

* Fix flops calculation

* Remove allow pragma

* Avoid some unnecessary returns.

* Improve benchmarks layout

---------

Co-authored-by: Laurent <laurent.mazare@gmail.com>
Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
ivarflakstad
2024-01-12 11:19:49 +01:00
committed by GitHub
parent e90bcdcc7c
commit a3d92ab226
5 changed files with 54 additions and 8 deletions

View File

@ -353,6 +353,7 @@ impl BackendStorage for MetalStorage {
let name = match self.dtype {
DType::F32 => "affine_f32",
DType::F16 => "affine_f16",
DType::BF16 => "affine_bf16",
dtype => crate::bail!("Metal contiguous affine {dtype:?} not implemented"),
};
candle_metal_kernels::call_affine(
@ -371,6 +372,7 @@ impl BackendStorage for MetalStorage {
let name = match self.dtype {
DType::F32 => "affine_f32_strided",
DType::F16 => "affine_f16_strided",
DType::BF16 => "affine_bf16_strided",
dtype => crate::bail!("Metal strided affine {dtype:?} not implemented"),
};
candle_metal_kernels::call_affine_strided(