From 3f04a79ada7ca974176a0c7c3c3306f394eae9a9 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Sun, 7 Jan 2024 14:40:15 +0100 Subject: [PATCH 1/4] Use cfg to seperate benchmark results based on features --- candle-core/benches/bench_utils.rs | 56 ++++++++++++++++++++++++++++++ candle-core/benches/matmul.rs | 16 ++++----- 2 files changed, 64 insertions(+), 8 deletions(-) create mode 100644 candle-core/benches/bench_utils.rs diff --git a/candle-core/benches/bench_utils.rs b/candle-core/benches/bench_utils.rs new file mode 100644 index 00000000..75800761 --- /dev/null +++ b/candle-core/benches/bench_utils.rs @@ -0,0 +1,56 @@ +use candle_core::{Device, Result}; + +pub(crate) trait BenchDevice { + fn sync(&self) -> Result<()>; +} + +impl BenchDevice for Device { + fn sync(&self) -> Result<()> { + match self { + Device::Cpu => Ok(()), + Device::Cuda(device) => { + #[cfg(feature = "cuda")] + return Ok(device.synchronize()?); + #[cfg(not(feature = "cuda"))] + panic!("Cuda device without cuda feature enabled: {:?}", device) + } + Device::Metal(device) => { + #[cfg(feature = "metal")] + return Ok(device.wait_until_completed()?); + #[cfg(not(feature = "metal"))] + panic!("Metal device without metal feature enabled: {:?}", device) + } + } + } +} + +#[allow(dead_code)] +pub(crate) fn device() -> Result { + return if cfg!(feature = "metal") { + Device::new_metal(0) + } else if cfg!(feature = "cuda") { + Device::new_cuda(0) + } else { + Ok(Device::Cpu) + }; +} + +#[allow(dead_code)] +pub(crate) fn bench_name>(name: S) -> String { + format!("{}_{}", device_variant(), name.into()) +} + +#[allow(dead_code)] +const fn device_variant() -> &'static str { + return if cfg!(feature = "metal") { + "metal" + } else if cfg!(feature = "cuda") { + "cuda" + } else if cfg!(feature = "accelerate") { + "accelerate" + } else if cfg!(feature = "mkl") { + "mkl" + } else { + "cpu" + }; +} diff --git a/candle-core/benches/matmul.rs b/candle-core/benches/matmul.rs index 83679771..4f7dfa6c 100644 --- a/candle-core/benches/matmul.rs +++ b/candle-core/benches/matmul.rs @@ -1,4 +1,8 @@ -use candle_core::{DType, Device, Tensor}; +mod bench_utils; + +use crate::bench_utils::bench_name; +use bench_utils::{device, BenchDevice}; +use candle_core::{DType, Tensor}; use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput}; use std::time::Instant; @@ -12,14 +16,14 @@ fn criterion_benchmark(c: &mut Criterion) { let n = 2048; let k = 2048; - let device = Device::new_metal(0).unwrap(); + let device = device().unwrap(); let dtype = DType::F32; let lhs = Tensor::zeros((b, m, k), dtype, &device).unwrap(); let rhs = Tensor::zeros((b, n, k), dtype, &device).unwrap(); let flops = b * m * n * k; - let mut group = c.benchmark_group("matmul_metal"); + let mut group = c.benchmark_group(bench_name("matmul")); group.throughput(Throughput::Bytes(flops as u64)); group.bench_function("iter", move |b| { b.iter_custom(|iters| { @@ -27,11 +31,7 @@ fn criterion_benchmark(c: &mut Criterion) { for _i in 0..iters { run(black_box(&lhs), black_box(&rhs)); } - if let Device::Metal(device) = &device { - device.wait_until_completed().unwrap(); - } else { - panic!("Expected metal device"); - } + device.sync().unwrap(); start.elapsed() }) }); From ad075a5f7edb0114b820b3e99a19b17d0d25ec3b Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Mon, 8 Jan 2024 06:48:33 +0100 Subject: [PATCH 2/4] Remove allow pragma --- candle-core/benches/matmul.rs | 5 ++--- candle-core/benches/{bench_utils.rs => utils.rs} | 3 --- 2 files changed, 2 insertions(+), 6 deletions(-) rename candle-core/benches/{bench_utils.rs => utils.rs} (96%) diff --git a/candle-core/benches/matmul.rs b/candle-core/benches/matmul.rs index 4f7dfa6c..a5dba9cc 100644 --- a/candle-core/benches/matmul.rs +++ b/candle-core/benches/matmul.rs @@ -1,10 +1,9 @@ -mod bench_utils; +mod utils; -use crate::bench_utils::bench_name; -use bench_utils::{device, BenchDevice}; use candle_core::{DType, Tensor}; use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput}; use std::time::Instant; +use utils::{bench_name, device, BenchDevice}; fn run(a: &Tensor, b: &Tensor) { a.matmul(&b.t().unwrap()).unwrap(); diff --git a/candle-core/benches/bench_utils.rs b/candle-core/benches/utils.rs similarity index 96% rename from candle-core/benches/bench_utils.rs rename to candle-core/benches/utils.rs index 75800761..a93afc6e 100644 --- a/candle-core/benches/bench_utils.rs +++ b/candle-core/benches/utils.rs @@ -24,7 +24,6 @@ impl BenchDevice for Device { } } -#[allow(dead_code)] pub(crate) fn device() -> Result { return if cfg!(feature = "metal") { Device::new_metal(0) @@ -35,12 +34,10 @@ pub(crate) fn device() -> Result { }; } -#[allow(dead_code)] pub(crate) fn bench_name>(name: S) -> String { format!("{}_{}", device_variant(), name.into()) } -#[allow(dead_code)] const fn device_variant() -> &'static str { return if cfg!(feature = "metal") { "metal" From fb05af4c42a6343856f167893b19974ed6b50276 Mon Sep 17 00:00:00 2001 From: Laurent Date: Mon, 8 Jan 2024 07:19:59 +0100 Subject: [PATCH 3/4] Avoid some unnecessary returns. --- candle-core/benches/utils.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/candle-core/benches/utils.rs b/candle-core/benches/utils.rs index a93afc6e..3e8b3c57 100644 --- a/candle-core/benches/utils.rs +++ b/candle-core/benches/utils.rs @@ -25,13 +25,13 @@ impl BenchDevice for Device { } pub(crate) fn device() -> Result { - return if cfg!(feature = "metal") { + if cfg!(feature = "metal") { Device::new_metal(0) } else if cfg!(feature = "cuda") { Device::new_cuda(0) } else { Ok(Device::Cpu) - }; + } } pub(crate) fn bench_name>(name: S) -> String { @@ -39,7 +39,7 @@ pub(crate) fn bench_name>(name: S) -> String { } const fn device_variant() -> &'static str { - return if cfg!(feature = "metal") { + if cfg!(feature = "metal") { "metal" } else if cfg!(feature = "cuda") { "cuda" @@ -49,5 +49,5 @@ const fn device_variant() -> &'static str { "mkl" } else { "cpu" - }; + } } From 88945f2c227d6a76e8f133d8b72c701b180c4f4c Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Tue, 9 Jan 2024 18:31:28 +0100 Subject: [PATCH 4/4] Improve benchmarks layout --- candle-core/Cargo.toml | 2 +- candle-core/benches/bench_main.rs | 4 ++++ candle-core/benches/{ => benchmarks}/matmul.rs | 7 ++----- candle-core/benches/{utils.rs => benchmarks/mod.rs} | 2 ++ 4 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 candle-core/benches/bench_main.rs rename candle-core/benches/{ => benchmarks}/matmul.rs (85%) rename candle-core/benches/{utils.rs => benchmarks/mod.rs} (98%) diff --git a/candle-core/Cargo.toml b/candle-core/Cargo.toml index 91655f57..93b718a3 100644 --- a/candle-core/Cargo.toml +++ b/candle-core/Cargo.toml @@ -46,6 +46,6 @@ accelerate = ["dep:libc", "dep:accelerate-src"] metal = ["dep:metal", "dep:candle-metal-kernels"] [[bench]] -name = "matmul" +name = "bench_main" harness = false diff --git a/candle-core/benches/bench_main.rs b/candle-core/benches/bench_main.rs new file mode 100644 index 00000000..4425f2fb --- /dev/null +++ b/candle-core/benches/bench_main.rs @@ -0,0 +1,4 @@ +mod benchmarks; + +use criterion::criterion_main; +criterion_main!(benchmarks::matmul::benches); diff --git a/candle-core/benches/matmul.rs b/candle-core/benches/benchmarks/matmul.rs similarity index 85% rename from candle-core/benches/matmul.rs rename to candle-core/benches/benchmarks/matmul.rs index a5dba9cc..fb173f04 100644 --- a/candle-core/benches/matmul.rs +++ b/candle-core/benches/benchmarks/matmul.rs @@ -1,9 +1,7 @@ -mod utils; - +use crate::benchmarks::{bench_name, device, BenchDevice}; use candle_core::{DType, Tensor}; -use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput}; +use criterion::{black_box, criterion_group, Criterion, Throughput}; use std::time::Instant; -use utils::{bench_name, device, BenchDevice}; fn run(a: &Tensor, b: &Tensor) { a.matmul(&b.t().unwrap()).unwrap(); @@ -38,4 +36,3 @@ fn criterion_benchmark(c: &mut Criterion) { } criterion_group!(benches, criterion_benchmark); -criterion_main!(benches); diff --git a/candle-core/benches/utils.rs b/candle-core/benches/benchmarks/mod.rs similarity index 98% rename from candle-core/benches/utils.rs rename to candle-core/benches/benchmarks/mod.rs index 3e8b3c57..1344770d 100644 --- a/candle-core/benches/utils.rs +++ b/candle-core/benches/benchmarks/mod.rs @@ -1,3 +1,5 @@ +pub(crate) mod matmul; + use candle_core::{Device, Result}; pub(crate) trait BenchDevice {