From 45936a18f8970fd1aa9288f550f8efd747394240 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Tue, 9 Jan 2024 18:54:48 +0100 Subject: [PATCH] Update with feature separated benchmarks --- candle-core/Cargo.toml | 5 -- candle-core/benches/bench_main.rs | 2 +- candle-core/benches/benchmarks/fill.rs | 43 +++++++++++++++++ candle-core/benches/benchmarks/mod.rs | 1 + candle-core/benches/fill.rs | 67 -------------------------- 5 files changed, 45 insertions(+), 73 deletions(-) create mode 100644 candle-core/benches/benchmarks/fill.rs delete mode 100644 candle-core/benches/fill.rs diff --git a/candle-core/Cargo.toml b/candle-core/Cargo.toml index 1b279999..afdb67cd 100644 --- a/candle-core/Cargo.toml +++ b/candle-core/Cargo.toml @@ -48,8 +48,3 @@ metal = ["dep:metal", "dep:candle-metal-kernels"] [[bench]] name = "bench_main" harness = false - -[[bench]] -name = "fill" -harness = false - diff --git a/candle-core/benches/bench_main.rs b/candle-core/benches/bench_main.rs index 4425f2fb..6362d803 100644 --- a/candle-core/benches/bench_main.rs +++ b/candle-core/benches/bench_main.rs @@ -1,4 +1,4 @@ mod benchmarks; use criterion::criterion_main; -criterion_main!(benchmarks::matmul::benches); +criterion_main!(benchmarks::matmul::benches, benchmarks::fill::benches); diff --git a/candle-core/benches/benchmarks/fill.rs b/candle-core/benches/benchmarks/fill.rs new file mode 100644 index 00000000..94268aa8 --- /dev/null +++ b/candle-core/benches/benchmarks/fill.rs @@ -0,0 +1,43 @@ +use crate::benchmarks::{bench_name, device, BenchDevice}; +use candle_core::{DType, Device, Tensor}; +use criterion::{black_box, criterion_group, Criterion, Throughput}; +use std::time::Instant; + +fn run(shape: (usize, usize, usize), dtype: DType, device: &Device) { + Tensor::ones(shape, dtype, device).unwrap(); +} + +fn run_fill_benchmark(c: &mut Criterion, name: &str, dtype: DType) { + let b = 1; + let rows = 4096; + let columns = 4096; + + let flops = b * rows * columns * dtype.size_in_bytes(); + + let device = device().unwrap(); + + let mut group = c.benchmark_group(bench_name(name)); + group.throughput(Throughput::Bytes(flops as u64)); + group.bench_function("iter", move |bencher| { + bencher.iter_custom(|iters| { + let start = Instant::now(); + for _i in 0..iters { + run( + black_box((b, rows, columns)), + black_box(dtype), + black_box(&device), + ); + } + device.sync().unwrap(); + start.elapsed() + }) + }); + group.finish(); +} + +fn criterion_benchmark(c: &mut Criterion) { + run_fill_benchmark(c, "fill_u8", DType::U8); + run_fill_benchmark(c, "fill_f32", DType::F32); +} + +criterion_group!(benches, criterion_benchmark); diff --git a/candle-core/benches/benchmarks/mod.rs b/candle-core/benches/benchmarks/mod.rs index 1344770d..9bfbf83a 100644 --- a/candle-core/benches/benchmarks/mod.rs +++ b/candle-core/benches/benchmarks/mod.rs @@ -1,3 +1,4 @@ +pub(crate) mod fill; pub(crate) mod matmul; use candle_core::{Device, Result}; diff --git a/candle-core/benches/fill.rs b/candle-core/benches/fill.rs deleted file mode 100644 index 9bd0aa72..00000000 --- a/candle-core/benches/fill.rs +++ /dev/null @@ -1,67 +0,0 @@ -use candle_core::{DType, Device, Tensor}; -use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput}; -use std::time::Instant; - -fn run(shape: (usize, usize, usize), dtype: DType, device: &Device) { - Tensor::ones(shape, dtype, device).unwrap(); -} - -fn criterion_benchmark(c: &mut Criterion) { - let b = 1; - let rows = 4096; - let columns = 4096; - - let flops = b * rows * columns; - - let device1 = Device::new_metal(0).unwrap(); - let device2 = device1.clone(); - - let mut group = c.benchmark_group("fill_metal_u8"); - group.throughput(Throughput::Bytes(flops as u64)); - group.bench_function("iter", move |bencher| { - bencher.iter_custom(|iters| { - let start = Instant::now(); - for _i in 0..iters { - run( - black_box((b, rows, columns)), - black_box(DType::U8), - black_box(&device1), - ); - } - if let Device::Metal(device) = &device1 { - device.wait_until_completed().unwrap(); - } else { - panic!("Expected metal device"); - } - start.elapsed() - }) - }); - group.finish(); - - let mut group = c.benchmark_group("fill_metal_f32"); - group.throughput(Throughput::Bytes( - (flops * DType::F32.size_in_bytes()) as u64, - )); - group.bench_function("iter", move |bencher| { - bencher.iter_custom(|iters| { - let start = Instant::now(); - for _i in 0..iters { - run( - black_box((b, rows, columns)), - black_box(DType::F32), - black_box(&device2), - ); - } - if let Device::Metal(device) = &device2 { - device.wait_until_completed().unwrap(); - } else { - panic!("Expected metal device"); - } - start.elapsed() - }) - }); - group.finish(); -} - -criterion_group!(benches, criterion_benchmark); -criterion_main!(benches);