diff --git a/candle-core/examples/cpu_benchmarks.rs b/candle-core/examples/cpu_benchmarks.rs new file mode 100644 index 00000000..f9b28a4e --- /dev/null +++ b/candle-core/examples/cpu_benchmarks.rs @@ -0,0 +1,81 @@ +/// This example contains some simple benchmarks so that it's easy to run them in perf etc. +#[cfg(feature = "mkl")] +extern crate intel_mkl_src; + +#[cfg(feature = "accelerate")] +extern crate accelerate_src; + +// use candle::quantized::GgmlType; +use candle::{DType, Device, Result, Tensor}; +// use clap::{Parser, Subcommand}; + +// fn softmax(xs: &Tensor, dim: D) -> Result { +// let dim = dim.to_index(xs.shape(), "softmax")?; +// let max = xs.max_keepdim(dim)?; +// let diff = xs.broadcast_sub(&max)?; +// let num = diff.exp()?; +// let den = num.sum_keepdim(dim)?; +// num.broadcast_div(&den) +// } + +trait Benchmark { + type PreProcessData; + type RunResult; + + fn preprocess() -> Result; + fn run_one(_: &Self::PreProcessData) -> Result; + + const ITERS: usize; +} + +struct Matmul; +impl Benchmark for Matmul { + type PreProcessData = (Tensor, Tensor); + type RunResult = Tensor; + fn preprocess() -> Result { + let lhs = Tensor::randn((1024, 1024), DType::F32, &Device::Cpu, 1.0, 0.0)?; + let rhs = Tensor::randn((1024, 1024), DType::F32, &Device::Cpu, 1.0, 0.0)?; + Ok((lhs, rhs)) + } + + fn run_one(d: &Self::PreProcessData) -> Result { + d.0.matmul(&d.1) + } + + const ITERS: usize = 100; +} + +// struct Softmax; +// impl Benchmark for Softmax { +// type PreProcessData = Tensor; +// type RunResult = Tensor; +// fn preprocess() -> Result { +// // Typical whisper tiny size. +// let x = Tensor::randn(0f32, 1., (1, 6, 200, 1500), &Device::Cpu)?; +// Ok(x) +// } +// +// fn run_one(d: &Self::PreProcessData) -> Result { +// softmax(d, D::Minus1) +// } +// +// const ITERS: usize = 100; +// } + +fn run(iters: Option) -> Result<()> { + use std::hint::black_box; + + let iters = iters.unwrap_or(B::ITERS); + let d = B::preprocess()?; + let start = std::time::Instant::now(); + for _iter in 0..iters { + let _res = black_box(B::run_one(black_box(&d))?); + } + println!("{:?}", start.elapsed() / iters as u32); + Ok(()) +} + +fn main() -> Result<()> { + run::(None)?; + Ok(()) +}