mirror of
https://github.com/huggingface/candle.git
synced 2025-06-15 10:26:33 +00:00
Adding benchmark.
This commit is contained in:
81
candle-core/examples/cpu_benchmarks.rs
Normal file
81
candle-core/examples/cpu_benchmarks.rs
Normal file
@ -0,0 +1,81 @@
|
||||
/// This example contains some simple benchmarks so that it's easy to run them in perf etc.
|
||||
#[cfg(feature = "mkl")]
|
||||
extern crate intel_mkl_src;
|
||||
|
||||
#[cfg(feature = "accelerate")]
|
||||
extern crate accelerate_src;
|
||||
|
||||
// use candle::quantized::GgmlType;
|
||||
use candle::{DType, Device, Result, Tensor};
|
||||
// use clap::{Parser, Subcommand};
|
||||
|
||||
// fn softmax<D: candle::shape::Dim>(xs: &Tensor, dim: D) -> Result<Tensor> {
|
||||
// let dim = dim.to_index(xs.shape(), "softmax")?;
|
||||
// let max = xs.max_keepdim(dim)?;
|
||||
// let diff = xs.broadcast_sub(&max)?;
|
||||
// let num = diff.exp()?;
|
||||
// let den = num.sum_keepdim(dim)?;
|
||||
// num.broadcast_div(&den)
|
||||
// }
|
||||
|
||||
trait Benchmark {
|
||||
type PreProcessData;
|
||||
type RunResult;
|
||||
|
||||
fn preprocess() -> Result<Self::PreProcessData>;
|
||||
fn run_one(_: &Self::PreProcessData) -> Result<Self::RunResult>;
|
||||
|
||||
const ITERS: usize;
|
||||
}
|
||||
|
||||
struct Matmul;
|
||||
impl Benchmark for Matmul {
|
||||
type PreProcessData = (Tensor, Tensor);
|
||||
type RunResult = Tensor;
|
||||
fn preprocess() -> Result<Self::PreProcessData> {
|
||||
let lhs = Tensor::randn((1024, 1024), DType::F32, &Device::Cpu, 1.0, 0.0)?;
|
||||
let rhs = Tensor::randn((1024, 1024), DType::F32, &Device::Cpu, 1.0, 0.0)?;
|
||||
Ok((lhs, rhs))
|
||||
}
|
||||
|
||||
fn run_one(d: &Self::PreProcessData) -> Result<Self::RunResult> {
|
||||
d.0.matmul(&d.1)
|
||||
}
|
||||
|
||||
const ITERS: usize = 100;
|
||||
}
|
||||
|
||||
// struct Softmax;
|
||||
// impl Benchmark for Softmax {
|
||||
// type PreProcessData = Tensor;
|
||||
// type RunResult = Tensor;
|
||||
// fn preprocess() -> Result<Self::PreProcessData> {
|
||||
// // Typical whisper tiny size.
|
||||
// let x = Tensor::randn(0f32, 1., (1, 6, 200, 1500), &Device::Cpu)?;
|
||||
// Ok(x)
|
||||
// }
|
||||
//
|
||||
// fn run_one(d: &Self::PreProcessData) -> Result<Self::RunResult> {
|
||||
// softmax(d, D::Minus1)
|
||||
// }
|
||||
//
|
||||
// const ITERS: usize = 100;
|
||||
// }
|
||||
|
||||
fn run<B: Benchmark>(iters: Option<usize>) -> Result<()> {
|
||||
use std::hint::black_box;
|
||||
|
||||
let iters = iters.unwrap_or(B::ITERS);
|
||||
let d = B::preprocess()?;
|
||||
let start = std::time::Instant::now();
|
||||
for _iter in 0..iters {
|
||||
let _res = black_box(B::run_one(black_box(&d))?);
|
||||
}
|
||||
println!("{:?}", start.elapsed() / iters as u32);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
run::<Matmul>(None)?;
|
||||
Ok(())
|
||||
}
|
Reference in New Issue
Block a user