mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 18:48:51 +00:00
Small example for benchmarking some cpu ops (#394)
* Refactor the benchmark example. * Rename the example. * Add some comments.
This commit is contained in:
@ -30,6 +30,7 @@ zip = { workspace = true }
|
|||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
anyhow = { workspace = true }
|
anyhow = { workspace = true }
|
||||||
|
clap = { workspace = true }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = []
|
||||||
|
@ -1,24 +0,0 @@
|
|||||||
#[cfg(feature = "mkl")]
|
|
||||||
extern crate intel_mkl_src;
|
|
||||||
|
|
||||||
#[cfg(feature = "accelerate")]
|
|
||||||
extern crate accelerate_src;
|
|
||||||
|
|
||||||
use anyhow::Result;
|
|
||||||
use candle_core::{Device, Tensor};
|
|
||||||
|
|
||||||
pub const N_ITERS: usize = 5;
|
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
|
||||||
let inp = Tensor::randn(0f32, 1., (1, 384, 3000), &Device::Cpu)?;
|
|
||||||
let w = Tensor::randn(0f32, 1., (384, 384, 3), &Device::Cpu)?;
|
|
||||||
let res = inp.conv1d(&w, 0, 1);
|
|
||||||
println!("{res:?}");
|
|
||||||
let start = std::time::Instant::now();
|
|
||||||
for i in 0..N_ITERS {
|
|
||||||
let res = inp.conv1d(&w, 0, 1);
|
|
||||||
println!("{i} {res:?}");
|
|
||||||
}
|
|
||||||
println!("{:?}", start.elapsed() / N_ITERS as u32);
|
|
||||||
Ok(())
|
|
||||||
}
|
|
95
candle-core/examples/cpu_benchmarks.rs
Normal file
95
candle-core/examples/cpu_benchmarks.rs
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
/// This example contains some simple benchmarks so that it's easy to run them in perf etc.
|
||||||
|
#[cfg(feature = "mkl")]
|
||||||
|
extern crate intel_mkl_src;
|
||||||
|
|
||||||
|
#[cfg(feature = "accelerate")]
|
||||||
|
extern crate accelerate_src;
|
||||||
|
|
||||||
|
use candle_core::{Device, Result, Tensor};
|
||||||
|
use clap::{Parser, Subcommand};
|
||||||
|
|
||||||
|
trait Benchmark {
|
||||||
|
type PreProcessData;
|
||||||
|
type RunResult;
|
||||||
|
|
||||||
|
fn preprocess() -> Result<Self::PreProcessData>;
|
||||||
|
fn run_one(_: &Self::PreProcessData) -> Result<Self::RunResult>;
|
||||||
|
|
||||||
|
const ITERS: usize;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Conv1d example as used in whisper.
|
||||||
|
struct Conv1d;
|
||||||
|
impl Benchmark for Conv1d {
|
||||||
|
type PreProcessData = (Tensor, Tensor);
|
||||||
|
type RunResult = Tensor;
|
||||||
|
fn preprocess() -> Result<Self::PreProcessData> {
|
||||||
|
let inp = Tensor::randn(0f32, 1., (1, 384, 3000), &Device::Cpu)?;
|
||||||
|
let w = Tensor::randn(0f32, 1., (384, 384, 3), &Device::Cpu)?;
|
||||||
|
Ok((inp, w))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_one(d: &Self::PreProcessData) -> Result<Self::RunResult> {
|
||||||
|
d.0.conv1d(&d.1, 0, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
const ITERS: usize = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Conv2d example as used in stable-diffusion.
|
||||||
|
struct Conv2d;
|
||||||
|
impl Benchmark for Conv2d {
|
||||||
|
type PreProcessData = (Tensor, Tensor);
|
||||||
|
type RunResult = Tensor;
|
||||||
|
|
||||||
|
fn preprocess() -> Result<Self::PreProcessData> {
|
||||||
|
let inp = Tensor::randn(0f32, 1., (2, 320, 96, 96), &Device::Cpu)?;
|
||||||
|
let w = Tensor::randn(0f32, 1., (320, 320, 3, 3), &Device::Cpu)?;
|
||||||
|
Ok((inp, w))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_one(d: &Self::PreProcessData) -> Result<Self::RunResult> {
|
||||||
|
d.0.conv2d(&d.1, 0, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
const ITERS: usize = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run<B: Benchmark>(iters: Option<usize>) -> Result<()> {
|
||||||
|
use std::hint::black_box;
|
||||||
|
|
||||||
|
let iters = iters.unwrap_or(B::ITERS);
|
||||||
|
let d = B::preprocess()?;
|
||||||
|
let start = std::time::Instant::now();
|
||||||
|
for _iter in 0..iters {
|
||||||
|
let _res = black_box(B::run_one(black_box(&d))?);
|
||||||
|
}
|
||||||
|
println!("{:?}", start.elapsed() / iters as u32);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Subcommand, Debug, Clone)]
|
||||||
|
enum Task {
|
||||||
|
Conv1d,
|
||||||
|
Conv2d,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(author, version, about, long_about = None)]
|
||||||
|
pub struct Args {
|
||||||
|
/// The benchmark to be run.
|
||||||
|
#[command(subcommand)]
|
||||||
|
task: Task,
|
||||||
|
|
||||||
|
#[arg(long)]
|
||||||
|
iters: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> Result<()> {
|
||||||
|
let args = Args::parse();
|
||||||
|
match args.task {
|
||||||
|
Task::Conv1d => run::<Conv1d>(args.iters)?,
|
||||||
|
Task::Conv2d => run::<Conv2d>(args.iters)?,
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
Reference in New Issue
Block a user