Mirror of https://github.com/huggingface/candle.git, synced 2025-06-17 19:18:50 +00:00
Add a softmax bench. (#433)

* Add a softmax bench.
* Add the vectorized sum reduce.
@@ -5,9 +5,18 @@ extern crate intel_mkl_src;
 #[cfg(feature = "accelerate")]
 extern crate accelerate_src;
 
-use candle_core::{Device, Result, Tensor};
+use candle_core::{Device, Result, Tensor, D};
 use clap::{Parser, Subcommand};
 
+fn softmax<D: candle_core::shape::Dim>(xs: &Tensor, dim: D) -> Result<Tensor> {
+    let dim = dim.to_index(xs.shape(), "softmax")?;
+    let max = xs.max_keepdim(dim)?;
+    let diff = xs.broadcast_sub(&max)?;
+    let num = diff.exp()?;
+    let den = num.sum_keepdim(dim)?;
+    num.broadcast_div(&den)
+}
+
 trait Benchmark {
     type PreProcessData;
     type RunResult;
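Note: the helper above is the numerically stable softmax, softmax(x)_i = exp(x_i - max(x)) / Σ_j exp(x_j - max(x)); subtracting the per-row max before exponentiating avoids overflow in exp without changing the result, since the constant factor cancels in the division. A minimal usage sketch, assuming the softmax helper above is in scope (the shape here is made up for illustration):

    use candle_core::{Device, Result, Tensor, D};

    fn main() -> Result<()> {
        let x = Tensor::randn(0f32, 1., (2, 3, 4), &Device::Cpu)?;
        // Each slice along the last dimension now sums to 1.
        let y = softmax(&x, D::Minus1)?;
        println!("{y}");
        Ok(())
    }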
@@ -72,6 +81,23 @@ impl Benchmark for Matmul {
     const ITERS: usize = 100;
 }
 
+struct Softmax;
+impl Benchmark for Softmax {
+    type PreProcessData = Tensor;
+    type RunResult = Tensor;
+    fn preprocess() -> Result<Self::PreProcessData> {
+        // Typical whisper tiny size.
+        let x = Tensor::randn(0f32, 1., (1, 6, 200, 1500), &Device::Cpu)?;
+        Ok(x)
+    }
+
+    fn run_one(d: &Self::PreProcessData) -> Result<Self::RunResult> {
+        softmax(d, D::Minus1)
+    }
+
+    const ITERS: usize = 100;
+}
+
 fn run<B: Benchmark>(iters: Option<usize>) -> Result<()> {
     use std::hint::black_box;
 
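Note: the new bench builds its (1, 6, 200, 1500) f32 input once in preprocess (1.8M elements, a typical whisper-tiny shape per the comment) and then times softmax over the last dimension. The body of run is unchanged and elided from this diff; a rough sketch of what such a harness does, assuming the Benchmark trait and Result alias from this file (time_it is a made-up name, not the real run):

    use std::hint::black_box;
    use std::time::Instant;

    fn time_it<B: Benchmark>(iters: Option<usize>) -> Result<()> {
        let iters = iters.unwrap_or(B::ITERS);
        let d = B::preprocess()?;
        let start = Instant::now();
        for _ in 0..iters {
            // black_box keeps the optimizer from eliding the timed work.
            black_box(B::run_one(black_box(&d))?);
        }
        println!("{:?}", start.elapsed() / iters as u32);
        Ok(())
    }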
@@ -90,6 +116,7 @@ enum Task {
     Conv1d,
     Conv2d,
     Matmul,
+    Softmax,
 }
 
 #[derive(Parser, Debug)]
@@ -109,6 +136,7 @@ fn main() -> Result<()> {
         Task::Conv1d => run::<Conv1d>(args.iters)?,
         Task::Conv2d => run::<Conv2d>(args.iters)?,
         Task::Matmul => run::<Matmul>(args.iters)?,
+        Task::Softmax => run::<Softmax>(args.iters)?,
     }
     Ok(())
 }
@@ -278,17 +278,17 @@ impl Map1Any for ReduceIndex {
     }
 }
 
-struct Reduce<'a> {
+struct ReduceSum<'a> {
     dst_shape: &'a Shape,
     reduce_dims: &'a [usize],
     reduce_dims_and_stride: Vec<(usize, usize)>,
 }
 
-impl<'a> Reduce<'a> {
+impl<'a> ReduceSum<'a> {
     #[inline(always)]
     fn fold_impl<T, F>(&self, src: &[T], src_l: &Layout, start_elt: T, f: F) -> Result<Vec<T>>
     where
-        T: Clone + Copy,
+        T: WithDType,
         F: Fn(T, T) -> T,
     {
         let mut dst = vec![start_elt; self.dst_shape.elem_count()];
@@ -312,9 +312,13 @@ impl<'a> Reduce<'a> {
                 .product::<usize>();
             for (dst_i, dst_v) in dst.iter_mut().enumerate() {
                 let src_i = dst_i * reduce_sz;
-                for &s in src[src_i..src_i + reduce_sz].iter() {
-                    *dst_v = f(*dst_v, s)
-                }
+                unsafe {
+                    T::vec_reduce_sum(
+                        src[src_i..src_i + reduce_sz].as_ptr(),
+                        dst_v,
+                        reduce_sz,
+                    )
+                };
             }
             return Ok(dst);
         };
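Note: this is the CPU backend's contiguous fast path. When the reduced dimensions are the innermost ones, each output element is the sum of one contiguous run of reduce_sz inputs, so the per-element closure fold is swapped for a single T::vec_reduce_sum call per run; the bound change from Clone + Copy to WithDType above is what lets the generic code reach that method. One subtlety: the old fold accumulated into *dst_v (seeded with start_elt), while vec_reduce_sum overwrites its destination; since ReduceSum always seeds with T::zero(), the two are equivalent here. A self-contained sketch of the same access pattern on plain f32, with a safe stand-in for the unsafe call:

    // Sums each contiguous run of `reduce_sz` elements into one output slot,
    // mirroring the loop structure in the hunk above.
    fn sum_rows(src: &[f32], reduce_sz: usize) -> Vec<f32> {
        let mut dst = vec![0f32; src.len() / reduce_sz];
        for (dst_i, dst_v) in dst.iter_mut().enumerate() {
            let src_i = dst_i * reduce_sz;
            // Stand-in for T::vec_reduce_sum over src[src_i..src_i + reduce_sz].
            *dst_v = src[src_i..src_i + reduce_sz].iter().sum();
        }
        dst
    }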
@@ -346,7 +350,7 @@ impl<'a> Reduce<'a> {
     }
 }
 
-impl<'a> Map1 for Reduce<'a> {
+impl<'a> Map1 for ReduceSum<'a> {
     #[inline(always)]
     fn f<T: WithDType>(&self, src: &[T], src_l: &Layout) -> Result<Vec<T>> {
         self.fold_impl(src, src_l, T::zero(), |x, y| x + y)
@@ -1697,7 +1701,7 @@ impl BackendStorage for CpuStorage {
             .iter()
             .map(|&d| (src_dims[d], src_dims[d + 1..].iter().product::<usize>()))
             .collect();
-        Reduce {
+        ReduceSum {
             dst_shape: &dst_shape,
             reduce_dims: &reduce_dims,
             reduce_dims_and_stride,
@@ -12,6 +12,20 @@ pub trait VecDot: num_traits::NumAssign + Copy {
             *res += *lhs.add(i) * *rhs.add(i)
         }
     }
+
+    /// Sum of all elements in a vector.
+    ///
+    /// # Safety
+    ///
+    /// The length of `xs` must be at least `len`. `res` has to point to a valid
+    /// element.
+    #[inline(always)]
+    unsafe fn vec_reduce_sum(xs: *const Self, res: *mut Self, len: usize) {
+        *res = Self::zero();
+        for i in 0..len {
+            *res += *xs.add(i)
+        }
+    }
 }
 
 impl VecDot for f32 {
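Note: the trait's default vec_reduce_sum is a plain scalar loop that LLVM can often auto-vectorize; concrete types may override it with a hand-tuned kernel, as the commented-out ggblas f32 version below anticipates. A safe-wrapper sketch over the raw-pointer API, assuming the VecDot trait above is in scope (reduce_sum_slice is a hypothetical helper, not part of this diff); taking a slice discharges the length precondition:

    use num_traits::Zero;

    fn reduce_sum_slice<T: VecDot>(xs: &[T]) -> T {
        let mut acc = T::zero();
        // Safety: xs.as_ptr() is valid for xs.len() reads and acc is a valid
        // destination, so the trait's length precondition holds.
        unsafe { T::vec_reduce_sum(xs.as_ptr(), &mut acc, xs.len()) };
        acc
    }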
@@ -19,6 +33,12 @@ impl VecDot for f32 {
     unsafe fn vec_dot(lhs: *const Self, rhs: *const Self, res: *mut Self, len: usize) {
         ggblas::ggml::vec_dot_f32(lhs, rhs, res, len)
     }
+
+    // TODO: enable the following once the updated ggblas is available.
+    // #[inline(always)]
+    // unsafe fn vec_reduce_sum(xs: *const Self, res: *mut Self, len: usize) {
+    //     ggblas::ggml::vec_reduce_sum(xs, res, len)
+    // }
 }
 
 impl VecDot for f64 {}