More accelerate optimizations (#427)

* Add more tracing to the whisper example.

* Support accelerate in more examples.

* Use accelerate for pointwise functions.

* Use accelerate for binary operations too.

* Bugfix for binary operation: use the rhs before the lhs.
This commit is contained in:
Laurent Mazare
2023-08-13 13:53:34 +02:00
committed by GitHub
parent 60cd1551ca
commit 9aca398a4f
9 changed files with 320 additions and 11 deletions

View File

@ -338,6 +338,21 @@ macro_rules! bin_op {
fn f64_vec(xs1: &[f64], xs2: &[f64], ys: &mut [f64]) {
crate::mkl::$f64_vec(xs1, xs2, ys)
}
#[cfg(feature = "accelerate")]
const F32_VEC: bool = true;
#[cfg(feature = "accelerate")]
const F64_VEC: bool = true;
#[cfg(feature = "accelerate")]
#[inline(always)]
fn f32_vec(xs1: &[f32], xs2: &[f32], ys: &mut [f32]) {
crate::accelerate::$f32_vec(xs1, xs2, ys)
}
#[cfg(feature = "accelerate")]
#[inline(always)]
fn f64_vec(xs1: &[f64], xs2: &[f64], ys: &mut [f64]) {
crate::accelerate::$f64_vec(xs1, xs2, ys)
}
}
};
}
@ -424,6 +439,21 @@ macro_rules! unary_op {
fn f64_vec(xs: &[f64], ys: &mut [f64]) {
crate::mkl::$f64_vec(xs, ys)
}
#[cfg(feature = "accelerate")]
const F32_VEC: bool = true;
#[cfg(feature = "accelerate")]
const F64_VEC: bool = true;
#[cfg(feature = "accelerate")]
#[inline(always)]
fn f32_vec(xs: &[f32], ys: &mut [f32]) {
crate::accelerate::$f32_vec(xs, ys)
}
#[cfg(feature = "accelerate")]
#[inline(always)]
fn f64_vec(xs: &[f64], ys: &mut [f64]) {
crate::accelerate::$f64_vec(xs, ys)
}
}
};
}