mirror of
https://github.com/huggingface/candle.git
synced 2025-06-20 12:06:35 +00:00
More accelerate optimizations (#427)
* Add more tracing to the whisper example. * Support accelerate in more examples. * Use accelerate for pointwise functions. * Use accelerate for binary operations too. * Bugfix for binary operation: use the rhs before the lhs.
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
#![allow(dead_code)]
|
||||
use libc::{c_char, c_double, c_float, c_int};
|
||||
use libc::{c_char, c_double, c_float, c_int, c_long, c_ulong};
|
||||
|
||||
mod ffi {
|
||||
use super::*;
|
||||
@ -39,6 +39,90 @@ mod ffi {
|
||||
c: *mut c_double,
|
||||
ldc: *const c_int,
|
||||
);
|
||||
|
||||
pub fn vvexpf(dst: *mut c_float, src: *const c_float, len: *const c_int);
|
||||
pub fn vvexp(dst: *mut c_double, src: *const c_double, len: *const c_int);
|
||||
pub fn vvsqrtf(dst: *mut c_float, src: *const c_float, len: *const c_int);
|
||||
pub fn vvsqrt(dst: *mut c_double, src: *const c_double, len: *const c_int);
|
||||
pub fn vvsinf(dst: *mut c_float, src: *const c_float, len: *const c_int);
|
||||
pub fn vvsin(dst: *mut c_double, src: *const c_double, len: *const c_int);
|
||||
pub fn vvcosf(dst: *mut c_float, src: *const c_float, len: *const c_int);
|
||||
pub fn vvcos(dst: *mut c_double, src: *const c_double, len: *const c_int);
|
||||
pub fn vvlogf(dst: *mut c_float, src: *const c_float, len: *const c_int);
|
||||
pub fn vvlog(dst: *mut c_double, src: *const c_double, len: *const c_int);
|
||||
|
||||
pub fn vDSP_vaddD(
|
||||
_: *const c_double,
|
||||
_: c_long,
|
||||
_: *const c_double,
|
||||
_: c_long,
|
||||
_: *mut c_double,
|
||||
_: c_long,
|
||||
_: c_ulong,
|
||||
);
|
||||
pub fn vDSP_vadd(
|
||||
_: *const c_float,
|
||||
_: c_long,
|
||||
_: *const c_float,
|
||||
_: c_long,
|
||||
_: *mut c_float,
|
||||
_: c_long,
|
||||
_: c_ulong,
|
||||
);
|
||||
pub fn vDSP_vsubD(
|
||||
_: *const c_double,
|
||||
_: c_long,
|
||||
_: *const c_double,
|
||||
_: c_long,
|
||||
_: *mut c_double,
|
||||
_: c_long,
|
||||
_: c_ulong,
|
||||
);
|
||||
pub fn vDSP_vsub(
|
||||
_: *const c_float,
|
||||
_: c_long,
|
||||
_: *const c_float,
|
||||
_: c_long,
|
||||
_: *mut c_float,
|
||||
_: c_long,
|
||||
_: c_ulong,
|
||||
);
|
||||
pub fn vDSP_vmulD(
|
||||
_: *const c_double,
|
||||
_: c_long,
|
||||
_: *const c_double,
|
||||
_: c_long,
|
||||
_: *mut c_double,
|
||||
_: c_long,
|
||||
_: c_ulong,
|
||||
);
|
||||
pub fn vDSP_vmul(
|
||||
_: *const c_float,
|
||||
_: c_long,
|
||||
_: *const c_float,
|
||||
_: c_long,
|
||||
_: *mut c_float,
|
||||
_: c_long,
|
||||
_: c_ulong,
|
||||
);
|
||||
pub fn vDSP_vdivD(
|
||||
_: *const c_double,
|
||||
_: c_long,
|
||||
_: *const c_double,
|
||||
_: c_long,
|
||||
_: *mut c_double,
|
||||
_: c_long,
|
||||
_: c_ulong,
|
||||
);
|
||||
pub fn vDSP_vdiv(
|
||||
_: *const c_float,
|
||||
_: c_long,
|
||||
_: *const c_float,
|
||||
_: c_long,
|
||||
_: *mut c_float,
|
||||
_: c_long,
|
||||
_: c_ulong,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -109,3 +193,158 @@ pub unsafe fn dgemm(
|
||||
&ldc,
|
||||
)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn vs_exp(a: &[f32], y: &mut [f32]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
unsafe { ffi::vvexpf(y.as_mut_ptr(), a.as_ptr(), &(a_len as i32)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn vd_exp(a: &[f64], y: &mut [f64]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
unsafe { ffi::vvexp(y.as_mut_ptr(), a.as_ptr(), &(a_len as i32)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn vs_sqrt(a: &[f32], y: &mut [f32]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
unsafe { ffi::vvsqrtf(y.as_mut_ptr(), a.as_ptr(), &(a_len as i32)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn vd_sqrt(a: &[f64], y: &mut [f64]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
unsafe { ffi::vvsqrt(y.as_mut_ptr(), a.as_ptr(), &(a_len as i32)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn vs_sin(a: &[f32], y: &mut [f32]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
unsafe { ffi::vvsinf(y.as_mut_ptr(), a.as_ptr(), &(a_len as i32)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn vd_sin(a: &[f64], y: &mut [f64]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
unsafe { ffi::vvsin(y.as_mut_ptr(), a.as_ptr(), &(a_len as i32)) }
|
||||
}
|
||||
#[inline]
|
||||
pub fn vs_cos(a: &[f32], y: &mut [f32]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
unsafe { ffi::vvcosf(y.as_mut_ptr(), a.as_ptr(), &(a_len as i32)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn vd_cos(a: &[f64], y: &mut [f64]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
unsafe { ffi::vvcos(y.as_mut_ptr(), a.as_ptr(), &(a_len as i32)) }
|
||||
}
|
||||
#[inline]
|
||||
pub fn vs_ln(a: &[f32], y: &mut [f32]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
unsafe { ffi::vvlogf(y.as_mut_ptr(), a.as_ptr(), &(a_len as i32)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn vd_ln(a: &[f64], y: &mut [f64]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
unsafe { ffi::vvlog(y.as_mut_ptr(), a.as_ptr(), &(a_len as i32)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn vs_sqr(a: &[f32], y: &mut [f32]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
y.iter_mut().zip(a.iter()).for_each(|(y, a)| *y = *a * *a)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn vd_sqr(a: &[f64], y: &mut [f64]) {
|
||||
let a_len = a.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len {
|
||||
panic!("a and y have different lengths {a_len} <> {y_len}")
|
||||
}
|
||||
y.iter_mut().zip(a.iter()).for_each(|(y, a)| *y = *a * *a)
|
||||
}
|
||||
|
||||
macro_rules! binary_op {
|
||||
($fn_name:ident, $ty:ty, $accelerate_name:ident) => {
|
||||
#[inline]
|
||||
pub fn $fn_name(a: &[$ty], b: &[$ty], y: &mut [$ty]) {
|
||||
let a_len = a.len();
|
||||
let b_len = b.len();
|
||||
let y_len = y.len();
|
||||
if a_len != y_len || b_len != y_len {
|
||||
panic!(
|
||||
"{} a,b,y len mismatch {a_len} {b_len} {y_len}",
|
||||
stringify!($fn_name)
|
||||
);
|
||||
}
|
||||
unsafe {
|
||||
// Weird quirk of accelerate, the rhs comes before the lhs.
|
||||
ffi::$accelerate_name(
|
||||
b.as_ptr(),
|
||||
1,
|
||||
a.as_ptr(),
|
||||
1,
|
||||
y.as_mut_ptr(),
|
||||
1,
|
||||
a_len as u64,
|
||||
)
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
binary_op!(vs_add, f32, vDSP_vadd);
|
||||
binary_op!(vd_add, f64, vDSP_vaddD);
|
||||
binary_op!(vs_sub, f32, vDSP_vsub);
|
||||
binary_op!(vd_sub, f64, vDSP_vsubD);
|
||||
binary_op!(vs_mul, f32, vDSP_vmul);
|
||||
binary_op!(vd_mul, f64, vDSP_vmulD);
|
||||
binary_op!(vs_div, f32, vDSP_vdiv);
|
||||
binary_op!(vd_div, f64, vDSP_vdivD);
|
||||
|
Reference in New Issue
Block a user