Files
candle/candle-core/src/cpu_kernels.rs
Laurent Mazare c8039579a5 Conv1d optimize (#392)
* Reorder the conv1d loops in the cpu backend.

* Optimize the 1d convolution.

* Conv1D optimize.

* Fix some clippy lints.
2023-08-10 15:23:52 +01:00

29 lines
782 B
Rust

pub trait VecDot: num_traits::NumAssign + Copy {
/// Dot-product of two vectors.
///
/// # Safety
///
/// The length of `lhs` and `rhs` have to be at least `len`. `res` has to point to a valid
/// element.
#[inline(always)]
unsafe fn vec_dot(lhs: *const Self, rhs: *const Self, res: *mut Self, len: usize) {
*res = Self::zero();
for i in 0..len {
*res += *lhs.add(i) * *rhs.add(i)
}
}
}
impl VecDot for f32 {
#[inline(always)]
unsafe fn vec_dot(lhs: *const Self, rhs: *const Self, res: *mut Self, len: usize) {
ggblas::ggml::vec_dot_f32(lhs, rhs, res, len)
}
}
impl VecDot for f64 {}
impl VecDot for half::bf16 {}
impl VecDot for half::f16 {}
impl VecDot for u8 {}
impl VecDot for u32 {}