mirror of
https://github.com/huggingface/candle.git
synced 2025-06-20 12:06:35 +00:00
Conv1d optimize (#392)
* Reorder the conv1d loops in the cpu backend. * Optimize the 1d convolution. * Conv1D optimize. * Fix some clippy lints.
This commit is contained in:
28
candle-core/src/cpu_kernels.rs
Normal file
28
candle-core/src/cpu_kernels.rs
Normal file
@ -0,0 +1,28 @@
|
||||
pub trait VecDot: num_traits::NumAssign + Copy {
|
||||
/// Dot-product of two vectors.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The length of `lhs` and `rhs` have to be at least `len`. `res` has to point to a valid
|
||||
/// element.
|
||||
#[inline(always)]
|
||||
unsafe fn vec_dot(lhs: *const Self, rhs: *const Self, res: *mut Self, len: usize) {
|
||||
*res = Self::zero();
|
||||
for i in 0..len {
|
||||
*res += *lhs.add(i) * *rhs.add(i)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl VecDot for f32 {
|
||||
#[inline(always)]
|
||||
unsafe fn vec_dot(lhs: *const Self, rhs: *const Self, res: *mut Self, len: usize) {
|
||||
ggblas::ggml::vec_dot_f32(lhs, rhs, res, len)
|
||||
}
|
||||
}
|
||||
|
||||
impl VecDot for f64 {}
|
||||
impl VecDot for half::bf16 {}
|
||||
impl VecDot for half::f16 {}
|
||||
impl VecDot for u8 {}
|
||||
impl VecDot for u32 {}
|
Reference in New Issue
Block a user