Conv1d optimize (#392)

* Reorder the conv1d loops in the cpu backend. * Optimize the 1d convolution. * Conv1D optimize. * Fix some clippy lints.
2025-06-20 12:06:35 +00:00 · 2023-08-10 16:23:52 +02:00
parent 0b0fa56978
commit c8039579a5
6 changed files with 62 additions and 22 deletions
--- a/candle-core/src/cpu_kernels.rs
+++ b/candle-core/src/cpu_kernels.rs
@ -0,0 +1,28 @@
+pub trait VecDot: num_traits::NumAssign + Copy {
+    /// Dot-product of two vectors.
+    ///
+    /// # Safety
+    ///
+    /// The length of `lhs` and `rhs` have to be at least `len`. `res` has to point to a valid
+    /// element.
+    #[inline(always)]
+    unsafe fn vec_dot(lhs: *const Self, rhs: *const Self, res: *mut Self, len: usize) {
+        *res = Self::zero();
+        for i in 0..len {
+            *res += *lhs.add(i) * *rhs.add(i)
+        }
+    }
+}
+
+impl VecDot for f32 {
+    #[inline(always)]
+    unsafe fn vec_dot(lhs: *const Self, rhs: *const Self, res: *mut Self, len: usize) {
+        ggblas::ggml::vec_dot_f32(lhs, rhs, res, len)
+    }
+}
+
+impl VecDot for f64 {}
+impl VecDot for half::bf16 {}
+impl VecDot for half::f16 {}
+impl VecDot for u8 {}
+impl VecDot for u32 {}