From fcfdcbd3373fb2fd744a0b4f7aa97cec7e620431 Mon Sep 17 00:00:00 2001 From: Laurent Mazare Date: Wed, 9 Aug 2023 21:27:03 +0200 Subject: [PATCH] Add a conv1d benchmark based on the whisper sizes. (#377) * Add a conv1d benchmark based on the whisper sizes. * Enforce the batch-dim in conv1d. --- candle-core/examples/conv1d_benchmark.rs | 24 ++++++++++++++++++++++++ candle-core/src/conv.rs | 7 ++----- candle-core/src/cpu_backend.rs | 4 ++-- candle-core/src/tensor.rs | 13 +------------ 4 files changed, 29 insertions(+), 19 deletions(-) create mode 100644 candle-core/examples/conv1d_benchmark.rs diff --git a/candle-core/examples/conv1d_benchmark.rs b/candle-core/examples/conv1d_benchmark.rs new file mode 100644 index 00000000..52fae5e8 --- /dev/null +++ b/candle-core/examples/conv1d_benchmark.rs @@ -0,0 +1,24 @@ +#[cfg(feature = "mkl")] +extern crate intel_mkl_src; + +#[cfg(feature = "accelerate")] +extern crate accelerate_src; + +use anyhow::Result; +use candle_core::{Device, Tensor}; + +pub const N_ITERS: usize = 5; + +fn main() -> Result<()> { + let inp = Tensor::randn(0f32, 1., (1, 384, 3000), &Device::Cpu)?; + let w = Tensor::randn(0f32, 1., (384, 384, 3), &Device::Cpu)?; + let res = inp.conv1d(&w, 0, 1); + println!("{res:?}"); + let start = std::time::Instant::now(); + for i in 0..N_ITERS { + let res = inp.conv1d(&w, 0, 1); + println!("{i} {res:?}"); + } + println!("{:?}", start.elapsed() / N_ITERS as u32); + Ok(()) +} diff --git a/candle-core/src/conv.rs b/candle-core/src/conv.rs index 30799459..e3fea861 100644 --- a/candle-core/src/conv.rs +++ b/candle-core/src/conv.rs @@ -1,6 +1,6 @@ #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParamsConv1D { - pub(crate) b_size: Option<usize>, + pub(crate) b_size: usize, // Maybe we should have a version without l_in as this bit depends on the input and not only on // the weights. 
pub(crate) l_in: usize, @@ -19,10 +19,7 @@ impl ParamsConv1D { pub(crate) fn out_dims(&self) -> Vec<usize> { let l_out = self.l_out(); - match self.b_size { - None => vec![self.c_out, l_out], - Some(n) => vec![n, self.c_out, l_out], - } + vec![self.b_size, self.c_out, l_out] } } diff --git a/candle-core/src/cpu_backend.rs b/candle-core/src/cpu_backend.rs index 54f3f65b..238a9a69 100644 --- a/candle-core/src/cpu_backend.rs +++ b/candle-core/src/cpu_backend.rs @@ -1037,10 +1037,10 @@ impl<'a> Map2 for Conv1D<'a> { let (inp_s0, inp_s1, inp_s2) = crate::shape::dims3(inp_l.stride())?; let (k_s0, k_s1, k_s2) = crate::shape::dims3(k_l.stride())?; let l_out = p.l_out(); - let dst_elems = p.c_out * l_out * p.b_size.unwrap_or(1); + let dst_elems = p.c_out * l_out * p.b_size; let mut dst = vec![T::zero(); dst_elems]; // The output shape is [b_size, c_out, l_out] - for b_idx in 0..p.b_size.unwrap_or(1) { + for b_idx in 0..p.b_size { let inp_idx = b_idx * inp_s0; let dst_idx = b_idx * p.c_out * l_out; for dst_c_idx in 0..p.c_out { diff --git a/candle-core/src/tensor.rs b/candle-core/src/tensor.rs index c94c0390..c14a4e39 100644 --- a/candle-core/src/tensor.rs +++ b/candle-core/src/tensor.rs @@ -773,18 +773,7 @@ impl Tensor { /// Applies a 1D convolution over the input tensor. pub fn conv1d(&self, kernel: &Self, padding: usize, stride: usize) -> Result<Self> { let (c_out, c_in_k, k_size) = kernel.dims3()?; - let (b_size, c_in, l_in) = match *self.dims() { - [b_size, c_in, l_in] => (Some(b_size), c_in, l_in), - [c_in, l_in] => (None, c_in, l_in), - _ => Err(Error::Conv1dInvalidArgs { - inp_shape: self.shape().clone(), - k_shape: kernel.shape().clone(), - padding, - stride, - msg: "input rank is not 2 or 3", - } - .bt())?, - }; + let (b_size, c_in, l_in) = self.dims3()?; if c_in != c_in_k { Err(Error::Conv1dInvalidArgs { inp_shape: self.shape().clone(),