From fcfdcbd3373fb2fd744a0b4f7aa97cec7e620431 Mon Sep 17 00:00:00 2001 From: Laurent Mazare Date: Wed, 9 Aug 2023 21:27:03 +0200 Subject: [PATCH] Add a conv1d benchmark based on the whisper sizes. (#377) * Add a conv1d benchmark based on the whisper sizes. * Enforce the batch-dim in conv1d. --- candle-core/examples/conv1d_benchmark.rs | 24 ++++++++++++++++++++++++ candle-core/src/conv.rs | 7 ++----- candle-core/src/cpu_backend.rs | 4 ++-- candle-core/src/tensor.rs | 13 +------------ 4 files changed, 29 insertions(+), 19 deletions(-) create mode 100644 candle-core/examples/conv1d_benchmark.rs diff --git a/candle-core/examples/conv1d_benchmark.rs b/candle-core/examples/conv1d_benchmark.rs new file mode 100644 index 00000000..52fae5e8 --- /dev/null +++ b/candle-core/examples/conv1d_benchmark.rs @@ -0,0 +1,24 @@ +#[cfg(feature = "mkl")] +extern crate intel_mkl_src; + +#[cfg(feature = "accelerate")] +extern crate accelerate_src; + +use anyhow::Result; +use candle_core::{Device, Tensor}; + +pub const N_ITERS: usize = 5; + +fn main() -> Result<()> { + let inp = Tensor::randn(0f32, 1., (1, 384, 3000), &Device::Cpu)?; + let w = Tensor::randn(0f32, 1., (384, 384, 3), &Device::Cpu)?; + let res = inp.conv1d(&w, 0, 1); + println!("{res:?}"); + let start = std::time::Instant::now(); + for i in 0..N_ITERS { + let res = inp.conv1d(&w, 0, 1); + println!("{i} {res:?}"); + } + println!("{:?}", start.elapsed() / N_ITERS as u32); + Ok(()) +} diff --git a/candle-core/src/conv.rs b/candle-core/src/conv.rs index 30799459..e3fea861 100644 --- a/candle-core/src/conv.rs +++ b/candle-core/src/conv.rs @@ -1,6 +1,6 @@ #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParamsConv1D { - pub(crate) b_size: Option<usize>, + pub(crate) b_size: usize, // Maybe we should have a version without l_in as this bit depends on the input and not only on // the weights. 
pub(crate) l_in: usize, @@ -19,10 +19,7 @@ impl ParamsConv1D { pub(crate) fn out_dims(&self) -> Vec<usize> { let l_out = self.l_out(); - match self.b_size { - None => vec![self.c_out, l_out], - Some(n) => vec![n, self.c_out, l_out], - } + vec![self.b_size, self.c_out, l_out] } } diff --git a/candle-core/src/cpu_backend.rs b/candle-core/src/cpu_backend.rs index 54f3f65b..238a9a69 100644 --- a/candle-core/src/cpu_backend.rs +++ b/candle-core/src/cpu_backend.rs @@ -1037,10 +1037,10 @@ impl<'a> Map2 for Conv1D<'a> { let (inp_s0, inp_s1, inp_s2) = crate::shape::dims3(inp_l.stride())?; let (k_s0, k_s1, k_s2) = crate::shape::dims3(k_l.stride())?; let l_out = p.l_out(); - let dst_elems = p.c_out * l_out * p.b_size.unwrap_or(1); + let dst_elems = p.c_out * l_out * p.b_size; let mut dst = vec![T::zero(); dst_elems]; // The output shape is [b_size, c_out, l_out] - for b_idx in 0..p.b_size.unwrap_or(1) { + for b_idx in 0..p.b_size { let inp_idx = b_idx * inp_s0; let dst_idx = b_idx * p.c_out * l_out; for dst_c_idx in 0..p.c_out { diff --git a/candle-core/src/tensor.rs b/candle-core/src/tensor.rs index c94c0390..c14a4e39 100644 --- a/candle-core/src/tensor.rs +++ b/candle-core/src/tensor.rs @@ -773,18 +773,7 @@ impl Tensor { /// Applies a 1D convolution over the input tensor. pub fn conv1d(&self, kernel: &Self, padding: usize, stride: usize) -> Result<Self> { let (c_out, c_in_k, k_size) = kernel.dims3()?; - let (b_size, c_in, l_in) = match *self.dims() { - [b_size, c_in, l_in] => (Some(b_size), c_in, l_in), - [c_in, l_in] => (None, c_in, l_in), - _ => Err(Error::Conv1dInvalidArgs { - inp_shape: self.shape().clone(), - k_shape: kernel.shape().clone(), - padding, - stride, - msg: "input rank is not 2 or 3", - } - .bt())?, - }; + let (b_size, c_in, l_in) = self.dims3()?; if c_in != c_in_k { Err(Error::Conv1dInvalidArgs { inp_shape: self.shape().clone(),