Add conv-transpose. (#635)

* Add conv-transpose.

* Return zeros for now.

* Naive CPU implementation.

* Add a conv-transpose test + fix the cpu implementation.

* Add a second test.
Laurent Mazare
2023-08-28 10:10:12 +01:00
committed by GitHub
parent 26e1b40992
commit 3cca89cc70
9 changed files with 244 additions and 0 deletions
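
Before the per-file changes, a minimal usage sketch of the API this commit adds (not part of the diff itself; it assumes the crate is consumed as `candle_core` and reuses the shapes from the new tests below, with the input laid out as (batch, c_in, h, w) and the kernel as (c_in, c_out, k_h, k_w)):

use candle_core::{Device, Result, Tensor};

fn conv_transpose_example() -> Result<Tensor> {
    let dev = Device::Cpu;
    // Input of shape (batch = 1, c_in = 2, h = 3, w = 3).
    let t = Tensor::randn(0f32, 1., (1, 2, 3, 3), &dev)?;
    // Kernel of shape (c_in = 2, c_out = 1, k_h = 1, k_w = 1).
    let w = Tensor::randn(0f32, 1., (2, 1, 1, 1), &dev)?;
    // padding = 0, output_padding = 0, stride = 1 -> output shape (1, 1, 3, 3).
    t.conv_transpose2d(&w, 0, 0, 1)
}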


@@ -45,6 +45,14 @@ pub trait BackendStorage: Sized {
_params: &crate::conv::ParamsConv2D,
) -> Result<Self>;
fn conv_transpose2d(
&self,
_l: &Layout,
_kernel: &Self,
_kernel_l: &Layout,
_params: &crate::conv::ParamsConvTranspose2D,
) -> Result<Self>;
fn avg_pool2d(&self, _: &Layout, _: (usize, usize), _: (usize, usize)) -> Result<Self>;
fn max_pool2d(&self, _: &Layout, _: (usize, usize), _: (usize, usize)) -> Result<Self>;
fn upsample_nearest2d(&self, _: &Layout, _: usize, _: usize) -> Result<Self>;


@@ -60,6 +60,11 @@ impl Tensor {
kernel: rhs,
..
}
| Op::ConvTranspose2D {
arg: lhs,
kernel: rhs,
..
}
| Op::CustomOp2(lhs, rhs, _)
| Op::Binary(lhs, rhs, _)
| Op::Gather(lhs, rhs, _)
@@ -188,6 +193,9 @@ impl Tensor {
}
Op::Conv1D { .. } => Err(Error::BackwardNotSupported { op: "conv1d" })?,
Op::Conv2D { .. } => Err(Error::BackwardNotSupported { op: "conv2d" })?,
Op::ConvTranspose2D { .. } => Err(Error::BackwardNotSupported {
op: "conv-transpose2d",
})?,
Op::AvgPool2D { .. } => Err(Error::BackwardNotSupported { op: "avg-pool2d" })?,
Op::MaxPool2D { .. } => Err(Error::BackwardNotSupported { op: "max-pool2d" })?,
Op::UpsampleNearest2D { .. } => Err(Error::BackwardNotSupported {


@@ -54,6 +54,42 @@ impl ParamsConv2D {
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParamsConvTranspose2D {
pub(crate) b_size: usize,
pub(crate) i_h: usize,
pub(crate) i_w: usize,
pub(crate) k_h: usize,
pub(crate) k_w: usize,
pub(crate) c_out: usize,
pub(crate) c_in: usize,
pub(crate) padding: usize,
pub(crate) output_padding: usize,
pub(crate) stride: usize,
}
impl ParamsConvTranspose2D {
pub(crate) fn out_h(&self) -> usize {
let dilation = 1;
(self.i_h - 1) * self.stride - 2 * self.padding
+ dilation * (self.k_h - 1)
+ self.output_padding
+ 1
}
pub(crate) fn out_w(&self) -> usize {
let dilation = 1;
(self.i_w - 1) * self.stride - 2 * self.padding
+ dilation * (self.k_w - 1)
+ self.output_padding
+ 1
}
pub(crate) fn out_dims(&self) -> Vec<usize> {
vec![self.b_size, self.c_out, self.out_h(), self.out_w()]
}
}
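
The two formulas above mirror PyTorch's ConvTranspose2d output-size rule with dilation fixed at 1. As a concrete check, with i_h = i_w = 3, a 1x1 kernel, stride 1, and no padding or output padding: out_h = (3 - 1) * 1 - 2 * 0 + 1 * (1 - 1) + 0 + 1 = 3, which is the [1, 1, 3, 3] shape asserted in the tests at the end of this diff. A hypothetical in-crate test (not part of the commit) spelling that out:

#[cfg(test)]
mod conv_transpose_params_tests {
    use super::ParamsConvTranspose2D;

    #[test]
    fn out_dims_matches_pytorch_rule() {
        // Same configuration as the 1x1-kernel test below.
        let p = ParamsConvTranspose2D {
            b_size: 1,
            i_h: 3,
            i_w: 3,
            k_h: 1,
            k_w: 1,
            c_out: 1,
            c_in: 2,
            padding: 0,
            output_padding: 0,
            stride: 1,
        };
        assert_eq!(p.out_dims(), vec![1, 1, 3, 3]);
    }
}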
impl Tensor {
fn conv1d_single_group(&self, kernel: &Self, params: &ParamsConv1D) -> Result<Self> {
let storage =
@@ -166,4 +202,46 @@ impl Tensor {
Tensor::cat(&blocks, 1)
}
}
/// Applies a 2D transposed convolution over the input tensor.
pub fn conv_transpose2d(
&self,
kernel: &Self,
padding: usize,
output_padding: usize,
stride: usize,
) -> Result<Self> {
let (b_size, c_in, i_h, i_w) = self.dims4()?;
let (c_in_k, c_out, k_h, k_w) = kernel.dims4()?;
if c_in != c_in_k {
crate::bail!("in_channel mismatch between input ({c_in}) and kernel ({c_in_k})")
}
let params = ParamsConvTranspose2D {
b_size,
i_h,
i_w,
k_h,
k_w,
c_out,
c_in,
padding,
output_padding,
stride,
};
let storage = self.storage().conv_transpose2d(
self.layout(),
&kernel.storage(),
kernel.layout(),
&params,
)?;
let op = BackpropOp::new2(self, kernel, |arg, kernel| Op::ConvTranspose2D {
arg,
kernel,
padding: params.padding,
output_padding: params.output_padding,
stride: params.stride,
});
let out_dims = params.out_dims();
Ok(crate::tensor::from_storage(storage, out_dims, op, false))
}
}


@@ -1180,6 +1180,60 @@ impl<'a> Map2 for Conv2D<'a> {
}
}
struct ConvTranspose2D<'a>(&'a crate::conv::ParamsConvTranspose2D);
impl<'a> Map2 for ConvTranspose2D<'a> {
const OP: &'static str = "conv_transpose2d";
fn f<T: WithDType>(&self, inp: &[T], inp_l: &Layout, k: &[T], k_l: &Layout) -> Result<Vec<T>> {
let p = self.0;
let inp = &inp[inp_l.start_offset()..];
let (inp_s0, inp_s1, inp_s2, inp_s3) = crate::shape::dims4(inp_l.stride())?;
let k = &k[k_l.start_offset()..];
let (k_s0, k_s1, k_s2, k_s3) = crate::shape::dims4(k_l.stride())?;
let (out_h, out_w) = (p.out_h(), p.out_w());
// Output shape: [b_size, c_out, out_h, out_w].
let mut dst = vec![T::zero(); p.b_size * p.c_out * out_h * out_w];
let dst_s0 = p.c_out * out_h * out_w;
let dst_s1 = out_h * out_w;
let dst_s2 = out_w;
let dst_s3 = 1;
// Iterate over the input and scatter each element into the output window it
// contributes to: out = inp * stride - padding + k (dilation is fixed at 1).
for b_idx in 0..p.b_size {
for inp_y in 0..p.i_h {
for inp_x in 0..p.i_w {
let out_y = inp_y as i32 * p.stride as i32 - p.padding as i32;
let out_x = inp_x as i32 * p.stride as i32 - p.padding as i32;
for k_y in 0..p.k_h as i32 {
for k_x in 0..p.k_w as i32 {
let k_index = k_y as usize * k_s2 + k_x as usize * k_s3;
let out_y = out_y + k_y;
let out_x = out_x + k_x;
if out_x < 0 || out_y < 0 {
continue;
}
let out_x = out_x as usize;
let out_y = out_y as usize;
if out_x < out_w && out_y < out_h {
let inp_index = b_idx * inp_s0 + inp_y * inp_s2 + inp_x * inp_s3;
let dst_index = b_idx * dst_s0 + out_y * dst_s2 + out_x * dst_s3;
// The kernel layout is (c_in, c_out, k_h, k_w).
for c_out in 0..p.c_out {
for c_in in 0..p.c_in {
let k_index = k_index + c_out * k_s1 + c_in * k_s0;
let dst_index = dst_index + c_out * dst_s1;
let inp_index = inp_index + c_in * inp_s1;
dst[dst_index] += k[k_index] * inp[inp_index]
}
}
}
}
}
}
}
}
Ok(dst)
}
}
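
The scatter rule used above is easier to see in one dimension. A standalone 1-D sketch (illustrative only, not part of this commit; names are made up) applying the same out = inp * stride - padding + k relation with dilation fixed at 1:

// Naive 1-D transposed convolution: every input element is scattered into a
// window of the output, one tap per kernel element.
fn conv_transpose1d_naive(inp: &[f32], k: &[f32], stride: usize, padding: usize) -> Vec<f32> {
    // Output length for dilation = 1 and output_padding = 0; assumes the
    // padding is small enough for this not to underflow.
    let out_len = (inp.len() - 1) * stride + k.len() - 2 * padding;
    let mut out = vec![0f32; out_len];
    for (i, &x) in inp.iter().enumerate() {
        for (j, &w) in k.iter().enumerate() {
            let o = (i * stride + j) as i64 - padding as i64;
            if o >= 0 && (o as usize) < out_len {
                out[o as usize] += x * w;
            }
        }
    }
    out
}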
struct MatMul((usize, usize, usize, usize));
impl MatMul {
@@ -2043,6 +2097,16 @@ impl BackendStorage for CpuStorage {
Conv2D(params).map(self, l, kernel, kernel_l)
}
fn conv_transpose2d(
&self,
l: &Layout,
kernel: &Self,
kernel_l: &Layout,
params: &crate::conv::ParamsConvTranspose2D,
) -> Result<Self> {
ConvTranspose2D(params).map(self, l, kernel, kernel_l)
}
fn index_select(&self, ids: &Self, l: &Layout, ids_l: &Layout, dim: usize) -> Result<Self> {
match ids {
Self::U8(ids) => IndexSelect { ids, ids_l, dim }.map(self, l),


@@ -1647,6 +1647,16 @@ impl BackendStorage for CudaStorage {
Ok(Self { slice, device })
}
fn conv_transpose2d(
&self,
_l: &Layout,
_kernel: &Self,
_kernel_l: &Layout,
_params: &crate::conv::ParamsConvTranspose2D,
) -> Result<Self> {
todo!()
}
fn avg_pool2d(&self, l: &Layout, k: (usize, usize), stride: (usize, usize)) -> Result<Self> {
let device = self.device().clone();
let slice = Pool2D {


@@ -85,6 +85,16 @@ impl crate::backend::BackendStorage for CudaStorage {
Err(Error::NotCompiledWithCudaSupport)
}
fn conv_transpose2d(
&self,
_l: &Layout,
_kernel: &Self,
_kernel_l: &Layout,
_params: &crate::conv::ParamsConvTranspose2D,
) -> Result<Self> {
Err(Error::NotCompiledWithCudaSupport)
}
fn index_select(&self, _: &Self, _: &Layout, _: &Layout, _: usize) -> Result<Self> {
Err(Error::NotCompiledWithCudaSupport)
}


@@ -91,6 +91,15 @@ pub enum Op {
stride: usize,
},
#[allow(dead_code)]
ConvTranspose2D {
arg: Tensor,
kernel: Tensor,
padding: usize,
output_padding: usize,
stride: usize,
},
AvgPool2D {
arg: Tensor,
kernel_size: (usize, usize),


@@ -293,6 +293,33 @@ impl Storage {
}
}
pub(crate) fn conv_transpose2d(
&self,
l: &Layout,
kernel: &Self,
kernel_l: &Layout,
params: &crate::conv::ParamsConvTranspose2D,
) -> Result<Self> {
self.same_device(kernel, "conv_transpose2d")?;
self.same_dtype(kernel, "conv_transpose2d")?;
match (self, &kernel) {
(Storage::Cpu(inp), Storage::Cpu(kernel)) => {
let s = inp.conv_transpose2d(l, kernel, kernel_l, params)?;
Ok(Self::Cpu(s))
}
(Storage::Cuda(inp), Storage::Cuda(kernel)) => {
let s = inp.conv_transpose2d(l, kernel, kernel_l, params)?;
Ok(Self::Cuda(s))
}
(lhs, rhs) => Err(Error::DeviceMismatchBinaryOp {
lhs: lhs.device().location(),
rhs: rhs.device().location(),
op: "conv_transpose2d",
}
.bt()),
}
}
pub(crate) fn avg_pool2d(
&self,
layout: &Layout,


@@ -130,6 +130,16 @@ print(t.flatten())
print(w.flatten())
res = torch.nn.functional.conv2d(t, w)
print(res.flatten())
w_t = w.transpose(0, 1)
res = torch.nn.functional.conv_transpose2d(t, w_t)
print(res.shape)
print(res.flatten())
t_t = t.transpose(0, 1)
res = torch.nn.functional.conv_transpose2d(t_t, w)
print(res.shape)
print(res.flatten())
*/
fn conv2d_small(dev: &Device) -> Result<()> {
let t = Tensor::new(
@@ -160,6 +170,26 @@ fn conv2d_small(dev: &Device) -> Result<()> {
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000
]
);
// TODO: enable the test for cuda once we have the proper implementation in place.
if dev.is_cpu() {
let res = t.conv_transpose2d(&w.transpose(0, 1)?, 0, 0, 1)?;
assert_eq!(res.dims(), [1, 1, 3, 3]);
assert_eq!(
test_utils::to_vec1_round(&res.flatten_all()?, 4)?,
[0.164, -0.0111, -0.1742, 2.6437, -2.0268, 1.1823, 3.2855, -1.0324, 0.2539],
);
let res = t.transpose(0, 1)?.conv_transpose2d(&w, 0, 0, 1)?;
assert_eq!(res.dims(), [2, 2, 3, 3]);
assert_eq!(
test_utils::to_vec1_round(&res.flatten_all()?, 4)?,
[
-0.3755, 0.8045, -0.6336, -0.2218, -1.1369, 0.8599, 1.5768, -0.1268, -0.1728,
0.528, -1.131, 0.8908, 0.3118, 1.5984, -1.2089, -2.2168, 0.1783, 0.2429, -0.3838,
0.5802, -0.3268, -2.0382, 0.6329, -0.2293, -1.2154, 0.6441, -0.3035, 0.5396,
-0.8156, 0.4594, 2.8654, -0.8898, 0.3224, 1.7087, -0.9056, 0.4267
]
);
}
Ok(())
}
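
The expected values in these assertions come from the PyTorch snippet quoted in the comment above, and the new checks are gated to the CPU device until the CUDA kernel lands. Assuming the tests live in the candle-core package, something like `cargo test -p candle-core conv2d_small` from the workspace root runs them.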