Add conv-transpose. (#635)

* Add conv-transpose.

* Return zeros for now.

* Naive CPU implementation.

* Add a conv-transpose test + fix the cpu implementation.

* Add a second test.
Laurent Mazare
2023-08-28 10:10:12 +01:00
committed by GitHub
parent 26e1b40992
commit 3cca89cc70
9 changed files with 244 additions and 0 deletions
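
Before the per-file changes, a minimal usage sketch of the API this commit adds (not part of the diff itself; it assumes the crate is consumed as `candle_core` and reuses the shapes from the new tests below, with the input laid out as (batch, c_in, h, w) and the kernel as (c_in, c_out, k_h, k_w)):

use candle_core::{Device, Result, Tensor};

fn conv_transpose_example() -> Result<Tensor> {
    let dev = Device::Cpu;
    // Input of shape (batch = 1, c_in = 2, h = 3, w = 3).
    let t = Tensor::randn(0f32, 1., (1, 2, 3, 3), &dev)?;
    // Kernel of shape (c_in = 2, c_out = 1, k_h = 1, k_w = 1).
    let w = Tensor::randn(0f32, 1., (2, 1, 1, 1), &dev)?;
    // padding = 0, output_padding = 0, stride = 1 -> output shape (1, 1, 3, 3).
    t.conv_transpose2d(&w, 0, 0, 1)
}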


@@ -45,6 +45,14 @@ pub trait BackendStorage: Sized {
_params: &crate::conv::ParamsConv2D,
) -> Result<Self>;
fn conv_transpose2d(
&self,
_l: &Layout,
_kernel: &Self,
_kernel_l: &Layout,
_params: &crate::conv::ParamsConvTranspose2D,
) -> Result<Self>;
fn avg_pool2d(&self, _: &Layout, _: (usize, usize), _: (usize, usize)) -> Result<Self>;
fn max_pool2d(&self, _: &Layout, _: (usize, usize), _: (usize, usize)) -> Result<Self>;
fn upsample_nearest2d(&self, _: &Layout, _: usize, _: usize) -> Result<Self>;


@@ -60,6 +60,11 @@ impl Tensor {
kernel: rhs,
..
}
| Op::ConvTranspose2D {
arg: lhs,
kernel: rhs,
..
}
| Op::CustomOp2(lhs, rhs, _)
| Op::Binary(lhs, rhs, _)
| Op::Gather(lhs, rhs, _)
@@ -188,6 +193,9 @@ impl Tensor {
}
Op::Conv1D { .. } => Err(Error::BackwardNotSupported { op: "conv1d" })?,
Op::Conv2D { .. } => Err(Error::BackwardNotSupported { op: "conv2d" })?,
Op::ConvTranspose2D { .. } => Err(Error::BackwardNotSupported {
op: "conv-transpose2d",
})?,
Op::AvgPool2D { .. } => Err(Error::BackwardNotSupported { op: "avg-pool2d" })?,
Op::MaxPool2D { .. } => Err(Error::BackwardNotSupported { op: "max-pool2d" })?,
Op::UpsampleNearest2D { .. } => Err(Error::BackwardNotSupported {


@@ -54,6 +54,42 @@ impl ParamsConv2D {
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParamsConvTranspose2D {
pub(crate) b_size: usize,
pub(crate) i_h: usize,
pub(crate) i_w: usize,
pub(crate) k_h: usize,
pub(crate) k_w: usize,
pub(crate) c_out: usize,
pub(crate) c_in: usize,
pub(crate) padding: usize,
pub(crate) output_padding: usize,
pub(crate) stride: usize,
}
impl ParamsConvTranspose2D {
pub(crate) fn out_h(&self) -> usize {
let dilation = 1;
(self.i_h - 1) * self.stride - 2 * self.padding
+ dilation * (self.k_h - 1)
+ self.output_padding
+ 1
}
pub(crate) fn out_w(&self) -> usize {
let dilation = 1;
(self.i_w - 1) * self.stride - 2 * self.padding
+ dilation * (self.k_w - 1)
+ self.output_padding
+ 1
}
pub(crate) fn out_dims(&self) -> Vec<usize> {
vec![self.b_size, self.c_out, self.out_h(), self.out_w()]
}
}
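
The two formulas above mirror PyTorch's ConvTranspose2d output-size rule with dilation fixed at 1. As a concrete check, with i_h = i_w = 3, a 1x1 kernel, stride 1, and no padding or output padding: out_h = (3 - 1) * 1 - 2 * 0 + 1 * (1 - 1) + 0 + 1 = 3, which is the [1, 1, 3, 3] shape asserted in the tests at the end of this diff. A hypothetical in-crate test (not part of the commit) spelling that out:

#[cfg(test)]
mod conv_transpose_params_tests {
    use super::ParamsConvTranspose2D;

    #[test]
    fn out_dims_matches_pytorch_rule() {
        // Same configuration as the 1x1-kernel test below.
        let p = ParamsConvTranspose2D {
            b_size: 1,
            i_h: 3,
            i_w: 3,
            k_h: 1,
            k_w: 1,
            c_out: 1,
            c_in: 2,
            padding: 0,
            output_padding: 0,
            stride: 1,
        };
        assert_eq!(p.out_dims(), vec![1, 1, 3, 3]);
    }
}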
impl Tensor {
fn conv1d_single_group(&self, kernel: &Self, params: &ParamsConv1D) -> Result<Self> {
let storage =
@@ -166,4 +202,46 @@ impl Tensor {
Tensor::cat(&blocks, 1)
}
}
/// Applies a 2D transposed convolution over the input tensor.
pub fn conv_transpose2d(
&self,
kernel: &Self,
padding: usize,
output_padding: usize,
stride: usize,
) -> Result<Self> {
let (b_size, c_in, i_h, i_w) = self.dims4()?;
let (c_in_k, c_out, k_h, k_w) = kernel.dims4()?;
if c_in != c_in_k {
crate::bail!("in_channel mismatch between input ({c_in}) and kernel ({c_in_k})")
}
let params = ParamsConvTranspose2D {
b_size,
i_h,
i_w,
k_h,
k_w,
c_out,
c_in,
padding,
output_padding,
stride,
};
let storage = self.storage().conv_transpose2d(
self.layout(),
&kernel.storage(),
kernel.layout(),
&params,
)?;
let op = BackpropOp::new2(self, kernel, |arg, kernel| Op::ConvTranspose2D {
arg,
kernel,
padding: params.padding,
output_padding: params.output_padding,
stride: params.stride,
});
let out_dims = params.out_dims();
Ok(crate::tensor::from_storage(storage, out_dims, op, false))
}
}


@@ -1180,6 +1180,60 @@ impl<'a> Map2 for Conv2D<'a> {
}
}
struct ConvTranspose2D<'a>(&'a crate::conv::ParamsConvTranspose2D);
impl<'a> Map2 for ConvTranspose2D<'a> {
const OP: &'static str = "conv_transpose2d";
fn f<T: WithDType>(&self, inp: &[T], inp_l: &Layout, k: &[T], k_l: &Layout) -> Result<Vec<T>> {
let p = self.0;
let inp = &inp[inp_l.start_offset()..];
let (inp_s0, inp_s1, inp_s2, inp_s3) = crate::shape::dims4(inp_l.stride())?;
let k = &k[k_l.start_offset()..];
let (k_s0, k_s1, k_s2, k_s3) = crate::shape::dims4(k_l.stride())?;
let (out_h, out_w) = (p.out_h(), p.out_w());
// Output shape: [b_size, c_out, out_h, out_w].
let mut dst = vec![T::zero(); p.b_size * p.c_out * out_h * out_w];
let dst_s0 = p.c_out * out_h * out_w;
let dst_s1 = out_h * out_w;
let dst_s2 = out_w;
let dst_s3 = 1;
// Iterate over the input and scatter each element into the output window it
// contributes to: out = inp * stride - padding + k (dilation is fixed at 1).
for b_idx in 0..p.b_size {
for inp_y in 0..p.i_h {
for inp_x in 0..p.i_w {
let out_y = inp_y as i32 * p.stride as i32 - p.padding as i32;
let out_x = inp_x as i32 * p.stride as i32 - p.padding as i32;
for k_y in 0..p.k_h as i32 {
for k_x in 0..p.k_w as i32 {
let k_index = k_y as usize * k_s2 + k_x as usize * k_s3;
let out_y = out_y + k_y;
let out_x = out_x + k_x;
if out_x < 0 || out_y < 0 {
continue;
}
let out_x = out_x as usize;
let out_y = out_y as usize;
if out_x < out_w && out_y < out_h {
let inp_index = b_idx * inp_s0 + inp_y * inp_s2 + inp_x * inp_s3;
let dst_index = b_idx * dst_s0 + out_y * dst_s2 + out_x * dst_s3;
// The kernel layout is (c_in, c_out, k_h, k_w).
for c_out in 0..p.c_out {
for c_in in 0..p.c_in {
let k_index = k_index + c_out * k_s1 + c_in * k_s0;
let dst_index = dst_index + c_out * dst_s1;
let inp_index = inp_index + c_in * inp_s1;
dst[dst_index] += k[k_index] * inp[inp_index]
}
}
}
}
}
}
}
}
Ok(dst)
}
}
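
The scatter rule used above is easier to see in one dimension. A standalone 1-D sketch (illustrative only, not part of this commit; names are made up) applying the same out = inp * stride - padding + k relation with dilation fixed at 1:

// Naive 1-D transposed convolution: every input element is scattered into a
// window of the output, one tap per kernel element.
fn conv_transpose1d_naive(inp: &[f32], k: &[f32], stride: usize, padding: usize) -> Vec<f32> {
    // Output length for dilation = 1 and output_padding = 0; assumes the
    // padding is small enough for this not to underflow.
    let out_len = (inp.len() - 1) * stride + k.len() - 2 * padding;
    let mut out = vec![0f32; out_len];
    for (i, &x) in inp.iter().enumerate() {
        for (j, &w) in k.iter().enumerate() {
            let o = (i * stride + j) as i64 - padding as i64;
            if o >= 0 && (o as usize) < out_len {
                out[o as usize] += x * w;
            }
        }
    }
    out
}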
struct MatMul((usize, usize, usize, usize));
impl MatMul {
@@ -2043,6 +2097,16 @@ impl BackendStorage for CpuStorage {
Conv2D(params).map(self, l, kernel, kernel_l)
}
fn conv_transpose2d(
&self,
l: &Layout,
kernel: &Self,
kernel_l: &Layout,
params: &crate::conv::ParamsConvTranspose2D,
) -> Result<Self> {
ConvTranspose2D(params).map(self, l, kernel, kernel_l)
}
fn index_select(&self, ids: &Self, l: &Layout, ids_l: &Layout, dim: usize) -> Result<Self> {
match ids {
Self::U8(ids) => IndexSelect { ids, ids_l, dim }.map(self, l),


@@ -1647,6 +1647,16 @@ impl BackendStorage for CudaStorage {
Ok(Self { slice, device })
}
fn conv_transpose2d(
&self,
_l: &Layout,
_kernel: &Self,
_kernel_l: &Layout,
_params: &crate::conv::ParamsConvTranspose2D,
) -> Result<Self> {
todo!()
}
fn avg_pool2d(&self, l: &Layout, k: (usize, usize), stride: (usize, usize)) -> Result<Self> {
let device = self.device().clone();
let slice = Pool2D {


@@ -85,6 +85,16 @@ impl crate::backend::BackendStorage for CudaStorage {
Err(Error::NotCompiledWithCudaSupport)
}
fn conv_transpose2d(
&self,
_l: &Layout,
_kernel: &Self,
_kernel_l: &Layout,
_params: &crate::conv::ParamsConvTranspose2D,
) -> Result<Self> {
Err(Error::NotCompiledWithCudaSupport)
}
fn index_select(&self, _: &Self, _: &Layout, _: &Layout, _: usize) -> Result<Self> {
Err(Error::NotCompiledWithCudaSupport)
}


@@ -91,6 +91,15 @@ pub enum Op {
stride: usize,
},
#[allow(dead_code)]
ConvTranspose2D {
arg: Tensor,
kernel: Tensor,
padding: usize,
output_padding: usize,
stride: usize,
},
AvgPool2D {
arg: Tensor,
kernel_size: (usize, usize),


@@ -293,6 +293,33 @@ impl Storage {
}
}
pub(crate) fn conv_transpose2d(
&self,
l: &Layout,
kernel: &Self,
kernel_l: &Layout,
params: &crate::conv::ParamsConvTranspose2D,
) -> Result<Self> {
self.same_device(kernel, "conv_transpose2d")?;
self.same_dtype(kernel, "conv_transpose2d")?;
match (self, &kernel) {
(Storage::Cpu(inp), Storage::Cpu(kernel)) => {
let s = inp.conv_transpose2d(l, kernel, kernel_l, params)?;
Ok(Self::Cpu(s))
}
(Storage::Cuda(inp), Storage::Cuda(kernel)) => {
let s = inp.conv_transpose2d(l, kernel, kernel_l, params)?;
Ok(Self::Cuda(s))
}
(lhs, rhs) => Err(Error::DeviceMismatchBinaryOp {
lhs: lhs.device().location(),
rhs: rhs.device().location(),
op: "conv_transpose2d",
}
.bt()),
}
}
pub(crate) fn avg_pool2d(
&self,
layout: &Layout,


@@ -130,6 +130,16 @@ print(t.flatten())
print(w.flatten())
res = torch.nn.functional.conv2d(t, w)
print(res.flatten())
w_t = w.transpose(0, 1)
res = torch.nn.functional.conv_transpose2d(t, w_t)
print(res.shape)
print(res.flatten())
t_t = t.transpose(0, 1)
res = torch.nn.functional.conv_transpose2d(t_t, w)
print(res.shape)
print(res.flatten())
*/
fn conv2d_small(dev: &Device) -> Result<()> {
let t = Tensor::new(
@@ -160,6 +170,26 @@ fn conv2d_small(dev: &Device) -> Result<()> {
0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000
]
);
// TODO: enable the test for cuda once we have the proper implementation in place.
if dev.is_cpu() {
let res = t.conv_transpose2d(&w.transpose(0, 1)?, 0, 0, 1)?;
assert_eq!(res.dims(), [1, 1, 3, 3]);
assert_eq!(
test_utils::to_vec1_round(&res.flatten_all()?, 4)?,
[0.164, -0.0111, -0.1742, 2.6437, -2.0268, 1.1823, 3.2855, -1.0324, 0.2539],
);
let res = t.transpose(0, 1)?.conv_transpose2d(&w, 0, 0, 1)?;
assert_eq!(res.dims(), [2, 2, 3, 3]);
assert_eq!(
test_utils::to_vec1_round(&res.flatten_all()?, 4)?,
[
-0.3755, 0.8045, -0.6336, -0.2218, -1.1369, 0.8599, 1.5768, -0.1268, -0.1728,
0.528, -1.131, 0.8908, 0.3118, 1.5984, -1.2089, -2.2168, 0.1783, 0.2429, -0.3838,
0.5802, -0.3268, -2.0382, 0.6329, -0.2293, -1.2154, 0.6441, -0.3035, 0.5396,
-0.8156, 0.4594, 2.8654, -0.8898, 0.3224, 1.7087, -0.9056, 0.4267
]
);
}
Ok(())
}
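
The expected values in these assertions come from the PyTorch snippet quoted in the comment above, and the new checks are gated to the CPU device until the CUDA kernel lands. Assuming the tests live in the candle-core package, something like `cargo test -p candle-core conv2d_small` from the workspace root runs them.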