More efficient cuda implementation for ConvTranspose1d. (#2211)

* More efficient cuda implementation for ConvTranspose1d.

* Small tweak.
This commit is contained in:
Laurent Mazare
2024-05-24 11:05:43 +02:00
committed by GitHub
parent d54e02d73d
commit 6f0b807ffd
3 changed files with 140 additions and 4 deletions

View File

@@ -10,7 +10,7 @@ pub use utils::{
};
const USE_IM2COL_CONV1D: bool = true;
const USE_IM2COL_CONV1D_TR: bool = true;
const USE_COL2IM_CONV1D_TR: bool = true;
const USE_IM2COL_CONV2D: bool = true;
// TODO: Maybe we should not implement [Clone] here and instead have an explicit allocator +
@@ -2249,7 +2249,7 @@ impl BackendStorage for CpuStorage {
&& params.dilation == 1
&& params.padding == 0
&& params.output_padding == 0;
if USE_IM2COL_CONV1D_TR && can_use_col2im {
if USE_COL2IM_CONV1D_TR && can_use_col2im {
let (b_size, c_in, l_in) = l.shape().dims3()?;
let (c_in2, c_out, k_size) = kernel_l.shape().dims3()?;
if !kernel_l.is_contiguous() {