From dffafd10495c4d85771e24143754ee98c5019289 Mon Sep 17 00:00:00 2001 From: laurent Date: Sun, 17 Mar 2024 20:15:51 +0100 Subject: [PATCH] Small optimization. --- candle-kernels/src/conv.cu | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/candle-kernels/src/conv.cu b/candle-kernels/src/conv.cu index a08a4088..bea71125 100644 --- a/candle-kernels/src/conv.cu +++ b/candle-kernels/src/conv.cu @@ -82,13 +82,12 @@ __device__ void col2im1d( const size_t src_s2 = k_size; T d = 0; - for (size_t k_i = 0; k_i < min(dst_i3 + 1, k_size); ++k_i) { - const size_t l_in_i_times_stride = dst_i3 - k_i; - const size_t l_in_i = l_in_i_times_stride / stride; + size_t l_in_i = dst_i3 / stride; + const size_t k_i = dst_i3 - stride * l_in_i; + for (size_t k_i = 0; k_i < min(dst_i3 + 1, k_size); k_i += stride) { const size_t src_i = b_i * src_s0 + l_in_i * src_s1 + c_i * src_s2 + k_i; - if (l_in_i * stride == l_in_i_times_stride) { - d += src[src_i]; - } + d += src[src_i]; + l_in_i -= 1; } dst[dst_i] = d; }