Small optimization.

This commit is contained in:
laurent
2024-03-17 20:15:51 +01:00
parent 75f2aea5fd
commit dffafd1049

View File

@@ -82,13 +82,12 @@ __device__ void col2im1d(
const size_t src_s2 = k_size; const size_t src_s2 = k_size;
T d = 0; T d = 0;
for (size_t k_i = 0; k_i < min(dst_i3 + 1, k_size); ++k_i) { size_t l_in_i = dst_i3 / stride;
const size_t l_in_i_times_stride = dst_i3 - k_i; const size_t k_i = dst_i3 - stride * l_in_i;
const size_t l_in_i = l_in_i_times_stride / stride; for (size_t k_i = 0; k_i < min(dst_i3 + 1, k_size); k_i += stride) {
const size_t src_i = b_i * src_s0 + l_in_i * src_s1 + c_i * src_s2 + k_i; const size_t src_i = b_i * src_s0 + l_in_i * src_s1 + c_i * src_s2 + k_i;
if (l_in_i * stride == l_in_i_times_stride) {
d += src[src_i]; d += src[src_i];
} l_in_i -= 1;
} }
dst[dst_i] = d; dst[dst_i] = d;
} }