Add cuda support for unary ops.

This commit is contained in:
laurent
2023-06-22 15:12:59 +01:00
parent b8f514d9c6
commit 5276755fb3
7 changed files with 101 additions and 23 deletions

View File

@ -2,28 +2,16 @@
// Binary element-wise kernel instantiations. Each BINARY_OP line passes an
// element type, a kernel name and an expression over `x` and `y`; the macro
// itself is defined outside this excerpt.
// The __half variants are only compiled when __CUDA_ARCH__ >= 530.
// NOTE(review): bdiv_f16, bmul_f16 and bsub_f16 appear both in the grouped
// f16 section and again next to their f32/f64 siblings. This looks like the
// before/after sides of a diff shown together; if both really live in one
// translation unit the repeated names would clash - confirm against the file.
#if __CUDA_ARCH__ >= 530
BINARY_OP(__half, badd_f16, x + y)
BINARY_OP(__half, bdiv_f16, x / y)
BINARY_OP(__half, bmul_f16, x * y)
BINARY_OP(__half, bsub_f16, x - y)
#endif
BINARY_OP(float, badd_f32, x + y)
// NOTE(review): every other kernel here is named b<op>_<dtype>; `badd_fwd_f64`
// breaks that pattern. Renaming would change the exported symbol, so verify
// what the host side looks up before touching it.
BINARY_OP(double, badd_fwd_f64, x + y);
#if __CUDA_ARCH__ >= 530
BINARY_OP(__half, bdiv_f16, x / y)
#endif
BINARY_OP(float, bdiv_f32, x / y)
BINARY_OP(double, bdiv_f64, x / y);
#if __CUDA_ARCH__ >= 530
BINARY_OP(__half, bmul_f16, x * y)
#endif
BINARY_OP(float, bmul_f32, x * y)
BINARY_OP(double, bmul_f64, x * y);
#if __CUDA_ARCH__ >= 530
BINARY_OP(__half, bsub_f16, x - y)
#endif
BINARY_OP(float, bsub_f32, x - y)
BINARY_OP(double, bsub_f64, x - y);

View File

@ -1,3 +1,4 @@
// Each constant embeds, at compile time, the text of a `.ptx` file found in
// the directory named by the `OUT_DIR` environment variable.
// NOTE(review): the files are presumably produced by a build script from the
// matching `.cu` sources - confirm against the crate's build setup.

/// Contents of `$OUT_DIR/affine.ptx` as a string.
pub const AFFINE: &str = include_str!(concat!(env!("OUT_DIR"), "/affine.ptx"));
/// Contents of `$OUT_DIR/binary.ptx` as a string.
pub const BINARY: &str = include_str!(concat!(env!("OUT_DIR"), "/binary.ptx"));
/// Contents of `$OUT_DIR/fill.ptx` as a string.
pub const FILL: &str = include_str!(concat!(env!("OUT_DIR"), "/fill.ptx"));
/// Contents of `$OUT_DIR/unary.ptx` as a string.
pub const UNARY: &str = include_str!(concat!(env!("OUT_DIR"), "/unary.ptx"));

26
kernels/src/unary.cu Normal file
View File

@ -0,0 +1,26 @@
#include "cuda_utils.cuh"
// UNARY_OP(TYPENAME, FN_NAME, FUNC) defines an extern "C" kernel named FN_NAME
// that evaluates the expression FUNC (written in terms of a local `x` of type
// TYPENAME) for each of `numel` elements.
//   numel : number of elements to process
//   inp   : input buffer; when null the kernel works in place and reads x
//           from `out` instead
//   out   : output buffer, written at every index in [0, numel)
// Each thread starts at its global 1D index and advances by
// blockDim.x * gridDim.x, so any 1D launch configuration covers the range.
// The index and stride are size_t: a 32-bit index compared against a size_t
// numel would wrap around and never terminate once numel exceeds 2^32.
// The macro's last line deliberately has no trailing backslash, so whatever
// follows the definition is not spliced into the macro body.
#define UNARY_OP(TYPENAME, FN_NAME, FUNC) \
extern "C" __global__ void FN_NAME( \
    const size_t numel, \
    const TYPENAME *inp, \
    TYPENAME *out \
) { \
    const size_t stride = (size_t)blockDim.x * gridDim.x; \
    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < numel; i += stride) { \
        TYPENAME x = inp ? inp[i] : out[i]; \
        out[i] = FUNC; \
    } \
}
// Unary kernel instantiations, named u<op>_<dtype> for neg, sqr and sqrt.
// `sqrtg` is not defined in this file; presumably it is a type-overloaded
// square root provided by cuda_utils.cuh - TODO confirm it covers double.
// The __half variants are only compiled when __CUDA_ARCH__ >= 530.
#if __CUDA_ARCH__ >= 530
UNARY_OP(__half, uneg_f16, -x)
UNARY_OP(__half, usqr_f16, x*x)
UNARY_OP(__half, usqrt_f16, sqrtg(x))
#endif
// The *_f64 kernels are instantiated with double so the element width matches
// the dtype in the name; with float they would index 8-byte elements as
// 4-byte values and produce garbage for f64 buffers.
UNARY_OP(float, uneg_f32, -x)
UNARY_OP(double, uneg_f64, -x)
UNARY_OP(float, usqr_f32, x*x)
UNARY_OP(double, usqr_f64, x*x)
UNARY_OP(float, usqrt_f32, sqrtg(x))
UNARY_OP(double, usqrt_f64, sqrtg(x))