Add the bf16 cuda kernels.

This commit is contained in:
laurent
2023-06-29 23:12:02 +01:00
parent 018e017e7e
commit ec79fc43f2
9 changed files with 67 additions and 1 deletions

View File

@ -43,6 +43,10 @@ extern "C" __global__ void FN_NAME( \
} \
} \
// Reduced-precision instantiations of SUM_OP. Each one is emitted only when
// the device compilation target (__CUDA_ARCH__) is at or above the stated
// compute capability; in passes where __CUDA_ARCH__ is undefined, both guards
// evaluate to false and nothing is generated.

// bfloat16 variant: requires compute capability 8.0 or newer (sm_80+).
// NOTE(review): presumably gated because the macro body relies on bf16
// arithmetic that older targets do not provide -- confirm against SUM_OP.
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 800)
SUM_OP(__nv_bfloat16, sum_bf16)
#endif  // __CUDA_ARCH__ >= 800

// half (fp16) variant: requires compute capability 5.3 or newer (sm_53+).
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
SUM_OP(__half, sum_f16)
#endif  // __CUDA_ARCH__ >= 530