diff --git a/candle-kernels/src/compatibility.cuh b/candle-kernels/src/compatibility.cuh
index 5a22f4bc..d0791749 100644
--- a/candle-kernels/src/compatibility.cuh
+++ b/candle-kernels/src/compatibility.cuh
@@ -6,6 +6,25 @@
 
 // FIXME: the minimum compute capabilities are just guesses since the table is not specific enough
 
+// Fallback __hmax_nan / __hmin_nan for the configuration selected by the guard
+// below: a toolkit older than CUDA 12.2 combined with a target below sm_80.
+// NOTE(review): presumably cuda_fp16.h already supplies both outside that
+// window - confirm against the toolkit headers.
+// The version is compared as a (major, minor) pair; testing the minor number
+// on its own would misclassify releases such as 13.0 or 13.1 as pre-12.2.
+#if (__CUDACC_VER_MAJOR__ < 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ < 2)) && __CUDA_ARCH__ < 800
+// NaN-propagating maximum: yields a if a is NaN, else b if b is NaN,
+// otherwise __hmax(a, b).
+__device__ __forceinline__ __half __hmax_nan(__half a, __half b) {
+    return __hisnan(a) ? a : (__hisnan(b) ? b : __hmax(a, b));
+}
+// NaN-propagating minimum: yields a if a is NaN, else b if b is NaN,
+// otherwise __hmin(a, b).
+__device__ __forceinline__ __half __hmin_nan(__half a, __half b) {
+    return __hisnan(a) ? a : (__hisnan(b) ? b : __hmin(a, b));
+}
+#endif
+
 #if __CUDA_ARCH__ < 600
 // Copied from https://docs.nvidia.com/cuda/cuda-c-programming-guide/#atomic-functions
 __device__ double atomicAdd(double* address, double val) {