diff --git a/candle-kernels/src/compatibility.cuh b/candle-kernels/src/compatibility.cuh
index 5a22f4bc..d0791749 100644
--- a/candle-kernels/src/compatibility.cuh
+++ b/candle-kernels/src/compatibility.cuh
@@ -6,6 +6,15 @@
 
 // FIXME: the minimum compute capabilities are just guesses since the table is not specific enough
 
+#if (__CUDACC_VER_MAJOR__ < 12 || (__CUDACC_VER_MAJOR__ == 12 && __CUDACC_VER_MINOR__ < 2)) && __CUDA_ARCH__ < 800 // fallbacks compiled only for toolkits older than 12.2 targeting archs below sm_80; minor is compared only when major == 12 so that e.g. 13.0 is not mistaken for "older than 12.2"
+__device__ __forceinline__ __half __hmax_nan(__half a, __half b) { // NaN-propagating max: returns a if a is NaN, else b if b is NaN, else __hmax(a, b)
+    return __hisnan(a) ? a : (__hisnan(b) ? b : __hmax(a, b));
+}
+__device__ __forceinline__ __half __hmin_nan(__half a, __half b) { // NaN-propagating min: returns a if a is NaN, else b if b is NaN, else __hmin(a, b)
+    return __hisnan(a) ? a : (__hisnan(b) ? b : __hmin(a, b));
+}
+#endif // toolkit < 12.2 && __CUDA_ARCH__ < 800
+
 #if __CUDA_ARCH__ < 600
 // Copied from https://docs.nvidia.com/cuda/cuda-c-programming-guide/#atomic-functions
 __device__ double atomicAdd(double* address, double val) {