This is duplicated code on CUDA 12.2.

Without it we can still compile for compute capability 5.2, but I get
"Operation Not Supported" when actually trying to use those kernels.
This commit is contained in:
Nicolas Patry
2023-08-10 09:20:18 +02:00
parent de7c31bfe9
commit 66d1c093e0

View File

@@ -6,24 +6,6 @@
// FIXME: the minimum compute capabilities are just guesses since the table is not specific enough
// #if __CUDA_ARCH__ < 600
// __device__ __forceinline__ __half __hmax(__half a, __half b) {
// return __float2half(fmaxf(__half2float(a), __half2float(b)));
// }
// __device__ __forceinline__ __half __hmin(__half a, __half b) {
// return __float2half(fminf(__half2float(a), __half2float(b)));
// }
// #endif
#if __CUDA_ARCH__ < 800
// Fallbacks for the SM80+ builtins __hmax_nan / __hmin_nan on older
// architectures. Unlike plain __hmax/__hmin, the *_nan variants propagate
// NaN: if either operand is NaN, that NaN is returned instead of the
// ordered extremum.
// NOTE(review): the original bodies were commented out, leaving non-void
// __half functions with no return statement (undefined behavior / garbage
// results). Restored from the commented-out code, which matches the
// documented semantics of the SM80+ builtins.
__device__ __forceinline__ __half __hmax_nan(__half a, __half b) {
  // Propagate a first, then b, otherwise take the ordered maximum.
  return __hisnan(a) ? a : (__hisnan(b) ? b : __hmax(a, b));
}
// NaN-propagating half-precision minimum; mirror of __hmax_nan above.
__device__ __forceinline__ __half __hmin_nan(__half a, __half b) {
  return __hisnan(a) ? a : (__hisnan(b) ? b : __hmin(a, b));
}
#endif
#if __CUDA_ARCH__ < 600
// Copied from https://docs.nvidia.com/cuda/cuda-c-programming-guide/#atomic-functions
__device__ double atomicAdd(double* address, double val) {