Mirror of https://github.com/huggingface/candle.git, synced 2025-06-18 11:37:11 +00:00
Cuda kernels for fast min/max reductions (#203)
* Add the min/max cuda kernels.
* Better integration of the cuda kernels.
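The commit message is terse, so a minimal sketch of the kind of block-level min reduction this title describes is given below. The kernel name `fast_min_f32`, the block size, and the two-pass combine strategy are illustrative assumptions, not the code added by this PR.

// Illustrative sketch only: a shared-memory min reduction producing one
// partial result per block. Name, block size, and dispatch are assumptions,
// not the kernels added in this commit.
#include <stddef.h>
#include <cfloat>

#define BLOCK_SIZE 256

extern "C" __global__ void fast_min_f32(const float *src, float *dst, const size_t n) {
    __shared__ float shr[BLOCK_SIZE];
    size_t tid = threadIdx.x;
    size_t idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Each thread loads one element (or +inf past the end of the buffer).
    shr[tid] = idx < n ? src[idx] : FLT_MAX;
    __syncthreads();

    // Pairwise tree reduction in shared memory.
    for (size_t s = blockDim.x / 2; s > 0; s >>= 1) {
        if (tid < s) shr[tid] = fminf(shr[tid], shr[tid + s]);
        __syncthreads();
    }

    // One partial min per block; a second launch (or a host pass) combines them.
    if (tid == 0) dst[blockIdx.x] = shr[0];
}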
@@ -1,4 +1,6 @@
#include "compatibility.cuh"
#include<stdint.h>
#include<cmath>

// TODO: This is often used to check that the data is contiguous so that
// kernels can be easily mapped. However this only returns true for row
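The TODO above refers to a stride-based contiguity check; the rest of the comment falls outside this hunk. For context, a row-major contiguity check of this kind can be sketched as follows (the signature and argument order are assumptions, not necessarily the helper in this file).

// Illustrative sketch of a row-major contiguity check: walking the shape from
// the innermost dimension outwards, each stride must equal the running product
// of the trailing dimensions. Signature is an assumption.
__device__ bool is_contiguous(const size_t num_dims, const size_t *dims, const size_t *strides) {
    size_t acc = 1;
    for (size_t d = 0; d < num_dims; d++) {
        size_t dim_idx = num_dims - 1 - d;
        if (acc != strides[dim_idx]) {
            return false;
        }
        acc *= dims[dim_idx];
    }
    return true;
}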
@@ -140,6 +142,9 @@ __device__ __forceinline__ double absg(double a) { return fabs(a); }
__device__ __forceinline__ float copysigng(float a, float b) { return copysignf(a, b); }
__device__ __forceinline__ double copysigng(double a, double b) { return copysign(a, b); }

__device__ __forceinline__ uint32_t ming(uint32_t a, uint32_t b) { return min(a, b); }
__device__ __forceinline__ uint32_t maxg(uint32_t a, uint32_t b) { return max(a, b); }

#if __CUDA_ARCH__ >= 530
__device__ __forceinline__ __half powg(__half a, __half b) { return __float2half(powf(__half2float(a), __half2float(b))); }
__device__ __forceinline__ bool isnang(__half a) { return __hisnan(a); }
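The hunk stops at the start of the __CUDA_ARCH__ >= 530 block, so the half-precision counterparts of ming/maxg are not visible here. A sketch of what they could look like behind the same guard follows; whether the actual code uses __hmin/__hmax or a float round-trip as shown is an assumption.

// Illustrative sketch: __half overloads of ming/maxg behind the same
// compute-capability guard. Round-tripping through float works on sm_53+;
// whether the real helpers do this or use __hmin/__hmax is an assumption.
#include <cuda_fp16.h>

#if __CUDA_ARCH__ >= 530
__device__ __forceinline__ __half ming(__half a, __half b) { return __float2half(fminf(__half2float(a), __half2float(b))); }
__device__ __forceinline__ __half maxg(__half a, __half b) { return __float2half(fmaxf(__half2float(a), __half2float(b))); }
#endif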