Add some cmp tests. (#233)

* Add some cmp tests.
* Add the CUDA kernels for comparison operations.
2025-06-19 19:58:35 +00:00 · 2023-07-24 16:53:45 +01:00
parent 160ba09d30
commit b50f932e7c
4 changed files with 134 additions and 12 deletions
--- a/candle-kernels/src/binary.cu
+++ b/candle-kernels/src/binary.cu
@@ -6,6 +6,12 @@ BINARY_OP(__nv_bfloat16, badd_bf16, x + y)
 BINARY_OP(__nv_bfloat16, bdiv_bf16, x / y)
 BINARY_OP(__nv_bfloat16, bmul_bf16, x * y)
 BINARY_OP(__nv_bfloat16, bsub_bf16, x - y)
+BINARY_OP_OUT(__nv_bfloat16, uint8_t, eq_bf16, x == y)  // equal (uint8_t is presumably the output type; macro defined outside this diff)
+BINARY_OP_OUT(__nv_bfloat16, uint8_t, ne_bf16, x != y)  // not equal
+BINARY_OP_OUT(__nv_bfloat16, uint8_t, lt_bf16, x < y)   // less than
+BINARY_OP_OUT(__nv_bfloat16, uint8_t, le_bf16, x <= y)  // less than or equal
+BINARY_OP_OUT(__nv_bfloat16, uint8_t, gt_bf16, x > y)   // greater than
+BINARY_OP_OUT(__nv_bfloat16, uint8_t, ge_bf16, x >= y)  // greater than or equal
 #endif

 #if __CUDA_ARCH__ >= 530
@@ -13,6 +19,12 @@ BINARY_OP(__half, badd_f16, x + y)
 BINARY_OP(__half, bdiv_f16, x / y)
 BINARY_OP(__half, bmul_f16, x * y)
 BINARY_OP(__half, bsub_f16, x - y)
+BINARY_OP_OUT(__half, uint8_t, eq_f16, x == y)  // equal (uint8_t is presumably the output type; macro defined outside this diff)
+BINARY_OP_OUT(__half, uint8_t, ne_f16, x != y)  // not equal
+BINARY_OP_OUT(__half, uint8_t, lt_f16, x < y)   // less than
+BINARY_OP_OUT(__half, uint8_t, le_f16, x <= y)  // less than or equal
+BINARY_OP_OUT(__half, uint8_t, gt_f16, x > y)   // greater than
+BINARY_OP_OUT(__half, uint8_t, ge_f16, x >= y)  // greater than or equal
 #endif

 BINARY_OP(float, badd_f32, x + y)
@@ -31,3 +43,33 @@ BINARY_OP(float, bsub_f32, x - y)
 BINARY_OP(double, bsub_f64, x - y);
 BINARY_OP(uint8_t, bsub_u8, x - y);
 BINARY_OP(uint32_t, bsub_u32, x - y);
+// eq: equal. As in every group below, the second argument is uint8_t (presumably the output type; BINARY_OP_OUT is defined outside this diff).
+BINARY_OP_OUT(float, uint8_t, eq_f32, x == y)
+BINARY_OP_OUT(double, uint8_t, eq_f64, x == y)
+BINARY_OP_OUT(uint8_t, uint8_t, eq_u8, x == y)
+BINARY_OP_OUT(uint32_t, uint8_t, eq_u32, x == y)
+// ne: not equal.
+BINARY_OP_OUT(float, uint8_t, ne_f32, x != y)
+BINARY_OP_OUT(double, uint8_t, ne_f64, x != y)
+BINARY_OP_OUT(uint8_t, uint8_t, ne_u8, x != y)
+BINARY_OP_OUT(uint32_t, uint8_t, ne_u32, x != y)
+// lt: less than.
+BINARY_OP_OUT(float, uint8_t, lt_f32, x < y)
+BINARY_OP_OUT(double, uint8_t, lt_f64, x < y)
+BINARY_OP_OUT(uint8_t, uint8_t, lt_u8, x < y)
+BINARY_OP_OUT(uint32_t, uint8_t, lt_u32, x < y)
+// le: less than or equal.
+BINARY_OP_OUT(float, uint8_t, le_f32, x <= y)
+BINARY_OP_OUT(double, uint8_t, le_f64, x <= y)
+BINARY_OP_OUT(uint8_t, uint8_t, le_u8, x <= y)
+BINARY_OP_OUT(uint32_t, uint8_t, le_u32, x <= y)
+// gt: greater than.
+BINARY_OP_OUT(float, uint8_t, gt_f32, x > y)
+BINARY_OP_OUT(double, uint8_t, gt_f64, x > y)
+BINARY_OP_OUT(uint8_t, uint8_t, gt_u8, x > y)
+BINARY_OP_OUT(uint32_t, uint8_t, gt_u32, x > y)
+// ge: greater than or equal.
+BINARY_OP_OUT(float, uint8_t, ge_f32, x >= y)
+BINARY_OP_OUT(double, uint8_t, ge_f64, x >= y)
+BINARY_OP_OUT(uint8_t, uint8_t, ge_u8, x >= y)
+BINARY_OP_OUT(uint32_t, uint8_t, ge_u32, x >= y)