Add the where kernels.

This commit is contained in:
laurent
2023-06-26 13:25:02 +01:00
parent b1d6e264da
commit cd2a171c06
3 changed files with 42 additions and 0 deletions

View File

@@ -1,6 +1,10 @@
#include "cuda_fp16.h"
#include "compatibility.cuh"
// TODO: This is often used to check that the data is contiguous so that
// kernels can be easily mapped. However, it only returns true for row-major
// layouts. If all the inputs are column-major, we could apply the fast path
// too (but not if some of them are row-major and some are column-major).
__device__ bool is_contiguous(
const size_t num_dims,
const size_t *dims,