Handle transposed matrices in cuBLAS.

This commit is contained in:
laurent
2023-06-26 17:49:29 +01:00
parent 3761f02aa8
commit 1ad5baecc5
3 changed files with 46 additions and 17 deletions

View File

@@ -1,4 +1,5 @@
// TODO: Use a proper distributed reduction rather than atomicAdd.
// https://people.maths.ox.ac.uk/gilesm/cuda/prac4/reduction.pdf
#include "cuda_utils.cuh"
#include<stdint.h>