Make it possible to use TF32 accumulation in F32 matmuls. (#2178)

* Allow the use of TF32 accumulation in matmul.

* Better timings.

* Dummy versions for use when CUDA is not enabled.
Author: Laurent Mazare
Date: 2024-05-11 12:28:39 +02:00
Commit: 9cff7bc3f4 (parent d9bc5ec151), committed via GitHub
3 changed files with 89 additions and 30 deletions
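The hunk below shows only the non-CUDA dummy fallbacks. For context, a rough sketch of what the CUDA-side pair could look like, assuming candle's `cuda` cargo feature and a hypothetical `GEMM_REDUCED_PRECISION_F32` atomic; the names and layout here are illustrative, not taken from this commit:

use std::sync::atomic::{AtomicBool, Ordering};

// Hypothetical global flag; defaults to true so TF32 is allowed out of the box.
#[cfg(feature = "cuda")]
static GEMM_REDUCED_PRECISION_F32: AtomicBool = AtomicBool::new(true);

// When CUDA is enabled, the getter and setter read and write the flag.
#[cfg(feature = "cuda")]
pub fn gemm_reduced_precision_f32() -> bool {
    GEMM_REDUCED_PRECISION_F32.load(Ordering::Relaxed)
}

#[cfg(feature = "cuda")]
pub fn set_gemm_reduced_precision_f32(b: bool) {
    GEMM_REDUCED_PRECISION_F32.store(b, Ordering::Relaxed)
}

Keeping the same signatures on both sides means the public API is identical whether or not CUDA is compiled in; the dummy versions in the hunk below simply report the default and ignore writes.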


@@ -258,3 +258,13 @@ pub fn gemm_reduced_precision_bf16() -> bool {
 /// This bool controls whether reduced precision reductions (e.g., with fp16 accumulation type) are
 /// allowed with bf16 GEMMs.
 pub fn set_gemm_reduced_precision_bf16(_: bool) {}
+
+/// This bool controls whether reduced precision reductions (e.g., with tf32 accumulation type) are
+/// allowed with f32 GEMMs.
+pub fn gemm_reduced_precision_f32() -> bool {
+    true
+}
+
+/// This bool controls whether reduced precision reductions (e.g., with tf32 accumulation type) are
+/// allowed with f32 GEMMs.
+pub fn set_gemm_reduced_precision_f32(_b: bool) {}
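
With these two entry points in place, toggling TF32 from user code is a one-liner. A minimal usage sketch, assuming the functions are re-exported under `candle_core::cuda` (the exact re-export path in candle may differ) and that a CUDA device is available:

use candle_core::{Device, Tensor};

fn main() -> candle_core::Result<()> {
    let dev = Device::cuda_if_available(0)?;
    let a = Tensor::randn(0f32, 1f32, (1024, 1024), &dev)?;
    let b = Tensor::randn(0f32, 1f32, (1024, 1024), &dev)?;

    // Default: TF32 is allowed, so f32 matmuls may run on tensor cores.
    let _fast = a.matmul(&b)?;

    // Opt out to get full f32 precision at the cost of speed (assumed path).
    candle_core::cuda::set_gemm_reduced_precision_f32(false);
    let _precise = a.matmul(&b)?;
    Ok(())
}

TF32 keeps f32's 8-bit exponent but truncates the mantissa to 10 bits, so the trade-off is a small precision loss for a large throughput gain on Ampere-and-later GPUs, which is why the default leaves it enabled.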