Make it possible to use TF32 accumulation in F32 matmuls. (#2178)

* Allow the use of TF32 accumulation in matmul.

* Better timings.

* Dummy versions for use when CUDA is not enabled.
Author: Laurent Mazare
Date: 2024-05-11 12:28:39 +02:00
Commit: 9cff7bc3f4 (parent d9bc5ec151), committed via GitHub
3 changed files with 89 additions and 30 deletions
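The hunk below shows only the non-CUDA dummy fallbacks. For context, a rough sketch of what the CUDA-side pair could look like, assuming candle's `cuda` cargo feature and a hypothetical `GEMM_REDUCED_PRECISION_F32` atomic; the names and layout here are illustrative, not taken from this commit:

use std::sync::atomic::{AtomicBool, Ordering};

// Hypothetical global flag; defaults to true so TF32 is allowed out of the box.
#[cfg(feature = "cuda")]
static GEMM_REDUCED_PRECISION_F32: AtomicBool = AtomicBool::new(true);

// When CUDA is enabled, the getter and setter read and write the flag.
#[cfg(feature = "cuda")]
pub fn gemm_reduced_precision_f32() -> bool {
    GEMM_REDUCED_PRECISION_F32.load(Ordering::Relaxed)
}

#[cfg(feature = "cuda")]
pub fn set_gemm_reduced_precision_f32(b: bool) {
    GEMM_REDUCED_PRECISION_F32.store(b, Ordering::Relaxed)
}

Keeping the same signatures on both sides means the public API is identical whether or not CUDA is compiled in; the dummy versions in the hunk below simply report the default and ignore writes.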


@@ -258,3 +258,13 @@ pub fn gemm_reduced_precision_bf16() -> bool {
 /// This bool controls whether reduced precision reductions (e.g., with fp16 accumulation type) are
 /// allowed with bf16 GEMMs.
 pub fn set_gemm_reduced_precision_bf16(_: bool) {}
+
+/// This bool controls whether reduced precision reductions (e.g., with tf32 accumulation type) are
+/// allowed with f32 GEMMs.
+pub fn gemm_reduced_precision_f32() -> bool {
+    true
+}
+
+/// This bool controls whether reduced precision reductions (e.g., with tf32 accumulation type) are
+/// allowed with f32 GEMMs.
+pub fn set_gemm_reduced_precision_f32(_b: bool) {}
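
With these two entry points in place, toggling TF32 from user code is a one-liner. A minimal usage sketch, assuming the functions are re-exported under `candle_core::cuda` (the exact re-export path in candle may differ) and that a CUDA device is available:

use candle_core::{Device, Tensor};

fn main() -> candle_core::Result<()> {
    let dev = Device::cuda_if_available(0)?;
    let a = Tensor::randn(0f32, 1f32, (1024, 1024), &dev)?;
    let b = Tensor::randn(0f32, 1f32, (1024, 1024), &dev)?;

    // Default: TF32 is allowed, so f32 matmuls may run on tensor cores.
    let _fast = a.matmul(&b)?;

    // Opt out to get full f32 precision at the cost of speed (assumed path).
    candle_core::cuda::set_gemm_reduced_precision_f32(false);
    let _precise = a.matmul(&b)?;
    Ok(())
}

TF32 keeps f32's 8-bit exponent but truncates the mantissa to 10 bits, so the trade-off is a small precision loss for a large throughput gain on Ampere-and-later GPUs, which is why the default leaves it enabled.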