Fix the gumbel softmax by casting to f32. (#2928)

Laurent Mazare
2025-04-28 19:48:51 +02:00
committed by GitHub
parent e98754fc5a
commit d4bac37a61
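
For context: the function implements the Gumbel-max trick, sampling from the tempered softmax by perturbing the logits with Gumbel noise and taking an argmax. In the code below, minus_g = log(-log u) is the negated Gumbel variate, so logits - minus_g and logits + minus_g * (-temperature) both add positively scaled Gumbel noise. A minimal statement of the trick (a gloss, not part of the commit):

u_i \sim \mathrm{Uniform}(0, 1), \qquad g_i = -\log(-\log u_i)
\operatorname*{arg\,max}_i \, (\ell_i + \tau g_i) \;\sim\; \mathrm{Categorical}\!\left(\mathrm{softmax}(\ell / \tau)\right)

The instability the in-code comment mentions is likely the double logarithm: rand_like draws u in [1e-7, 0.999], and bf16's 8-bit mantissa rounds u near 1 so coarsely that -log(-log u) loses most of its precision; performing the computation in f32 avoids this.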

@@ -8,13 +8,16 @@ pub fn gumbel_softmax<D: candle::shape::Dim>(
 ) -> Result<Tensor> {
     if temperature <= 0.0 {
         logits.argmax(dim)
-    } else if temperature == 1.0 {
-        let minus_g = logits.rand_like(1e-7, 0.999)?.log()?.neg()?.log()?;
-        let sampled = (logits - minus_g)?.argmax(dim)?;
-        Ok(sampled)
     } else {
+        // Cast to f32, doing the Gumbel softmax in bf16 is a bit unstable.
+        let logits = logits.to_dtype(candle::DType::F32)?;
         let minus_g = logits.rand_like(1e-7, 0.999)?.log()?.neg()?.log()?;
-        let sampled = (logits + minus_g * (-temperature))?.argmax(dim)?;
-        Ok(sampled)
+        if temperature == 1.0 {
+            let sampled = (logits - minus_g)?.argmax(dim)?;
+            Ok(sampled)
+        } else {
+            let sampled = (logits + minus_g * (-temperature))?.argmax(dim)?;
+            Ok(sampled)
+        }
     }
 }
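
A minimal usage sketch of the patched function. The module path candle_nn::sampling::gumbel_softmax, the u32 index dtype, and the example values are assumptions for illustration, not taken from the commit:

use candle::{DType, Device, Result, Tensor};
use candle_nn::sampling::gumbel_softmax; // assumed module path

fn main() -> Result<()> {
    let device = Device::Cpu;
    // Hypothetical logits over four tokens, stored in bf16 to mirror the
    // dtype that motivated this fix.
    let logits = Tensor::new(&[2.0f32, 0.5, -1.0, 0.0], &device)?.to_dtype(DType::BF16)?;
    // A temperature that is > 0 and != 1.0 exercises the tempered branch;
    // the function now upcasts to f32 internally before adding Gumbel noise.
    let sampled = gumbel_softmax(&logits, 0.7, 0)?;
    // argmax over the only dimension leaves a rank-0 index tensor.
    println!("sampled token id: {}", sampled.to_scalar::<u32>()?);
    Ok(())
}

Note that bf16 inputs no longer need a caller-side cast: the upcast happens inside gumbel_softmax for every temperature above zero.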