Apply the cast before the scaling. (#2135)

2025-06-17 11:08:52 +00:00 · 2024-04-28 08:30:35 +02:00
parent 805f3be8e1
commit e5c8b88f90
1 changed files with 1 additions and 1 deletions
--- a/candle-nn/src/ops.rs
+++ b/candle-nn/src/ops.rs
@@ -70,7 +70,7 @@ pub fn dropout(xs: &Tensor, drop_p: f32) -> Result<Tensor> {
    let rand = Tensor::rand(0f32, 1f32, xs.shape(), xs.device())?;
    let scale = 1.0 / (1.0 - drop_p as f64);
    let drop_p = Tensor::new(drop_p, xs.device())?.broadcast_as(xs.shape())?;
-    let mask = (rand.ge(&drop_p)? * scale)?.to_dtype(xs.dtype())?;
+    let mask = (rand.ge(&drop_p)?.to_dtype(xs.dtype())? * scale)?;
    xs * mask
 }