Add a Dropout layer (#676)

* Add a dropout layer. * Add an actual layer.
2025-06-17 19:18:50 +00:00 · 2023-08-30 16:19:28 +01:00
parent ad8a62dbf5
commit 3159982a89
2 changed files with 36 additions and 0 deletions
--- a/candle-nn/src/lib.rs
+++ b/candle-nn/src/lib.rs
@@ -23,6 +23,7 @@ pub use group_norm::{group_norm, GroupNorm};
 pub use init::Init;
 pub use layer_norm::{layer_norm, rms_norm, LayerNorm, LayerNormConfig, RmsNorm};
 pub use linear::{linear, linear_no_bias, Linear};
+pub use ops::Dropout;
 pub use optim::{AdamW, ParamsAdamW, SGD};
 pub use rnn::{lstm, LSTM, RNN};
 pub use var_builder::VarBuilder;
--- a/candle-nn/src/ops.rs
+++ b/candle-nn/src/ops.rs
@@ -42,3 +42,38 @@ pub fn sigmoid(xs: &Tensor) -> Result<Tensor> {
    // TODO: Should we have a specialized op for this?
    (xs.neg()?.exp()? + 1.0)?.recip()
 }
+
+/// Applies (inverted) dropout to `xs`.
+///
+/// Each element is kept when a uniform sample drawn from `[0, 1)` is greater than or equal to
+/// `drop_p`, and zeroed otherwise. Kept elements are multiplied by `1 / (1 - drop_p)`. The
+/// returned tensor has the same shape as `xs`.
+///
+/// # Errors
+///
+/// Returns an error if `drop_p` is not in `[0, 1)`, or if any of the underlying tensor
+/// operations fails.
+pub fn dropout(xs: &Tensor, drop_p: f32) -> Result<Tensor> {
+    // This implementation is inefficient as it stores the full mask for the backward pass.
+    // Instead we could just store the seed and have a specialized kernel that would both
+    // generate the random mask and apply it.
+    // Another easier optimization would be to be able to generate boolean mask using just a bit of
+    // entropy per element rather than generating a full float per element.
+    if !(0. ..1.).contains(&drop_p) {
+        candle::bail!("dropout probability has to be in [0, 1), got {drop_p}")
+    }
+    let rand = Tensor::rand(0f32, 1f32, xs.shape(), xs.device())?;
+    let scale = 1.0 / (1.0 - drop_p as f64);
+    let drop_p = Tensor::new(drop_p, xs.device())?.broadcast_as(xs.shape())?;
+    // The comparison produces an integer 0/1 tensor. Convert it to the dtype of `xs` *before*
+    // scaling: multiplying the integer mask directly would truncate a fractional `scale`
+    // (e.g. 1 / (1 - 0.1)) and the kept activations would not be rescaled.
+    let mask = (rand.ge(&drop_p)?.to_dtype(xs.dtype())? * scale)?;
+    xs * mask
+}
+
+/// A dropout layer.
+///
+/// Stores the drop probability and applies [`dropout`] to its input when run in training mode.
+/// The type is `Clone` so that models embedding this layer can themselves derive `Clone`.
+#[derive(Clone, Debug)]
+pub struct Dropout {
+    /// Probability of zeroing an element; passed unchanged to [`dropout`].
+    drop_p: f32,
+}
+
+impl Dropout {
+    /// Creates a dropout layer with drop probability `drop_p`.
+    ///
+    /// The value is not validated here: an out-of-range probability is only reported by
+    /// [`Dropout::forward`] when it is called with `train == true`.
+    pub fn new(drop_p: f32) -> Dropout {
+        Self { drop_p }
+    }
+
+    /// Runs the layer on `xs`.
+    ///
+    /// When `train` is `true` the result is `dropout(xs, drop_p)`; otherwise a clone of `xs`
+    /// is returned.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned by [`dropout`] in training mode.
+    pub fn forward(&self, xs: &Tensor, train: bool) -> Result<Tensor> {
+        if train {
+            dropout(xs, self.drop_p)
+        } else {
+            // Outside of training the layer is the identity.
+            Ok(xs.clone())
+        }
+    }
+}