Mirror of https://github.com/huggingface/candle.git (synced 2025-06-18 03:28:50 +00:00)
Add a Dropout layer (#676)

* Add a dropout layer.
* Add an actual layer.
@@ -23,6 +23,7 @@ pub use group_norm::{group_norm, GroupNorm};
 pub use init::Init;
 pub use layer_norm::{layer_norm, rms_norm, LayerNorm, LayerNormConfig, RmsNorm};
 pub use linear::{linear, linear_no_bias, Linear};
+pub use ops::Dropout;
 pub use optim::{AdamW, ParamsAdamW, SGD};
 pub use rnn::{lstm, LSTM, RNN};
 pub use var_builder::VarBuilder;
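The re-export above makes the new layer reachable from the crate root; a one-line sketch of what user code can now write (not part of the commit):

// Dropout can now be imported from the crate root instead of only via the ops module.
use candle_nn::Dropout;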
@@ -42,3 +42,38 @@ pub fn sigmoid(xs: &Tensor) -> Result<Tensor> {
     // TODO: Should we have a specialized op for this?
     (xs.neg()?.exp()? + 1.0)?.recip()
 }
+
+pub fn dropout(xs: &Tensor, drop_p: f32) -> Result<Tensor> {
+    // This implementation is inefficient as it stores the full mask for the backward pass.
+    // Instead we could just store the seed and have a specialized kernel that would both
+    // generate the random mask and apply it.
+    // Another easier optimization would be to be able to generate boolean mask using just a bit of
+    // entropy per element rather than generating a full float per element.
+    if !(0. ..1.).contains(&drop_p) {
+        candle::bail!("dropout probability has to be in [0, 1), got {drop_p}")
+    }
+    let rand = Tensor::rand(0f32, 1f32, xs.shape(), xs.device())?;
+    let scale = 1.0 / (1.0 - drop_p as f64);
+    let drop_p = Tensor::new(drop_p, xs.device())?.broadcast_as(xs.shape())?;
+    let mask = (rand.ge(&drop_p)? * scale)?.to_dtype(xs.dtype())?;
+    xs * mask
+}
+
+#[derive(Debug)]
+pub struct Dropout {
+    drop_p: f32,
+}
+
+impl Dropout {
+    pub fn new(drop_p: f32) -> Dropout {
+        Self { drop_p }
+    }
+
+    pub fn forward(&self, xs: &Tensor, train: bool) -> Result<Tensor> {
+        if train {
+            dropout(xs, self.drop_p)
+        } else {
+            Ok(xs.clone())
+        }
+    }
+}
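To show how the new pieces fit together, here is a minimal usage sketch, not part of the commit. It assumes the published crate names candle_core and candle_nn (inside the workspace candle-core is referenced simply as candle, as in the bail! call above); the tensor shape and drop probability are illustrative.

// Minimal usage sketch (not part of the commit): run the new Dropout layer
// on a small tensor of ones, once in training mode and once in eval mode.
use candle_core::{DType, Device, Result, Tensor};
use candle_nn::Dropout;

fn main() -> Result<()> {
    let device = Device::Cpu;
    // Illustrative input: a 2x3 tensor of ones.
    let xs = Tensor::ones((2, 3), DType::F32, &device)?;
    let dropout_layer = Dropout::new(0.5);
    // train == true: roughly half the elements are zeroed and the survivors are
    // scaled by 1 / (1 - 0.5) = 2, so the expected value of each element is unchanged.
    let train_out = dropout_layer.forward(&xs, true)?;
    // train == false: the input passes through unchanged.
    let eval_out = dropout_layer.forward(&xs, false)?;
    println!("{train_out}\n{eval_out}");
    Ok(())
}

Because the mask is pre-scaled by 1 / (1 - drop_p) during training (inverted dropout), no rescaling is needed at eval time; the forward pass simply returns the input unchanged when train is false.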