use candle::Tensor;
use serde::Deserialize;

#[derive(Debug, Clone, Copy, PartialEq, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum Activation {
    #[default]
    Gelu,
    #[serde(rename = "gated-gelu")]
    NewGelu,
    Relu,
    Relu2,
    Relu6,
    Silu,
    Sigmoid,
    Elu(f64),
    LeakyRelu(f64),
}

impl super::Module for Activation {
    fn forward(&self, xs: &Tensor) -> candle::Result<Tensor> {
        match self {
            // Exact GELU, computed via the error function.
            Self::Gelu => xs.gelu_erf(),
            // Tanh-approximated GELU, matching the "new" GELU in transformers:
            // https://github.com/huggingface/transformers/blob/12f043eaeaabfef6f6efea411d98e6f6d3c094b7/src/transformers/activations.py#L49-L78
            Self::NewGelu => xs.gelu(),
            Self::Relu => xs.relu(),
            // ReLU squared: max(0, x)^2.
            Self::Relu2 => xs.relu()?.sqr(),
            // ReLU capped at 6: min(max(0, x), 6).
            Self::Relu6 => xs.clamp(0f32, 6f32),
            Self::Silu => crate::ops::silu(xs),
            Self::Sigmoid => crate::ops::sigmoid(xs),
            // ELU with a configurable alpha for the negative branch.
            &Self::Elu(alpha) => xs.elu(alpha),
            // Leaky ReLU with a configurable slope for negative inputs.
            &Self::LeakyRelu(negative_slope) => crate::ops::leaky_relu(xs, negative_slope),
        }
    }
}
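
// A minimal usage sketch: deserializing an `Activation` from a config string
// and applying it to a tensor. This assumes the parent module re-exports the
// `Module` trait (as referenced by `super::Module` above) and that `serde_json`
// is available as a dev-dependency; adjust paths to your crate layout.
#[cfg(test)]
mod tests {
    use super::super::Module;
    use super::Activation;
    use candle::{Device, Tensor};

    #[test]
    fn deserialize_and_apply() -> candle::Result<()> {
        // "relu6" maps to `Activation::Relu6` through the lowercase serde rename.
        let act: Activation = serde_json::from_str("\"relu6\"").unwrap();
        assert_eq!(act, Activation::Relu6);

        // Relu6 clamps values below 0 up to 0 and values above 6 down to 6.
        let xs = Tensor::new(&[-1f32, 3., 9.], &Device::Cpu)?;
        let ys = act.forward(&xs)?;
        assert_eq!(ys.to_vec1::<f32>()?, vec![0f32, 3., 6.]);
        Ok(())
    }
}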