Add the swiglu activation from the chatglm PR. (#1246)

Laurent Mazare
2023-11-02 20:01:34 +01:00
committed by GitHub
parent e08fbb6543
commit a2a20aeecc
2 changed files with 7 additions and 0 deletions


@@ -14,6 +14,7 @@ pub enum Activation {
     Silu,
     Sigmoid,
     HardSigmoid,
+    Swiglu,
     Swish,
     HardSwish,
     Elu(f64),
@@ -32,6 +33,7 @@ impl super::Module for Activation {
             Self::Silu => crate::ops::silu(xs),
             Self::Sigmoid => crate::ops::sigmoid(xs),
             Self::HardSigmoid => crate::ops::hard_sigmoid(xs),
+            Self::Swiglu => crate::ops::swiglu(xs),
             Self::Swish => xs * crate::ops::sigmoid(xs)?,
             Self::HardSwish => xs * crate::ops::hard_sigmoid(xs)?,
             &Self::Elu(alpha) => xs.elu(alpha),
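
The `Module` impl just dispatches each variant to the matching function in `crate::ops`, so the new variant is usable like any other activation. A minimal sketch of a forward pass through it, assuming the standard `candle` tensor constructors and an input whose last dimension is even (shapes here are illustrative, not from the commit):

use candle::{Device, Result, Tensor};
use candle_nn::{Activation, Module};

fn main() -> Result<()> {
    // Swiglu halves the last dimension, so (2, 8) becomes (2, 4).
    let xs = Tensor::randn(0f32, 1f32, (2, 8), &Device::Cpu)?;
    let ys = Activation::Swiglu.forward(&xs)?;
    assert_eq!(ys.dims(), &[2, 4]);
    Ok(())
}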


@@ -39,6 +39,11 @@ pub fn silu(xs: &Tensor) -> Result<Tensor> {
     xs / (xs.neg()?.exp()? + 1.0)?
 }
 
+pub fn swiglu(xs: &Tensor) -> Result<Tensor> {
+    let xs = xs.chunk(2, candle::D::Minus1)?;
+    crate::ops::silu(&xs[0])? * &xs[1]
+}
+
 pub fn sigmoid(xs: &Tensor) -> Result<Tensor> {
     // TODO: Should we have a specialized op for this?
     (xs.neg()?.exp()? + 1.0)?.recip()
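
The op itself chunks the input into two halves (a, b) along the last dimension and returns silu(a) * b, i.e. a SiLU-gated linear unit applied to an already-concatenated projection, as in the chatglm PR. A small sketch checking that reading of the code, assuming both functions are exposed from candle-nn's `ops` module as the `crate::ops::` paths above suggest:

use candle::{Device, Result, Tensor, D};

fn main() -> Result<()> {
    let xs = Tensor::randn(0f32, 1f32, (2, 8), &Device::Cpu)?;
    // Manual path: split the last dimension and gate by hand.
    let chunks = xs.chunk(2, D::Minus1)?;
    let manual = (&candle_nn::ops::silu(&chunks[0])? * &chunks[1])?;
    // Op path: the function added by this commit.
    let ys = candle_nn::ops::swiglu(&xs)?;
    // Both paths apply the same ops, so the values match exactly.
    assert_eq!(manual.to_vec2::<f32>()?, ys.to_vec2::<f32>()?);
    Ok(())
}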