From 53510ce427160674268199349b22bbb62cd2b9ba Mon Sep 17 00:00:00 2001
From: Laurent Mazare
Date: Fri, 29 Sep 2023 08:06:54 +0200
Subject: [PATCH] Use a silu activation in mistral. (#991)

---
 candle-nn/src/activation.rs               | 4 ++++
 candle-transformers/src/models/mistral.rs | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/candle-nn/src/activation.rs b/candle-nn/src/activation.rs
index 1e67ed53..ddc211a7 100644
--- a/candle-nn/src/activation.rs
+++ b/candle-nn/src/activation.rs
@@ -9,6 +9,8 @@ pub enum Activation {
     #[serde(rename = "gated-gelu")]
     NewGelu,
     Relu,
+    Silu,
+    Sigmoid,
     Elu(f64),
     LeakyRelu(f64),
 }
@@ -20,6 +22,8 @@ impl super::Module for Activation {
             // https://github.com/huggingface/transformers/blob/12f043eaeaabfef6f6efea411d98e6f6d3c094b7/src/transformers/activations.py#L49-L78
             Self::NewGelu => xs.gelu(),
             Self::Relu => xs.relu(),
+            Self::Silu => crate::ops::silu(xs),
+            Self::Sigmoid => crate::ops::sigmoid(xs),
             &Self::Elu(alpha) => xs.elu(alpha),
             &Self::LeakyRelu(negative_slope) => crate::ops::leaky_relu(xs, negative_slope),
         }
diff --git a/candle-transformers/src/models/mistral.rs b/candle-transformers/src/models/mistral.rs
index 245150e7..346fda89 100644
--- a/candle-transformers/src/models/mistral.rs
+++ b/candle-transformers/src/models/mistral.rs
@@ -28,7 +28,7 @@ impl Config {
             num_hidden_layers: 32,
             num_attention_heads: 32,
             num_key_value_heads: 8,
-            hidden_act: Activation::Gelu, // TODO: silu
+            hidden_act: Activation::Silu,
             max_position_embeddings: 32768,
             rms_norm_eps: 1e-5,
             rope_theta: 10_000.,
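
Note (not part of the patch): a minimal sketch of how the new Activation::Silu variant can be exercised through candle-nn's Module trait, where SiLU(x) = x * sigmoid(x). The crate names (candle_core, candle_nn) and the Tensor constructor used here are assumptions based on the candle repository around the time of this change; exact signatures may differ across versions.

// Illustrative sketch only, not part of the patch.
use candle_core::{Device, Result, Tensor};
use candle_nn::{Activation, Module};

fn main() -> Result<()> {
    // A small 1-D input tensor on the CPU.
    let xs = Tensor::new(&[-1.0f32, 0.0, 1.0, 2.0], &Device::Cpu)?;
    // Activation::Silu dispatches to candle_nn::ops::silu, i.e. x * sigmoid(x),
    // which is the hidden activation the Mistral config now selects.
    let ys = Activation::Silu.forward(&xs)?;
    println!("{ys}");
    Ok(())
}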