Use the gelu-erf activation. (#969)

Author: Laurent Mazare
Date: 2023-09-26 22:30:21 +01:00
Committed by: GitHub
parent 4abc1ea34d
commit ce0a4e3a85
3 changed files with 5 additions and 9 deletions


@@ -25,10 +25,8 @@ impl HiddenActLayer {
     fn forward(&self, xs: &Tensor) -> candle::Result<Tensor> {
         let _enter = self.span.enter();
         match self.act {
-            // TODO: The all-MiniLM-L6-v2 model uses "gelu" whereas this is "gelu_new", this explains some
-            // small numerical difference.
-            // https://github.com/huggingface/transformers/blob/cd4584e3c809bb9e1392ccd3fe38b40daba5519a/src/transformers/activations.py#L213
-            HiddenAct::Gelu => xs.gelu(),
+            HiddenAct::Gelu => xs.gelu_erf(),
             HiddenAct::Relu => xs.relu(),
         }
     }
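
For context: candle's `gelu` op uses the tanh approximation (the "gelu_new" variant in transformers), while `gelu_erf` computes the exact erf-based GELU that the all-MiniLM-L6-v2 checkpoint expects, which is what removed the small numerical difference noted in the old TODO. Below is a minimal standalone Rust sketch of the two formulas, not candle's implementation; the helper names are illustrative and erf is approximated with Abramowitz & Stegun 7.1.26 since the standard library has no erf.

// Compare the exact (erf-based) GELU with the tanh approximation.
// Helper names here are illustrative, not candle's API.

/// erf approximation (Abramowitz & Stegun 7.1.26, max abs error ~1.5e-7).
fn erf(x: f64) -> f64 {
    let sign = if x < 0.0 { -1.0 } else { 1.0 };
    let x = x.abs();
    let t = 1.0 / (1.0 + 0.3275911 * x);
    let poly = ((((1.061405429 * t - 1.453152027) * t + 1.421413741) * t
        - 0.284496736) * t + 0.254829592) * t;
    sign * (1.0 - poly * (-x * x).exp())
}

/// Exact GELU: 0.5 * x * (1 + erf(x / sqrt(2))), i.e. what `gelu_erf` computes.
fn gelu_erf(x: f64) -> f64 {
    0.5 * x * (1.0 + erf(x / std::f64::consts::SQRT_2))
}

/// Tanh approximation ("gelu_new"), i.e. what candle's `gelu` computes.
fn gelu_tanh(x: f64) -> f64 {
    let c = (2.0 / std::f64::consts::PI).sqrt();
    0.5 * x * (1.0 + (c * (x + 0.044715 * x.powi(3))).tanh())
}

fn main() {
    for &x in &[-3.0, -1.0, -0.5, 0.0, 0.5, 1.0, 3.0] {
        let (e, t) = (gelu_erf(x), gelu_tanh(x));
        println!("x = {x:+.1}  erf: {e:+.6}  tanh: {t:+.6}  diff: {:+.2e}", e - t);
    }
}

Running the sketch shows the two variants agree to within roughly 1e-3, which is the "small numerical difference" the removed comment referred to.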