derivative for GELU (#1160)

* derivative for GELU

* add tests
Authored by KGrewal1 on 2023-10-23 20:23:45 +01:00, committed by GitHub
parent eae94a451b
commit 807e3f9f52
2 changed files with 22 additions and 1 deletion


@@ -471,7 +471,15 @@ impl Tensor {
             Op::Unary(_, UnaryOp::Round) => {
                 Err(Error::BackwardNotSupported { op: "round" })?
             }
-            Op::Unary(_, UnaryOp::Gelu) => Err(Error::BackwardNotSupported { op: "gelu" })?,
+            Op::Unary(arg, UnaryOp::Gelu) => {
+                let sum_grad = grads.or_insert(arg)?;
+                let cube = arg.powf(3.)?;
+                let tanh = (0.0356774 * &cube + (0.797885 * arg)?)?.tanh()?;
+                let gelu_grad = (((0.5 * &tanh)?
+                    + (0.0535161 * cube + (0.398942 * arg)?)? * (1. - tanh.powf(2.)?))?
+                    + 0.5)?;
+                *sum_grad = sum_grad.add(&(&grad * gelu_grad)?)?
+            }
             Op::Unary(_, UnaryOp::Erf) => Err(Error::BackwardNotSupported { op: "erf" })?,
             Op::Unary(_, UnaryOp::GeluErf) => {
                 Err(Error::BackwardNotSupported { op: "gelu-erf" })?
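
The constants in the new arm follow from differentiating the tanh approximation of GELU: with u(x) = 0.797885*x + 0.0356774*x^3, gelu(x) = 0.5*x*(1 + tanh(u)) and gelu'(x) = 0.5*tanh(u) + (0.0535161*x^3 + 0.398942*x)*(1 - tanh(u)^2) + 0.5. As a sanity check, here is a minimal standalone Rust sketch (plain f64, no candle types; the sample points and the 1e-4 tolerance are illustrative assumptions) comparing this closed form against a central finite difference:

    // Forward pass: tanh approximation of GELU, matching the forward op.
    fn gelu(x: f64) -> f64 {
        0.5 * x * (1. + (0.797885 * x + 0.0356774 * x.powi(3)).tanh())
    }

    // Closed-form derivative, using the same constants as the backward arm above.
    fn gelu_grad(x: f64) -> f64 {
        let u = 0.797885 * x + 0.0356774 * x.powi(3);
        let t = u.tanh();
        0.5 * t + (0.0535161 * x.powi(3) + 0.398942 * x) * (1. - t * t) + 0.5
    }

    fn main() {
        let eps = 1e-5;
        // Sample points are an illustrative assumption, not taken from the test file.
        for &x in &[-2.0f64, -0.5, 0.15, 1.0, 3.0] {
            let fd = (gelu(x + eps) - gelu(x - eps)) / (2. * eps);
            assert!((gelu_grad(x) - fd).abs() < 1e-4, "mismatch at x = {x}");
        }
        println!("closed-form GELU gradient matches finite differences");
    }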


@@ -192,6 +192,19 @@ fn unary_grad(device: &Device) -> Result<()> {
         test_utils::to_vec1_round(grad_x, 2)?,
         [0.01, 0.42, 0.0, 0.98],
     );
+    // testing compared to pytorch nn.GELU(approximate = 'tanh')
+    let y = x.gelu()?;
+    let grads = y.backward()?;
+    let grad_x = grads.get(&x).context("no grad for x")?;
+    assert_eq!(
+        test_utils::to_vec1_round(&y, 4)?,
+        [2.9964, 0.8412, 3.9999, 0.0839]
+    );
+    assert_eq!(
+        test_utils::to_vec1_round(grad_x, 4)?,
+        [1.0116, 1.0830, 1.0003, 0.6188],
+    );
     Ok(())
 }
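
With this change, calling backward() through the tanh-approximate gelu op no longer returns BackwardNotSupported. A minimal usage sketch, assuming candle's Var / GradStore API as exercised in the test above (Device::Cpu and the input values mirror the test and are assumptions for illustration):

    use candle_core::{Device, Result, Var};

    fn main() -> Result<()> {
        let device = Device::Cpu;
        // Leaf variable tracked for gradients; values assumed to match the test input.
        let x = Var::new(&[3f32, 1., 4., 0.15], &device)?;
        // Forward through the tanh-approximate GELU, then backward for d(sum y)/dx.
        let y = x.gelu()?;
        let grads = y.backward()?;
        let grad_x = grads.get(&x).expect("no grad for x");
        println!("y     = {y}");
        println!("dy/dx = {grad_x}");
        Ok(())
    }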