Mirror of https://github.com/huggingface/candle.git (synced 2025-06-17 02:58:50 +00:00)
Creating Gelu op (no backward).
@@ -17,11 +17,22 @@ extern "C" __global__ void FN_NAME( \
    } \
} \

template<typename T>
__device__ T gelu_fwd(T x) {
    constexpr T fastCoeff = 0.044715;
    T x_sq = x * x;
    T x_cube = x_sq * x;
    T alpha = x + fastCoeff * x_cube;
    return 0.5 * x * (1.0 + tanhg(M_2_SQRTPI * M_SQRT1_2 * alpha));
}

#if __CUDA_ARCH__ >= 530
UNARY_OP(__half, ucopy_f16, x)
UNARY_OP(__half, uneg_f16, -x)
UNARY_OP(__half, usqr_f16, x*x)
UNARY_OP(__half, usqrt_f16, sqrtg(x))
// UNARY_OP(__half, gelu_f16, gelu_fwd(x))
#endif

UNARY_OP(float, ucopy_f32, x)
@@ -32,3 +43,4 @@ UNARY_OP(float, usqr_f32, x*x)
UNARY_OP(float, usqr_f64, x*x)
UNARY_OP(float, usqrt_f32, sqrtg(x))
UNARY_OP(float, usqrt_f64, sqrtg(x))
UNARY_OP(float, gelu_f32, gelu_fwd(x))
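For reference, M_2_SQRTPI is 2/√π and M_SQRT1_2 is 1/√2, so their product is √(2/π) and gelu_fwd computes the usual tanh approximation 0.5·x·(1 + tanh(√(2/π)·(x + 0.044715·x³))). A minimal host-side sketch of the same formula, not part of the commit, that can be used to spot-check kernel output:

// Reference-only sketch (not from the commit): the same tanh-based GELU
// approximation evaluated on the host.
fn gelu_ref(x: f32) -> f32 {
    // sqrt(2/pi) == M_2_SQRTPI * M_SQRT1_2
    let sqrt_2_over_pi = (2.0f32 / std::f32::consts::PI).sqrt();
    0.5 * x * (1.0 + (sqrt_2_over_pi * (x + 0.044715 * x * x * x)).tanh())
}

fn main() {
    for x in [-2.0f32, -1.0, 0.0, 1.0, 2.0] {
        println!("gelu({x}) = {}", gelu_ref(x));
    }
}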
src/op.rs (+28)
@@ -22,6 +22,7 @@ pub(crate) enum Op {
    Sqrt(Tensor),
    ToDevice(Tensor),
    Transpose(Tensor, usize, usize),
    Gelu(Tensor),
    // TODO: Support for custom ops.
}
@@ -52,6 +53,7 @@ pub(crate) struct Sub;
pub(crate) struct Neg;
pub(crate) struct Sqr;
pub(crate) struct Sqrt;
pub(crate) struct Gelu;

impl BinaryOp for Add {
    const NAME: &'static str = "add";
@@ -136,3 +138,29 @@ impl UnaryOp for Sqrt {
    const KERNEL_F32: &'static str = "usqrt_f32";
    const KERNEL_F64: &'static str = "usqrt_f64";
}

/// `gelu` operation
/// <https://en.wikipedia.org/wiki/Activation_function#Comparison_of_activation_functions>
#[inline]
pub fn gelu_f32(v: f32) -> f32 {
    0.5 * (v)
        * (1.0 + f32::tanh((2.0f32 / std::f32::consts::PI).sqrt() * v * (1.0 + 0.044715 * v * v)))
}
/// `gelu` operation
/// <https://en.wikipedia.org/wiki/Activation_function#Comparison_of_activation_functions>
#[inline]
pub fn gelu_f64(v: f64) -> f64 {
    0.5 * (v)
        * (1.0 + f64::tanh((2.0f64 / std::f64::consts::PI).sqrt() * v * (1.0 + 0.044715 * v * v)))
}
impl UnaryOp for Gelu {
    const NAME: &'static str = "gelu";
    fn f32(v1: f32) -> f32 {
        gelu_f32(v1)
    }
    fn f64(v1: f64) -> f64 {
        gelu_f64(v1)
    }
    const KERNEL_F32: &'static str = "gelu_f32";
    const KERNEL_F64: &'static str = "gelu_f64";
}
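The impl above suggests the shape of the crate's UnaryOp trait; a rough reconstruction, inferred from the constants and methods used here and not part of this diff (the actual definition in src/op.rs may differ):

// Assumed shape of the UnaryOp trait, inferred from the Gelu impl above;
// the real definition in src/op.rs may name or order things differently.
pub(crate) trait UnaryOp {
    const NAME: &'static str;
    const KERNEL_F32: &'static str;
    const KERNEL_F64: &'static str;
    fn f32(v1: f32) -> f32;
    fn f64(v1: f64) -> f64;
}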
@@ -240,6 +240,7 @@ impl Tensor {
    unary_op!(neg, Neg);
    unary_op!(sqr, Sqr);
    unary_op!(sqrt, Sqrt);
    unary_op!(gelu, Gelu);
    pub fn to_scalar<S: crate::WithDType>(&self) -> Result<S> {
        if self.rank() != 0 {
            return Err(Error::UnexpectedNumberOfDims {
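With unary_op!(gelu, Gelu) in place, the new op is exposed as a Tensor method. A hypothetical usage sketch; the crate name, Tensor::new, and Device::Cpu are assumptions about the surrounding API and are not shown in this diff:

// Hypothetical usage sketch; constructor, device, and crate names are assumptions.
use candle::{Device, Tensor};

fn main() -> candle::Result<()> {
    let t = Tensor::new(&[-1.0f32, 0.0, 1.0, 2.0], &Device::Cpu)?;
    let _y = t.gelu()?; // forward only: backward for gelu is not implemented in this commit
    Ok(())
}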
@@ -766,6 +767,7 @@ impl Tensor {
            | Op::Transpose(node, _, _)
            | Op::Sqr(node)
            | Op::Sqrt(node)
            | Op::Gelu(node)
            | Op::Neg(node) => {
                let (tg, nodes) = walk(node, nodes, already_seen);
                track_grad |= tg;
@@ -854,6 +856,7 @@ impl Tensor {
                    *sum_grad = sum_grad.add(&arg_grad)?
                }
                Op::Reshape(_arg) => return Err(Error::BackwardNotSupported { op: "reshape" }),
                Op::Gelu(_) => return Err(Error::BackwardNotSupported { op: "gelu" }),
                Op::Sqr(arg) => {
                    let arg_grad = arg.mul(&grad)?.affine(2., 0.)?;
                    let sum_grad = grads.or_insert(arg)?;
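Backward for gelu is left unimplemented in this commit. For a future backward pass, the derivative of the tanh approximation is straightforward; a sketch only, not part of the diff:

// Sketch only (not in this commit): analytic derivative of the
// tanh-approximated GELU, g(x) = 0.5 * x * (1 + tanh(u)) with
// u = k * (x + c * x^3), k = sqrt(2/pi), c = 0.044715, giving
// g'(x) = 0.5 * (1 + tanh(u)) + 0.5 * x * (1 - tanh(u)^2) * k * (1 + 3 * c * x^2).
fn gelu_grad_f32(x: f32) -> f32 {
    let c = 0.044715f32;
    let k = (2.0f32 / std::f32::consts::PI).sqrt();
    let u = k * (x + c * x * x * x);
    let t = u.tanh();
    0.5 * (1.0 + t) + 0.5 * x * (1.0 - t * t) * k * (1.0 + 3.0 * c * x * x)
}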