From 1ce3843cab25813ef5451ad270ad771f4227a6df Mon Sep 17 00:00:00 2001
From: laurent
Date: Wed, 28 Jun 2023 09:38:54 +0100
Subject: [PATCH] Add the relu op.

---
 candle-core/src/backprop.rs |  2 ++
 candle-core/src/op.rs       | 36 +++++++++++++++++++++++++++++++-----
 candle-core/src/tensor.rs   |  1 +
 candle-kernels/src/unary.cu | 17 +++++++++++++----
 4 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/candle-core/src/backprop.rs b/candle-core/src/backprop.rs
index 3137dce8..bc6740cf 100644
--- a/candle-core/src/backprop.rs
+++ b/candle-core/src/backprop.rs
@@ -70,6 +70,7 @@
             | Op::Sqr(node)
             | Op::Sqrt(node)
             | Op::Gelu(node)
+            | Op::Relu(node)
             | Op::Exp(node)
             | Op::Log(node)
             | Op::Sin(node)
@@ -210,6 +211,7 @@ impl Tensor {
                 }
                 Op::Reshape(_arg) => return Err(Error::BackwardNotSupported { op: "reshape" }),
                 Op::Gelu(_) => return Err(Error::BackwardNotSupported { op: "gelu" }),
+                Op::Relu(_) => return Err(Error::BackwardNotSupported { op: "relu" }),
                 Op::Sqr(arg) => {
                     let arg_grad = arg.mul(&grad)?.affine(2., 0.)?;
                     let sum_grad = grads.or_insert(arg)?;
diff --git a/candle-core/src/op.rs b/candle-core/src/op.rs
index 5eab6b1f..7b0e18fe 100644
--- a/candle-core/src/op.rs
+++ b/candle-core/src/op.rs
@@ -37,6 +37,7 @@
     ToDevice(Tensor),
     Transpose(Tensor, usize, usize),
     Gelu(Tensor),
+    Relu(Tensor),
     // TODO: Support for custom ops.
 }
 
@@ -81,6 +82,7 @@ pub(crate) struct Neg;
 pub(crate) struct Sqr;
 pub(crate) struct Sqrt;
 pub(crate) struct Gelu;
+pub(crate) struct Relu;
 
 macro_rules! bin_op {
     ($op:ident, $name: literal, $e: expr) => {
@@ -189,9 +191,33 @@ impl UnaryOp for Gelu {
     fn u32(_: u32) -> u32 {
         0
     }
-    const KERNEL_BF16: &'static str = "gelu_bf16";
-    const KERNEL_F16: &'static str = "gelu_f16";
-    const KERNEL_F32: &'static str = "gelu_f32";
-    const KERNEL_F64: &'static str = "gelu_f64";
-    const KERNEL_U32: &'static str = "gelu_u32";
+    const KERNEL_BF16: &'static str = "ugelu_bf16";
+    const KERNEL_F16: &'static str = "ugelu_f16";
+    const KERNEL_F32: &'static str = "ugelu_f32";
+    const KERNEL_F64: &'static str = "ugelu_f64";
+    const KERNEL_U32: &'static str = "ugelu_u32";
+}
+
+impl UnaryOp for Relu {
+    const NAME: &'static str = "relu";
+    const KERNEL_BF16: &'static str = "urelu_bf16";
+    const KERNEL_F16: &'static str = "urelu_f16";
+    const KERNEL_F32: &'static str = "urelu_f32";
+    const KERNEL_F64: &'static str = "urelu_f64";
+    const KERNEL_U32: &'static str = "urelu_u32";
+    fn bf16(v: bf16) -> bf16 {
+        v.max(bf16::ZERO)
+    }
+    fn f16(v: f16) -> f16 {
+        v.max(f16::ZERO)
+    }
+    fn f32(v: f32) -> f32 {
+        v.max(0f32)
+    }
+    fn f64(v: f64) -> f64 {
+        v.max(0f64)
+    }
+    fn u32(v: u32) -> u32 {
+        v
+    }
 }
diff --git a/candle-core/src/tensor.rs b/candle-core/src/tensor.rs
index 95254bab..4cff4efc 100644
--- a/candle-core/src/tensor.rs
+++ b/candle-core/src/tensor.rs
@@ -320,6 +320,7 @@ impl Tensor {
     unary_op!(sqr, Sqr);
     unary_op!(sqrt, Sqrt);
     unary_op!(gelu, Gelu);
+    unary_op!(relu, Relu);
     pub fn to_scalar<S: crate::WithDType>(&self) -> Result<S> {
         if self.rank() != 0 {
             return Err(Error::UnexpectedNumberOfDims {
diff --git a/candle-kernels/src/unary.cu b/candle-kernels/src/unary.cu
index 03f9353b..7af5b388 100644
--- a/candle-kernels/src/unary.cu
+++ b/candle-kernels/src/unary.cu
@@ -26,13 +26,19 @@ extern "C" __global__ void FN_NAME( \
 } \
 
 template<typename T>
-__device__ T gelu_fwd(T x) {
+__device__ __forceinline__ T gelu_fwd(T x) {
     T x_sq = x * x;
     T x_cube = x_sq * x;
     T alpha = x + static_cast<T>(0.044715) * x_cube;
     return static_cast<T>(0.5) * x * (static_cast<T>(1.0) + tanhg(static_cast<T>(M_2_SQRTPI * M_SQRT1_2) * alpha));
 }
 
+template<typename T>
+__device__ __forceinline__ T relu_fwd(T x) {
+    T zero = 0.;
+    return maxg(x, zero);
+}
+
 #if __CUDA_ARCH__ >= 530
 
 UNARY_OP(__half, ucopy_f16, x)
@@ -44,7 +50,8 @@ UNARY_OP(__half, ucos_f16, cosg(x))
 UNARY_OP(__half, uabs_f16, absg(x))
 UNARY_OP(__half, usqr_f16, x*x)
 UNARY_OP(__half, usqrt_f16, sqrtg(x))
-UNARY_OP(__half, gelu_f16, gelu_fwd(x))
+UNARY_OP(__half, ugelu_f16, gelu_fwd(x))
+UNARY_OP(__half, urelu_f16, relu_fwd(x))
 #endif
 
 UNARY_OP(float, ucopy_f32, x)
@@ -65,5 +72,7 @@ UNARY_OP(float, usqr_f32, x*x)
 UNARY_OP(double, usqr_f64, x*x)
 UNARY_OP(float, usqrt_f32, sqrtg(x))
 UNARY_OP(double, usqrt_f64, sqrtg(x))
-UNARY_OP(float, gelu_f32, gelu_fwd(x))
-UNARY_OP(double, gelu_f64, gelu_fwd(x))
+UNARY_OP(float, ugelu_f32, gelu_fwd(x))
+UNARY_OP(double, ugelu_f64, gelu_fwd(x))
+UNARY_OP(float, urelu_f32, relu_fwd(x))
+UNARY_OP(double, urelu_f64, relu_fwd(x))
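
Usage note (not part of the patch): the new op is exposed as Tensor::relu through the unary_op! macro, and for now it returns BackwardNotSupported if it appears on a gradient path. Below is a minimal sketch of how the forward pass could be exercised, assuming the core crate is imported as candle_core and that Tensor::new, Device::Cpu, and to_vec1 are available as in the rest of the crate:

    use candle_core::{Device, Tensor};

    fn main() -> candle_core::Result<()> {
        // relu clamps negative entries to zero and passes positives through;
        // on CUDA devices this dispatches to the urelu_* kernels added above.
        let t = Tensor::new(&[-1.5f32, 0.0, 2.0], &Device::Cpu)?;
        let r = t.relu()?;
        println!("{:?}", r.to_vec1::<f32>()?); // expected: [0.0, 0.0, 2.0]
        Ok(())
    }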