mirror of
https://github.com/huggingface/candle.git
synced 2025-06-16 10:38:54 +00:00
Add the recip op + use it in stable-diffusion. (#331)
* Add the recip unary op.
* Fix the CUDA kernel.
* Use the recip op in sigmoid.
This commit is contained in:
@ -80,6 +80,7 @@ extern "C" __global__ void FN_NAME( \
|
||||
#if __CUDA_ARCH__ >= 800
|
||||
UNARY_OP(__nv_bfloat16, ucopy_bf16, x)
|
||||
UNARY_OP(__nv_bfloat16, uneg_bf16, -x)
|
||||
UNARY_OP(__nv_bfloat16, urecip_bf16, recipg(x))
|
||||
UNARY_OP(__nv_bfloat16, uexp_bf16, expg(x))
|
||||
UNARY_OP(__nv_bfloat16, ulog_bf16, logg(x))
|
||||
UNARY_OP(__nv_bfloat16, usin_bf16, sing(x))
|
||||
@ -95,6 +96,7 @@ UNARY_OP1(__nv_bfloat16, uelu_bf16, elu_fwd(x, param))
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
UNARY_OP(__half, ucopy_f16, x)
|
||||
UNARY_OP(__half, uneg_f16, -x)
|
||||
UNARY_OP(__half, urecip_f16, recipg(x))
|
||||
UNARY_OP(__half, uexp_f16, expg(x))
|
||||
UNARY_OP(__half, ulog_f16, logg(x))
|
||||
UNARY_OP(__half, usin_f16, sing(x))
|
||||
@ -113,6 +115,8 @@ UNARY_OP(float, ucopy_f32, x)
|
||||
UNARY_OP(double, ucopy_f64, x)
|
||||
UNARY_OP(float, uneg_f32, -x)
|
||||
UNARY_OP(double, uneg_f64, -x)
|
||||
UNARY_OP(float, urecip_f32, recipg(x))
|
||||
UNARY_OP(double, urecip_f64, recipg(x))
|
||||
UNARY_OP(float, uexp_f32, expg(x))
|
||||
UNARY_OP(double, uexp_f64, expg(x))
|
||||
UNARY_OP(float, ulog_f32, logg(x))
|
||||
|
Reference in New Issue
Block a user