lavc/riscv: add forward-edge CFI landing pads

Author: Rémi Denis-Courmont, 2024-07-22 22:17:40 +03:00
parent a14d21a446
commit f2c30fe15a
49 changed files with 194 additions and 0 deletions
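
For context on the mechanism: with the RISC-V Zicfilp extension, an indirectly reached code address (the target of a jalr through an untrusted register) must begin with an lpad instruction, otherwise the hart raises a software-check exception; direct jumps and branches are never checked. lpad is encoded as auipc with rd=x0, so it is an architectural no-op when executed sequentially or on hardware without the extension, and a label operand of 0 matches any indirect caller. The lpad macro used throughout this commit comes from libavutil/riscv/asm.S; as a hedged sketch (the exact FFmpeg definition may differ), such a macro can be written as:

#if defined (__riscv_zicfilp)
.macro lpad lbl=0
        auipc   zero, \lbl      // the defined encoding of lpad: auipc with rd=x0
.endm
#else
.macro lpad lbl=0               // expands to nothing without Zicfilp
.endm
#endif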

View File

@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_abs_pow34_rvv, zve32f, zba
+lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
sub a2, a2, t0
@@ -39,6 +40,7 @@ func ff_abs_pow34_rvv, zve32f, zba
endfunc
func ff_aac_quant_bands_rvv, zve32f, zba
+lpad 0
NOHWF fmv.w.x fa0, a6
NOHWF fmv.w.x fa1, a7
fcvt.s.w ft0, a5

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_ps_add_squares_rvv, zve64f, zba
+lpad 0
li t1, 32
1:
vsetvli t0, a2, e32, m4, ta, ma
@@ -40,6 +41,7 @@ func ff_ps_add_squares_rvv, zve64f, zba
endfunc
func ff_ps_mul_pair_single_rvv, zve32f, zba
+lpad 0
1:
vsetvli t0, a3, e32, m4, ta, ma
vlseg2e32.v v24, (a1)
@@ -57,6 +59,7 @@ func ff_ps_mul_pair_single_rvv, zve32f, zba
endfunc
func ff_ps_hybrid_analysis_rvv, zve32f
+lpad 0
/* We need 26 FP registers, for 20 scratch ones. Spill fs0-fs5. */
addi sp, sp, -48
.irp n, 0, 1, 2, 3, 4, 5
@@ -135,6 +138,7 @@ NOHWD flw fs\n, (4 * \n)(sp)
endfunc
func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no zve32f here */, zba
+lpad 0
slli t0, a2, 5 + 1 + 2 // ctz(32 * 2 * 4)
sh2add a1, a2, a1
add a0, a0, t0
@@ -208,6 +212,7 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve64x, zba
endfunc
func ff_ps_stereo_interpolate_rvv, zve32f, b
+lpad 0
vsetvli t0, zero, e32, m2, ta, ma
vid.v v24
flw ft0, (a2)

View File

@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_ac3_exponent_min_rvb, zbb
+lpad 0
beqz a1, 3f
1:
addi a2, a2, -1
@@ -43,6 +44,7 @@ func ff_ac3_exponent_min_rvb, zbb
endfunc
func ff_extract_exponents_rvb, zbb
+lpad 0
1:
lw t0, (a1)
addi a0, a0, 1

View File

@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_ac3_exponent_min_rvv, zve32x
+lpad 0
beqz a1, 3f
1:
vsetvli t2, a2, e8, m8, ta, ma
@@ -44,6 +45,7 @@ func ff_ac3_exponent_min_rvv, zve32x
endfunc
func ff_float_to_fixed24_rvv, zve32f, zba
+lpad 0
li t1, 1 << 24
fcvt.s.w f0, t1
1:
@@ -62,6 +64,7 @@ endfunc
#if __riscv_xlen >= 64
func ff_sum_square_butterfly_int32_rvv, zve64x, zba
+lpad 0
vsetvli t0, zero, e64, m8, ta, ma
vmv.v.x v0, zero
vmv.v.x v8, zero
@@ -102,6 +105,7 @@ endfunc
#endif
func ff_sum_square_butterfly_float_rvv, zve32f, zba
+lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v0, zero
vmv.v.x v8, zero

View File

@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_extract_exponents_rvvb, zve32x, zvbb, zba
+lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v8, (a1)

View File

@@ -22,6 +22,7 @@
#if (__riscv_xlen == 64)
func ff_alac_decorrelate_stereo_rvv, zve32x, zba
+lpad 0
ld a4, 8(a0)
ld a0, 0(a0)
1:
@@ -44,6 +45,7 @@ func ff_alac_decorrelate_stereo_rvv, zve32x, zba
endfunc
func ff_alac_append_extra_bits_mono_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a1, (a1)
1:
@@ -62,6 +64,7 @@ func ff_alac_append_extra_bits_mono_rvv, zve32x, zba
endfunc
func ff_alac_append_extra_bits_stereo_rvv, zve32x, zba
+lpad 0
ld a6, 8(a0)
ld a0, (a0)
ld a7, 8(a1)

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_vector_clipf_rvf, f
+lpad 0
NOHWF fmv.w.x fa0, a3
NOHWF fmv.w.x fa1, a4
1:

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_scalarproduct_int16_rvv, zve32x, zba
+lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v8, zero
vmv.s.x v0, zero
@@ -56,6 +57,7 @@ func ff_vector_clip_int32_rvv, zve32x, zba
endfunc
func ff_vector_clipf_rvv, zve32f, zba
+lpad 0
NOHWF fmv.w.x fa0, a3
NOHWF fmv.w.x fa1, a4
1:

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_clear_block_rvv, zve64x
+lpad 0
vsetivli zero, 16, e64, m8, ta, ma
vmv.v.i v0, 0
vse64.v v0, (a0)
@@ -29,6 +30,7 @@ func ff_clear_block_rvv, zve64x
endfunc
func ff_clear_blocks_rvv, zve64x
+lpad 0
vsetivli zero, 16, e64, m8, ta, ma
vmv.v.i v0, 0
@@ -42,6 +44,7 @@ func ff_clear_blocks_rvv, zve64x
endfunc
func ff_fill_block16_rvv, zve32x
+lpad 0
vsetivli t0, 16, e8, m1, ta, ma
vmv.v.x v8, a1
1:
@@ -54,6 +57,7 @@ func ff_fill_block16_rvv, zve32x
endfunc
func ff_fill_block8_rvv, zve64x
+lpad 0
vsetvli t0, zero, e8, m4, ta, ma
vmv.v.x v8, a1
vsetvli t0, a3, e64, m4, ta, ma

View File

@@ -24,6 +24,7 @@
#if (__riscv_xlen >= 64)
func ff_bswap32_buf_rvb, zba, zbb
+lpad 0
bswap32_rvb a0, a1, a2
endfunc
#endif

View File

@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_bswap16_buf_rvv, zve32x, zba
+lpad 0
1:
vsetvli t0, a2, e16, m8, ta, ma
vle16.v v8, (a1)

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_reorder_pixels_rvv, zve32x, zba
+lpad 0
srai a2, a2, 1
add t1, a1, a2
1:

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_flac_lpc16_rvv, zve32x, b
+lpad 0
vtype_vli t0, a2, t2, e32, ta, ma
vsetvl zero, a2, t0
vle32.v v8, (a1)
@@ -46,6 +47,7 @@ endfunc
#if (__riscv_xlen == 64)
func ff_flac_lpc32_rvv, zve64x, zba
+lpad 0
addi t2, a2, -16
ble t2, zero, ff_flac_lpc32_rvv_simple
vsetivli zero, 1, e64, m1, ta, ma
@@ -77,6 +79,7 @@ func ff_flac_lpc32_rvv, zve64x, zba
endfunc
func ff_flac_lpc32_rvv_simple, zve64x, b
+lpad 0
vtype_vli t3, a2, t1, e64, ta, ma
vntypei t2, t3
vsetvl zero, a2, t3 // e64
@@ -105,6 +108,7 @@ func ff_flac_lpc32_rvv_simple, zve64x, b
endfunc
func ff_flac_lpc33_rvv, zve64x, b
+lpad 0
vtype_vli t0, a3, t1, e64, ta, ma
vsetvl zero, a3, t0
vmv.s.x v0, zero
@@ -133,6 +137,7 @@ endfunc
#endif
func ff_flac_wasted32_rvv, zve32x, zba
+lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v8, (a0)
@@ -146,6 +151,7 @@ func ff_flac_wasted32_rvv, zve32x, zba
endfunc
func ff_flac_wasted33_rvv, zve64x, zba
+lpad 0
srli t0, a2, 5
li t1, 1
bnez t0, 2f
@@ -178,6 +184,7 @@ endfunc
#if (__riscv_xlen == 64)
func ff_flac_decorrelate_indep2_16_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -201,6 +208,7 @@ func ff_flac_decorrelate_indep2_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep4_16_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -234,6 +242,7 @@ func ff_flac_decorrelate_indep4_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep6_16_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -279,6 +288,7 @@ func ff_flac_decorrelate_indep6_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep8_16_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -333,6 +343,7 @@ func ff_flac_decorrelate_indep8_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_ls_16_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -357,6 +368,7 @@ func ff_flac_decorrelate_ls_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_rs_16_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -381,6 +393,7 @@ func ff_flac_decorrelate_rs_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_ms_16_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -407,6 +420,7 @@ func ff_flac_decorrelate_ms_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep2_32_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -427,6 +441,7 @@ func ff_flac_decorrelate_indep2_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep4_32_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -456,6 +471,7 @@ func ff_flac_decorrelate_indep4_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep6_32_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -494,6 +510,7 @@ func ff_flac_decorrelate_indep6_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep8_32_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@@ -539,6 +556,7 @@ func ff_flac_decorrelate_indep8_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_ls_32_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -560,6 +578,7 @@ func ff_flac_decorrelate_ls_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_rs_32_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@@ -581,6 +600,7 @@ func ff_flac_decorrelate_rs_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_ms_32_rvv, zve32x, zba
+lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_int32_to_float_fmul_scalar_rvv, zve32f, zba
+lpad 0
NOHWF fmv.w.x fa0, a2
NOHWF mv a2, a3
1:
@@ -38,6 +39,7 @@ NOHWF mv a2, a3
endfunc
func ff_int32_to_float_fmul_array8_rvv, zve32f, zba
+lpad 0
srai a4, a4, 3
1: vsetvli t0, a4, e32, m1, ta, ma

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_g722_apply_qmf_rvv, zve32x
+lpad 0
lla t0, qmf_coeffs
vsetivli zero, 12, e16, m2, ta, ma
vlseg2e16.v v28, (a0)

View File

@@ -23,6 +23,7 @@
.option push
.option norelax
func ff_h263_h_loop_filter_rvv, zve32x
+lpad 0
addi a0, a0, -2
vsetivli zero, 8, e8, mf2, ta, ma
vlsseg4e8.v v8, (a0), a1
@@ -83,6 +84,7 @@ endfunc
.option pop
func ff_h263_v_loop_filter_rvv, zve32x
+lpad 0
sub a4, a0, a1
vsetivli zero, 8, e8, mf2, ta, ma
vle8.v v10, (a0)

View File

@@ -325,6 +325,7 @@
ret
.endm
+.variant_cc h264_put_chroma_mc_rvv
func h264_put_chroma_mc_rvv, zve32x, zba
11:
li a7, 3
@@ -334,6 +335,7 @@ func h264_put_chroma_mc_rvv, zve32x, zba
do_chroma_mc put 0
endfunc
+.variant_cc h264_avg_chroma_mc_rvv
func h264_avg_chroma_mc_rvv, zve32x, zba
21:
li a7, 3
@@ -344,31 +346,37 @@ func h264_avg_chroma_mc_rvv, zve32x, zba
endfunc
func h264_put_chroma_mc8_rvv, zve32x
+lpad 0
li t6, 8
j 11b
endfunc
func h264_put_chroma_mc4_rvv, zve32x
+lpad 0
li t6, 4
j 11b
endfunc
func h264_put_chroma_mc2_rvv, zve32x
+lpad 0
li t6, 2
j 11b
endfunc
func h264_avg_chroma_mc8_rvv, zve32x
+lpad 0
li t6, 8
j 21b
endfunc
func h264_avg_chroma_mc4_rvv, zve32x
+lpad 0
li t6, 4
j 21b
endfunc
func h264_avg_chroma_mc2_rvv, zve32x
+lpad 0
li t6, 2
j 21b
endfunc
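
A note on the two .variant_cc directives above: h264_put_chroma_mc_rvv and h264_avg_chroma_mc_rvv are internal entry points, reached only through the direct j 11b / j 21b branches in the exported mc8/mc4/mc2 wrappers, which Zicfilp does not check, so they get no landing pad. They also receive an extra argument (the block width) in the scratch register t6, i.e. a non-standard calling convention, which is presumably what .variant_cc declares (it sets the STO_RISCV_VARIANT_CC attribute on the symbol). A hedged sketch of the pattern, with hypothetical names:

.variant_cc internal_mc              // non-standard ABI: width arrives in t6
func internal_mc, zve32x             // no lpad: only direct branches land here
1:
        add     a0, a0, t6           // consume the extra t6 argument
        ret
endfunc

func public_mc8, zve32x              // exported, indirect-call target
        lpad    0                    // pad is checked on entry via jalr
        li      t6, 8                // pass the width in a scratch register
        j       1b                   // direct branch: exempt from the check
endfunc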

View File

@@ -37,6 +37,7 @@
.endm
func ff_h264_add_pixels4_8_rvv, zve32x
+lpad 0
vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v8, (a0), a2
vsetivli zero, 4 * 4, e8, m1, ta, ma
@@ -54,6 +55,7 @@ func ff_h264_add_pixels4_8_rvv, zve32x
endfunc
func ff_h264_add_pixels4_16_rvv, zve64x
+lpad 0
vsetivli zero, 4, e16, mf2, ta, ma
vlse64.v v8, (a0), a2
vsetivli zero, 4 * 4, e16, m2, ta, ma
@@ -71,6 +73,7 @@ func ff_h264_add_pixels4_16_rvv, zve64x
endfunc
func ff_h264_add_pixels8_8_rvv, zve64x
+lpad 0
li t0, 8 * 8
vsetivli zero, 8, e8, mf2, ta, ma
vlse64.v v8, (a0), a2
@@ -89,6 +92,7 @@ func ff_h264_add_pixels8_8_rvv, zve64x
endfunc
func ff_h264_add_pixels8_16_rvv, zve32x
+lpad 0
li t0, 8
vsetivli zero, 8, e16, m1, ta, ma
1:

View File

@@ -164,6 +164,7 @@ endfunc
.irp w, 16, 8, 4, 2
func ff_h264_weight_pixels\w\()_8_rvv, zve32x
+lpad 0
li a6, \w
.if \w == 16
j ff_h264_weight_pixels_simple_8_rvv
@@ -173,6 +174,7 @@ func ff_h264_weight_pixels\w\()_8_rvv, zve32x
endfunc
func ff_h264_biweight_pixels\w\()_8_rvv, zve32x
+lpad 0
li t6, \w
.if \w == 16
j ff_h264_biweight_pixels_simple_8_rvv
@@ -272,6 +274,7 @@ func ff_h264_loop_filter_luma_8_rvv, zve32x
endfunc
func ff_h264_v_loop_filter_luma_8_rvv, zve32x
+lpad 0
vsetivli zero, 4, e32, m1, ta, ma
vle8.v v4, (a4)
li t0, 0x01010101
@@ -299,6 +302,7 @@ func ff_h264_v_loop_filter_luma_8_rvv, zve32x
endfunc
func ff_h264_h_loop_filter_luma_8_rvv, zve32x
+lpad 0
vsetivli zero, 4, e32, m1, ta, ma
vle8.v v4, (a4)
li t0, 0x01010101
@@ -313,6 +317,7 @@ func ff_h264_h_loop_filter_luma_8_rvv, zve32x
endfunc
func ff_h264_h_loop_filter_luma_mbaff_8_rvv, zve32x
+lpad 0
vsetivli zero, 4, e16, mf2, ta, ma
vle8.v v4, (a4)
li t0, 0x0101

View File

@@ -55,6 +55,7 @@ func ff_h264_idct4_rvv, zve32x
endfunc
func ff_h264_idct_add_8_rvv, zve32x
+lpad 0
csrwi vxrm, 0
.Lidct_add4_8_rvv:
vsetivli zero, 4, e16, mf2, ta, ma
@@ -213,6 +214,7 @@ func ff_h264_idct8_rvv, zve32x
endfunc
func ff_h264_idct8_add_8_rvv, zve32x
+lpad 0
csrwi vxrm, 0
.Lidct8_add_8_rvv:
vsetivli zero, 8, e16, m1, ta, ma
@@ -405,11 +407,13 @@ endfunc
.irp depth, 9, 10, 12, 14
func ff_h264_idct_add_\depth\()_rvv, zve32x
+lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct_add_16_rvv
endfunc
func ff_h264_idct8_add_\depth\()_rvv, zve32x
+lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct8_add_16_rvv
endfunc
@@ -417,6 +421,7 @@ endfunc
.macro idct_dc_add8 width
func ff_h264_idct\width\()_dc_add_8_rvv, zve64x
+lpad 0
.if \width == 8
vsetivli zero, \width, e8, mf2, ta, ma
.else
@@ -517,11 +522,13 @@ idct_dc_add 8
.irp depth,9,10,12,14
func ff_h264_idct4_dc_add_\depth\()_rvv, zve64x
+lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct4_dc_add_16_rvv
endfunc
func ff_h264_idct8_dc_add_\depth\()_rvv, zve64x
+lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct8_dc_add_16_rvv
endfunc
@@ -534,6 +541,9 @@ endconst
.macro idct4_adds type, depth
func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x, b
+.if \depth == 8
+lpad 0
+.endif
csrwi vxrm, 0
lla t0, ff_h264_scan8
li t1, 32 * (\depth / 8)
@@ -609,6 +619,9 @@ idct4_adds 16intra, \depth
#if (__riscv_xlen == 64)
func ff_h264_idct8_add4_\depth\()_rvv, zve32x, b
+.if \depth == 8
+lpad 0
+.endif
csrwi vxrm, 0
addi sp, sp, -48
lla t0, ff_h264_scan8
@@ -686,17 +699,20 @@ endfunc
.irp depth, 9, 10, 12, 14
func ff_h264_idct_add16_\depth\()_rvv, zve32x
+lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct_add16_16_rvv
endfunc
func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
+lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct_add16intra_16_rvv
endfunc
#if (__riscv_xlen == 64)
func ff_h264_idct8_add4_\depth\()_rvv, zve32x
+lpad 0
li a5, (1 << \depth) - 1
j ff_h264_idct8_add4_16_rvv
endfunc
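
The .if \depth == 8 guards above deserve a remark: the templated macros instantiate one body per bit depth, but only the 8-bit instantiations are installed in function-pointer tables and thus called indirectly. The 9/10/12/14-bit entry points are the small stubs at the end, each of which carries its own pad, loads the pixel maximum into a5, and then branches directly (j) into the shared 16-bit body; since direct branches are exempt from landing-pad checks, padding the non-8-bit instantiations would add nothing. A hedged sketch of that dispatch shape, with hypothetical names:

func template_16_rvv, zve32x         // shared body: direct-branch target only
        // no lpad needed here
        add     a1, a1, a5
        ret
endfunc

func template_10_rvv, zve32x         // real indirect-call entry point
        lpad    0
        li      a5, (1 << 10) - 1    // pixel max, consumed by the body
        j       template_16_rvv      // JAL: not landing-pad checked
endfunc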

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_add_int16_rvv, zve32x, zba
+lpad 0
1:
vsetvli t0, a3, e16, m8, ta, ma
vle16.v v16, (a0)
@@ -37,6 +38,7 @@ func ff_add_int16_rvv, zve32x, zba
endfunc
func ff_add_hfyu_left_pred_bgr32_rvv, zve32x, b
+lpad 0
vtype_ivli t1, 4, e8, ta, ma
li t0, 4
vsetvl zero, t0, t1

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_put_pixels_clamped_rvv, zve64x
+lpad 0
li t0, 8 * 8
vsetvli zero, t0, e16, m8, ta, ma
vle16.v v24, (a0)
@@ -35,6 +36,7 @@ func ff_put_pixels_clamped_rvv, zve64x
endfunc
func ff_put_signed_pixels_clamped_rvv, zve64x
+lpad 0
li t0, 8 * 8
vsetvli zero, t0, e8, m4, ta, ma
vle16.v v24, (a0)
@@ -47,6 +49,7 @@ func ff_put_signed_pixels_clamped_rvv, zve64x
endfunc
func ff_add_pixels_clamped_rvv, zve64x
+lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
vlse64.v v16, (a1), a2

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_ict_float_rvv, zve32f, zba
+lpad 0
lla t0, ff_jpeg2000_f_ict_params
flw ft0, 0(t0)
flw ft1, 4(t0)
@@ -49,6 +50,7 @@ func ff_ict_float_rvv, zve32f, zba
endfunc
func ff_rct_int_rvv, zve32x, zba
+lpad 0
1:
vsetvli t0, a3, e32, m8, ta, ma
vle32.v v16, (a1)

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_scalarproduct_and_madd_int16_rvv, zve32x, zba
+lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v0, zero
1:
@@ -45,6 +46,7 @@ func ff_scalarproduct_and_madd_int16_rvv, zve32x, zba
endfunc
func ff_scalarproduct_and_madd_int32_rvv, zve32x, zba
+lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v0, zero
1:

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_llvid_add_bytes_rvv, zve32x
+lpad 0
1:
vsetvli t0, a2, e8, m8, ta, ma
vle8.v v0, (a1)

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_llvidenc_diff_bytes_rvv, zve32x
+lpad 0
1:
vsetvli t0, a3, e8, m8, ta, ma
vle8.v v0, (a1)

View File

@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_lpc_apply_welch_window_rvv, zve64d, zba
+lpad 0
vsetvli t0, zero, e64, m8, ta, ma
vid.v v0
addi t2, a1, -1
@@ -87,6 +88,7 @@ func ff_lpc_apply_welch_window_rvv, zve64d, zba
endfunc
func ff_lpc_compute_autocorr_rvv, zve64d, b
+lpad 0
vtype_vli t1, a2, t2, e64, ta, ma, 1
addi a2, a2, 1
li t0, 1

View File

@@ -27,6 +27,7 @@
.endm
func ff_pix_abs16_rvv, zve32x
+lpad 0
vsetivli zero, 1, e32, m1, ta, ma
vmv.s.x v0, zero
1:
@@ -47,6 +48,7 @@ func ff_pix_abs16_rvv, zve32x
endfunc
func ff_pix_abs8_rvv, zve32x
+lpad 0
vsetivli zero, 1, e32, m1, ta, ma
vmv.s.x v0, zero
1:
@@ -67,6 +69,7 @@ func ff_pix_abs8_rvv, zve32x
endfunc
func ff_pix_abs16_x2_rvv, zve32x
+lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
li t5, 1
@@ -92,6 +95,7 @@ func ff_pix_abs16_x2_rvv, zve32x
endfunc
func ff_pix_abs8_x2_rvv, zve32x
+lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
li t5, 1
@@ -117,6 +121,7 @@ func ff_pix_abs8_x2_rvv, zve32x
endfunc
func ff_pix_abs16_y2_rvv, zve32x
+lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
add t1, a2, a3
@@ -142,6 +147,7 @@ func ff_pix_abs16_y2_rvv, zve32x
endfunc
func ff_pix_abs8_y2_rvv, zve32x
+lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
add t1, a2, a3
@@ -167,6 +173,7 @@ func ff_pix_abs8_y2_rvv, zve32x
endfunc
func ff_sse16_rvv, zve32x
+lpad 0
vsetivli t0, 16, e32, m4, ta, ma
vmv.v.x v24, zero
vmv.s.x v0, zero
@@ -189,6 +196,7 @@ func ff_sse16_rvv, zve32x
endfunc
func ff_sse8_rvv, zve32x
+lpad 0
vsetivli t0, 8, e32, m2, ta, ma
vmv.v.x v24, zero
vmv.s.x v0, zero
@@ -211,6 +219,7 @@ func ff_sse8_rvv, zve32x
endfunc
func ff_sse4_rvv, zve32x
+lpad 0
vsetivli t0, 4, e32, m1, ta, ma
vmv.v.x v24, zero
vmv.s.x v0, zero
@@ -239,6 +248,7 @@ endfunc
.endm
.macro vsad_vsse16 type
+lpad 0
vsetivli t0, 16, e32, m4, ta, ma
addi a4, a4, -1
add t1, a1, a3
@@ -277,6 +287,7 @@ endfunc
.endm
.macro vsad_vsse8 type
+lpad 0
vsetivli t0, 8, e32, m2, ta, ma
addi a4, a4, -1
add t1, a1, a3
@@ -315,6 +326,7 @@ endfunc
.endm
.macro vsad_vsse_intra16 type
+lpad 0
vsetivli t0, 16, e32, m4, ta, ma
addi a4, a4, -1
add t1, a1, a3
@@ -346,6 +358,7 @@ endfunc
.endm
.macro vsad_vsse_intra8 type
+lpad 0
vsetivli t0, 8, e32, m2, ta, ma
addi a4, a4, -1
add t1, a1, a3
@@ -409,6 +422,8 @@ func ff_vsad_intra8_rvv, zve32x
endfunc
func ff_nsse16_rvv, zve32x
+lpad 0
+
.macro squarediff16
vsetivli zero, 16, e8, m1, tu, ma
vle8.v v4, (a1)
@@ -468,6 +483,8 @@ func ff_nsse16_rvv, zve32x
endfunc
func ff_nsse8_rvv, zve32x
+lpad 0
+
.macro squarediff8
vsetivli zero, 8, e8, mf2, tu, ma
vle8.v v4, (a1)

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_opus_postfilter_rvv, zve32f, b
+lpad 0
flw fa0, 0(a2) // g0
slli t1, a1, 2
flw fa1, 4(a2) // g1

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_get_pixels_8_rvi
+lpad 0
.irp row, 0, 1, 2, 3, 4, 5, 6, 7
ld t0, (a1)
add a1, a1, a2
@@ -47,6 +48,7 @@ func ff_get_pixels_8_rvi
endfunc
func ff_get_pixels_16_rvi
+lpad 0
.irp row, 0, 1, 2, 3, 4, 5, 6, 7
ld t0, 0(a1)
ld t1, 8(a1)

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_get_pixels_8_rvv, zve64x
+lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
1:
@@ -32,6 +33,7 @@ func ff_get_pixels_8_rvv, zve64x
endfunc
func ff_get_pixels_unaligned_8_rvv, zve64x
+lpad 0
andi t1, a1, 7
vsetivli zero, 8, e64, m4, ta, ma
li t0, 8 * 8
@@ -52,6 +54,7 @@ func ff_get_pixels_unaligned_8_rvv, zve64x
endfunc
func ff_diff_pixels_rvv, zve64x
+lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
vlse64.v v16, (a1), a3
@@ -63,6 +66,7 @@ func ff_diff_pixels_rvv, zve64x
endfunc
func ff_diff_pixels_unaligned_rvv, zve32x
+lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
vlsseg8e8.v v16, (a1), a3
vlsseg8e8.v v24, (a2), a3

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_rv34_inv_transform_dc_rvv, zve32x
+lpad 0
lh t1, 0(a0)
li t0, 13 * 13 * 3
mul t2, t0, t1
@@ -33,6 +34,7 @@ func ff_rv34_inv_transform_dc_rvv, zve32x
endfunc
func ff_rv34_idct_dc_add_rvv, zve32x
+lpad 0
vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v0, (a0), a1
li t1, 169

View File

@@ -351,21 +351,25 @@ func ff_avg_rv40_chroma_mc_rvv, zve32x, zba
endfunc
func ff_put_rv40_chroma_mc8_rvv, zve32x
+lpad 0
li t6, 8
j 11b
endfunc
func ff_put_rv40_chroma_mc4_rvv, zve32x
+lpad 0
li t6, 4
j 11b
endfunc
func ff_avg_rv40_chroma_mc8_rvv, zve32x
+lpad 0
li t6, 8
j 21b
endfunc
func ff_avg_rv40_chroma_mc4_rvv, zve32x
+lpad 0
li t6, 4
j 21b
endfunc

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_sbr_sum64x5_rvv, zve32f, zba
+lpad 0
li a5, 64
addi a1, a0, 64 * 4
addi a2, a0, 128 * 4
@@ -50,6 +51,7 @@ func ff_sbr_sum64x5_rvv, zve32f, zba
endfunc
func ff_sbr_sum_square_rvv, zve32f, zba
+lpad 0
vsetvli t0, zero, e32, m8, ta, ma
slli a1, a1, 1
vmv.v.x v8, zero
@@ -69,6 +71,7 @@ NOHWF fmv.x.w a0, fa0
endfunc
func ff_sbr_autocorrelate_rvv, zve32f
+lpad 0
vsetvli t0, zero, e32, m4, ta, ma
vmv.v.x v0, zero
flw fa0, (a0)
@@ -158,6 +161,7 @@ func ff_sbr_autocorrelate_rvv, zve32f
endfunc
func ff_sbr_hf_gen_rvv, zve32f, zba
+lpad 0
NOHWF fmv.w.x fa0, a4
NOHWF mv a4, a5
NOHWF mv a5, a6
@@ -208,6 +212,7 @@ NOHWF mv a5, a6
endfunc
func ff_sbr_hf_g_filt_rvv, zve32f, zba
+lpad 0
li t1, 40 * 2 * 4
sh3add a1, a4, a1
1:
@@ -273,15 +278,18 @@ endfunc
.endm
func ff_sbr_hf_apply_noise_0_rvv, zve32f, b
+lpad 0
hf_apply_noise 0
endfunc
func ff_sbr_hf_apply_noise_3_rvv, zve32f, b
+lpad 0
not a4, a4 // invert parity of kx
// fall through
endfunc
func ff_sbr_hf_apply_noise_1_rvv, zve32f, b
+lpad 0
vsetvli t0, zero, e32, m4, ta, ma
vid.v v4
vxor.vx v4, v4, a4
@@ -290,5 +298,6 @@ func ff_sbr_hf_apply_noise_1_rvv, zve32f, b
endfunc
func ff_sbr_hf_apply_noise_2_rvv, zve32f, b
+lpad 0
hf_apply_noise 2
endfunc
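
One subtlety in the hunk above: ff_sbr_hf_apply_noise_3_rvv flips the parity bit and then falls through into ff_sbr_hf_apply_noise_1_rvv, whose own pad now sits directly in the fall-through path. That is harmless because of how the instruction is defined:

        lpad    0       // encodes as auipc x0, 0: writes x0, an architectural no-op

Executed sequentially it does nothing; it only acts as a CFI check when it is the target of an indirect jump.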

View File

@@ -37,6 +37,7 @@
.endm
func ff_startcode_find_candidate_rvb, zbb
+lpad 0
add a1, a0, a1
// Potentially unaligned head

View File

@@ -27,6 +27,7 @@
#include "libavutil/riscv/asm.S"
func ff_startcode_find_candidate_rvv, zve32x
+lpad 0
mv t0, a0
1:
vsetvli t1, a1, e8, m8, ta, ma

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_ssd_int8_vs_int16_rvv, zve32x, zba
+lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v24, zero
1:

View File

@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_decorrelate_ls_rvv, zve32x, zba
+lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
sub a2, a2, t0
@@ -36,6 +37,7 @@ func ff_decorrelate_ls_rvv, zve32x, zba
endfunc
func ff_decorrelate_sr_rvv, zve32x, zba
+lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v0, (a0)
@@ -50,6 +52,7 @@ func ff_decorrelate_sr_rvv, zve32x, zba
endfunc
func ff_decorrelate_sm_rvv, zve32x, zba
+lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v8, (a1)
@@ -68,6 +71,7 @@ func ff_decorrelate_sm_rvv, zve32x, zba
endfunc
func ff_decorrelate_sf_rvv, zve32x, zba
+lpad 0
csrwi vxrm, 0
1:
vsetvli t0, a2, e32, m8, ta, ma

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_restore_rgb_planes_rvv, zve32x, zba
+lpad 0
li t1, -0x80
sub a3, a3, a6
sub a4, a4, a6
@@ -53,6 +54,7 @@ func ff_restore_rgb_planes_rvv, zve32x, zba
endfunc
func ff_restore_rgb_planes10_rvv, zve32x, zba
+lpad 0
li t1, -0x200
li t2, 0x3FF
sub a3, a3, a6

View File

@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_put_pixels8x8_rvi
+lpad 0
.rept 8
ld t0, (a1)
sd t0, (a0)
@@ -33,6 +34,7 @@ func ff_put_pixels8x8_rvi
endfunc
func ff_put_pixels16x16_rvi
+lpad 0
.rept 16
ld t0, (a1)
ld t1, 8(a1)

View File

@@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_vc1_inv_trans_8x8_dc_rvv, zve64x, zba
+lpad 0
lh t2, (a2)
vsetivli zero, 8, e8, mf2, ta, ma
vlse64.v v0, (a0), a1
@@ -44,6 +45,7 @@ func ff_vc1_inv_trans_8x8_dc_rvv, zve64x, zba
endfunc
func ff_vc1_inv_trans_4x8_dc_rvv, zve32x, zba
+lpad 0
lh t2, (a2)
vsetivli zero, 8, e8, mf2, ta, ma
vlse32.v v0, (a0), a1
@@ -68,6 +70,7 @@ func ff_vc1_inv_trans_4x8_dc_rvv, zve32x, zba
endfunc
func ff_vc1_inv_trans_8x4_dc_rvv, zve64x, zba
+lpad 0
lh t2, (a2)
vsetivli zero, 4, e8, mf4, ta, ma
vlse64.v v0, (a0), a1
@@ -91,6 +94,7 @@ func ff_vc1_inv_trans_8x4_dc_rvv, zve64x, zba
endfunc
func ff_vc1_inv_trans_4x4_dc_rvv, zve32x
+lpad 0
lh t2, (a2)
vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v0, (a0), a1
@@ -203,6 +207,7 @@ func ff_vc1_inv_trans_4_rvv, zve32x
endfunc
func ff_vc1_inv_trans_8x8_rvv, zve32x
+lpad 0
csrwi vxrm, 0
vsetivli zero, 8, e16, m1, ta, ma
addi a1, a0, 1 * 8 * 2
@@ -240,6 +245,7 @@ func ff_vc1_inv_trans_8x8_rvv, zve32x
endfunc
func ff_vc1_inv_trans_8x4_rvv, zve32x
+lpad 0
csrwi vxrm, 0
vsetivli zero, 4, e16, mf2, ta, ma
vlseg8e16.v v0, (a2)
@@ -285,6 +291,7 @@ func ff_vc1_inv_trans_8x4_rvv, zve32x
endfunc
func ff_vc1_inv_trans_4x8_rvv, zve32x
+lpad 0
li a3, 8 * 2
csrwi vxrm, 0
vsetivli zero, 8, e16, m1, ta, ma
@@ -359,6 +366,7 @@ func ff_vc1_inv_trans_4x8_rvv, zve32x
endfunc
func ff_vc1_inv_trans_4x4_rvv, zve32x
+lpad 0
li a3, 8 * 2
csrwi vxrm, 0
vsetivli zero, 4, e16, mf2, ta, ma
@@ -422,12 +430,14 @@ endfunc
.endm
func ff_avg_pixels16x16_rvv, zve32x
+lpad 0
li t0, 16
vsetivli zero, 16, e8, m1, ta, ma
j 1f
endfunc
func ff_avg_pixels8x8_rvv, zve32x
+lpad 0
li t0, 8
vsetivli zero, 8, e8, mf2, ta, ma
1:
@@ -446,6 +456,7 @@ func ff_avg_pixels8x8_rvv, zve32x
endfunc
func ff_vc1_unescape_buffer_rvv, zve32x
+lpad 0
vsetivli zero, 2, e8, m1, ta, ma
vmv.v.i v8, -1
li t4, 1

View File

@@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_vorbis_inverse_coupling_rvv, zve32f, zba
+lpad 0
fmv.w.x ft0, zero
1:
vsetvli t0, a2, e32, m4, ta, ma

View File

@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_vp7_luma_dc_wht_rvv, zve32x, zba
+lpad 0
li a2, 4 * 16 * 2
li a7, 16 * 2
jal t0, 1f
@@ -99,6 +100,7 @@ func ff_vp7_luma_dc_wht_rvv, zve32x, zba
endfunc
func ff_vp7_idct_add_rvv, zve32x
+lpad 0
jal t0, 1b
csrwi vxrm, 2
vsetvli zero, zero, e8, mf4, ta, ma
@@ -130,6 +132,7 @@ endfunc
.irp type, y, uv
func ff_vp7_idct_dc_add4\type\()_rvv, zve32x
+lpad 0
li t0, 32
vsetivli zero, 4, e16, mf2, ta, ma
li t1, 23170

View File

@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_put_vp8_pixels16_rvi
+lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -36,6 +37,7 @@ func ff_put_vp8_pixels16_rvi
endfunc
func ff_put_vp8_pixels8_rvi
+lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -49,6 +51,7 @@ endfunc
#endif
func ff_put_vp8_pixels4_rvi
+lpad 0
1:
addi a4, a4, -1
lw t0, (a2)

View File

@@ -45,6 +45,7 @@
#if __riscv_xlen >= 64
func ff_vp8_luma_dc_wht_rvv, zve64x
+lpad 0
vsetivli zero, 1, e64, m1, ta, ma
vlseg4e64.v v4, (a1)
vsetivli zero, 4, e16, mf2, ta, ma
@@ -99,6 +100,7 @@ endfunc
#endif
func ff_vp8_idct_add_rvv, zve32x
+lpad 0
csrwi vxrm, 0
vsetivli zero, 4, e16, mf2, ta, ma
addi a3, a1, 1 * 4 * 2
@@ -158,6 +160,7 @@ func ff_vp8_idct_add_rvv, zve32x
endfunc
func ff_vp8_idct_dc_add_rvv, zve32x
+lpad 0
lh a3, (a1)
addi a3, a3, 4
srai a3, a3, 3
@@ -182,6 +185,7 @@ func ff_vp78_idct_dc_add_rvv, zve32x
endfunc
func ff_vp8_idct_dc_add4y_rvv, zve32x
+lpad 0
li t0, 32
vsetivli zero, 4, e16, mf2, ta, ma
li t1, 4 - (128 << 3)
@@ -217,6 +221,7 @@ func ff_vp78_idct_dc_add4y_rvv, zve32x
endfunc
func ff_vp8_idct_dc_add4uv_rvv, zve32x
+lpad 0
li t0, 32
vsetivli zero, 4, e16, mf2, ta, ma
li t1, 4 - (128 << 3)
@@ -265,6 +270,7 @@ endfunc
.macro put_vp8_bilin_h_v type mn
func ff_put_vp8_bilin4_\type\()_rvv, zve32x
+lpad 0
vsetvlstatic8 4
.Lbilin_\type:
li t1, 8
@@ -310,6 +316,7 @@ put_vp8_bilin_h_v h a5
put_vp8_bilin_h_v v a6
func ff_put_vp8_bilin4_hv_rvv, zve32x
+lpad 0
vsetvlstatic8 4
.Lbilin_hv:
li t3, 8
@@ -335,16 +342,19 @@ endfunc
.irp len,16,8
func ff_put_vp8_bilin\len\()_h_rvv, zve32x
+lpad 0
vsetvlstatic8 \len
j .Lbilin_h
endfunc
func ff_put_vp8_bilin\len\()_v_rvv, zve32x
+lpad 0
vsetvlstatic8 \len
j .Lbilin_v
endfunc
func ff_put_vp8_bilin\len\()_hv_rvv, zve32x
+lpad 0
vsetvlstatic8 \len
j .Lbilin_hv
endfunc
@@ -441,6 +451,7 @@ endconst
.macro epel len size type
func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x, zba
+lpad 0
epel_filter \size \type t
vsetvlstatic8 \len
1:
@@ -456,6 +467,7 @@ endfunc
.macro epel_hv len hsize vsize
func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x, zba
+lpad 0
#if __riscv_xlen == 64
addi sp, sp, -48
.irp n,0,1,2,3,4,5

View File

@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_v_32x32_rvi, zba
+lpad 0
ld t0, (a3)
ld t1, 8(a3)
ld t2, 16(a3)
@@ -43,6 +44,7 @@ func ff_v_32x32_rvi, zba
endfunc
func ff_v_16x16_rvi, zba
+lpad 0
ld t0, (a3)
ld t1, 8(a3)
.rept 8
@@ -58,6 +60,7 @@ func ff_v_16x16_rvi, zba
endfunc
func ff_v_8x8_rvi, zba
+lpad 0
ld t0, (a3)
.rept 4
add a7, a0, a1

View File

@@ -91,6 +91,7 @@
.macro func_dc name size type n restore ext
func ff_\()\name\()_\()\size\()x\size\()_rvv, \ext
+lpad 0
.if \size == 8
dc_e64 \type \size \n \restore
.else
@@ -119,6 +120,7 @@ func_dc dc_top 16 top 4 1 zve32x
func_dc dc_top 8 top 3 0 zve64x
func ff_h_32x32_rvv, zve32x
+lpad 0
li t0, 32
addi a2, a2, 31
vsetvli zero, t0, e8, m2, ta, ma
@@ -139,6 +141,7 @@ func ff_h_32x32_rvv, zve32x
endfunc
func ff_h_16x16_rvv, zve32x
+lpad 0
addi a2, a2, 15
vsetivli zero, 16, e8, m1, ta, ma
@@ -157,6 +160,7 @@ func ff_h_16x16_rvv, zve32x
endfunc
func ff_h_8x8_rvv, zve32x
+lpad 0
addi a2, a2, 7
vsetivli zero, 8, e8, mf2, ta, ma
@@ -190,6 +194,7 @@ endfunc
.endm
func ff_tm_32x32_rvv, zve32x
+lpad 0
lbu a4, -1(a3)
li t5, 32
@@ -244,6 +249,7 @@ func ff_tm_16x16_rvv, zve32x
endfunc
func ff_tm_8x8_rvv, zve32x
+lpad 0
vsetivli zero, 8, e16, m1, ta, ma
vle8.v v8, (a3)
vzext.vf2 v28, v8
@@ -269,6 +275,7 @@ func ff_tm_8x8_rvv, zve32x
endfunc
func ff_tm_4x4_rvv, zve32x
+lpad 0
vsetivli zero, 4, e16, mf2, ta, ma
vle8.v v8, (a3)
vzext.vf2 v28, v8

View File

@@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_copy64_rvi
+lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -48,6 +49,7 @@ func ff_copy64_rvi
endfunc
func ff_copy32_rvi
+lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -66,6 +68,7 @@ func ff_copy32_rvi
endfunc
func ff_copy16_rvi
+lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -80,6 +83,7 @@ func ff_copy16_rvi
endfunc
func ff_copy8_rvi
+lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@@ -93,6 +97,7 @@ endfunc
#endif
func ff_copy4_rvi
+lpad 0
1:
addi a4, a4, -1
lw t0, (a2)

View File

@@ -38,6 +38,7 @@
.macro copy_avg len
func ff_vp9_avg\len\()_rvv, zve32x
+lpad 0
csrwi vxrm, 0
vsetvlstatic8 \len, t0, 64
1: