lavc/riscv: add forward-edge CFI landing pads

This commit is contained in:
Rémi Denis-Courmont 2024-07-22 22:17:40 +03:00
parent a14d21a446
commit f2c30fe15a
49 changed files with 194 additions and 0 deletions

View File

@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_abs_pow34_rvv, zve32f, zba
lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
sub a2, a2, t0
@ -39,6 +40,7 @@ func ff_abs_pow34_rvv, zve32f, zba
endfunc
func ff_aac_quant_bands_rvv, zve32f, zba
lpad 0
NOHWF fmv.w.x fa0, a6
NOHWF fmv.w.x fa1, a7
fcvt.s.w ft0, a5

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_ps_add_squares_rvv, zve64f, zba
lpad 0
li t1, 32
1:
vsetvli t0, a2, e32, m4, ta, ma
@ -40,6 +41,7 @@ func ff_ps_add_squares_rvv, zve64f, zba
endfunc
func ff_ps_mul_pair_single_rvv, zve32f, zba
lpad 0
1:
vsetvli t0, a3, e32, m4, ta, ma
vlseg2e32.v v24, (a1)
@ -57,6 +59,7 @@ func ff_ps_mul_pair_single_rvv, zve32f, zba
endfunc
func ff_ps_hybrid_analysis_rvv, zve32f
lpad 0
/* We need 26 FP registers, for 20 scratch ones. Spill fs0-fs5. */
addi sp, sp, -48
.irp n, 0, 1, 2, 3, 4, 5
@ -135,6 +138,7 @@ NOHWD flw fs\n, (4 * \n)(sp)
endfunc
func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no zve32f here */, zba
lpad 0
slli t0, a2, 5 + 1 + 2 // ctz(32 * 2 * 4)
sh2add a1, a2, a1
add a0, a0, t0
@ -208,6 +212,7 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve64x, zba
endfunc
func ff_ps_stereo_interpolate_rvv, zve32f, b
lpad 0
vsetvli t0, zero, e32, m2, ta, ma
vid.v v24
flw ft0, (a2)

View File

@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_ac3_exponent_min_rvb, zbb
lpad 0
beqz a1, 3f
1:
addi a2, a2, -1
@ -43,6 +44,7 @@ func ff_ac3_exponent_min_rvb, zbb
endfunc
func ff_extract_exponents_rvb, zbb
lpad 0
1:
lw t0, (a1)
addi a0, a0, 1

View File

@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_ac3_exponent_min_rvv, zve32x
lpad 0
beqz a1, 3f
1:
vsetvli t2, a2, e8, m8, ta, ma
@ -44,6 +45,7 @@ func ff_ac3_exponent_min_rvv, zve32x
endfunc
func ff_float_to_fixed24_rvv, zve32f, zba
lpad 0
li t1, 1 << 24
fcvt.s.w f0, t1
1:
@ -62,6 +64,7 @@ endfunc
#if __riscv_xlen >= 64
func ff_sum_square_butterfly_int32_rvv, zve64x, zba
lpad 0
vsetvli t0, zero, e64, m8, ta, ma
vmv.v.x v0, zero
vmv.v.x v8, zero
@ -102,6 +105,7 @@ endfunc
#endif
func ff_sum_square_butterfly_float_rvv, zve32f, zba
lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v0, zero
vmv.v.x v8, zero

View File

@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_extract_exponents_rvvb, zve32x, zvbb, zba
lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v8, (a1)

View File

@ -22,6 +22,7 @@
#if (__riscv_xlen == 64)
func ff_alac_decorrelate_stereo_rvv, zve32x, zba
lpad 0
ld a4, 8(a0)
ld a0, 0(a0)
1:
@ -44,6 +45,7 @@ func ff_alac_decorrelate_stereo_rvv, zve32x, zba
endfunc
func ff_alac_append_extra_bits_mono_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a1, (a1)
1:
@ -62,6 +64,7 @@ func ff_alac_append_extra_bits_mono_rvv, zve32x, zba
endfunc
func ff_alac_append_extra_bits_stereo_rvv, zve32x, zba
lpad 0
ld a6, 8(a0)
ld a0, (a0)
ld a7, 8(a1)

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_vector_clipf_rvf, f
lpad 0
NOHWF fmv.w.x fa0, a3
NOHWF fmv.w.x fa1, a4
1:

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_scalarproduct_int16_rvv, zve32x, zba
lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v8, zero
vmv.s.x v0, zero
@ -56,6 +57,7 @@ func ff_vector_clip_int32_rvv, zve32x, zba
endfunc
func ff_vector_clipf_rvv, zve32f, zba
lpad 0
NOHWF fmv.w.x fa0, a3
NOHWF fmv.w.x fa1, a4
1:

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_clear_block_rvv, zve64x
lpad 0
vsetivli zero, 16, e64, m8, ta, ma
vmv.v.i v0, 0
vse64.v v0, (a0)
@ -29,6 +30,7 @@ func ff_clear_block_rvv, zve64x
endfunc
func ff_clear_blocks_rvv, zve64x
lpad 0
vsetivli zero, 16, e64, m8, ta, ma
vmv.v.i v0, 0
@ -42,6 +44,7 @@ func ff_clear_blocks_rvv, zve64x
endfunc
func ff_fill_block16_rvv, zve32x
lpad 0
vsetivli t0, 16, e8, m1, ta, ma
vmv.v.x v8, a1
1:
@ -54,6 +57,7 @@ func ff_fill_block16_rvv, zve32x
endfunc
func ff_fill_block8_rvv, zve64x
lpad 0
vsetvli t0, zero, e8, m4, ta, ma
vmv.v.x v8, a1
vsetvli t0, a3, e64, m4, ta, ma

View File

@ -24,6 +24,7 @@
#if (__riscv_xlen >= 64)
// Byte-swap a buffer of 32-bit words (scalar path, Zba+Zbb extensions).
// The entire loop lives in the bswap32_rvb macro (defined elsewhere);
// a0/a1/a2 are presumably dst/src/count — TODO confirm against the macro.
func ff_bswap32_buf_rvb, zba, zbb
lpad 0                  // forward-edge CFI landing pad (Zicfilp), label 0
bswap32_rvb a0, a1, a2  // shared macro performs the whole byte-swap loop
endfunc
#endif

View File

@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_bswap16_buf_rvv, zve32x, zba
lpad 0
1:
vsetvli t0, a2, e16, m8, ta, ma
vle16.v v8, (a1)

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_reorder_pixels_rvv, zve32x, zba
lpad 0
srai a2, a2, 1
add t1, a1, a2
1:

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_flac_lpc16_rvv, zve32x, b
lpad 0
vtype_vli t0, a2, t2, e32, ta, ma
vsetvl zero, a2, t0
vle32.v v8, (a1)
@ -46,6 +47,7 @@ endfunc
#if (__riscv_xlen == 64)
func ff_flac_lpc32_rvv, zve64x, zba
lpad 0
addi t2, a2, -16
ble t2, zero, ff_flac_lpc32_rvv_simple
vsetivli zero, 1, e64, m1, ta, ma
@ -77,6 +79,7 @@ func ff_flac_lpc32_rvv, zve64x, zba
endfunc
func ff_flac_lpc32_rvv_simple, zve64x, b
lpad 0
vtype_vli t3, a2, t1, e64, ta, ma
vntypei t2, t3
vsetvl zero, a2, t3 // e64
@ -105,6 +108,7 @@ func ff_flac_lpc32_rvv_simple, zve64x, b
endfunc
func ff_flac_lpc33_rvv, zve64x, b
lpad 0
vtype_vli t0, a3, t1, e64, ta, ma
vsetvl zero, a3, t0
vmv.s.x v0, zero
@ -133,6 +137,7 @@ endfunc
#endif
func ff_flac_wasted32_rvv, zve32x, zba
lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v8, (a0)
@ -146,6 +151,7 @@ func ff_flac_wasted32_rvv, zve32x, zba
endfunc
func ff_flac_wasted33_rvv, zve64x, zba
lpad 0
srli t0, a2, 5
li t1, 1
bnez t0, 2f
@ -178,6 +184,7 @@ endfunc
#if (__riscv_xlen == 64)
func ff_flac_decorrelate_indep2_16_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@ -201,6 +208,7 @@ func ff_flac_decorrelate_indep2_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep4_16_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@ -234,6 +242,7 @@ func ff_flac_decorrelate_indep4_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep6_16_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@ -279,6 +288,7 @@ func ff_flac_decorrelate_indep6_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep8_16_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@ -333,6 +343,7 @@ func ff_flac_decorrelate_indep8_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_ls_16_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@ -357,6 +368,7 @@ func ff_flac_decorrelate_ls_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_rs_16_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@ -381,6 +393,7 @@ func ff_flac_decorrelate_rs_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_ms_16_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@ -407,6 +420,7 @@ func ff_flac_decorrelate_ms_16_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep2_32_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@ -427,6 +441,7 @@ func ff_flac_decorrelate_indep2_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep4_32_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@ -456,6 +471,7 @@ func ff_flac_decorrelate_indep4_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep6_32_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@ -494,6 +510,7 @@ func ff_flac_decorrelate_indep6_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_indep8_32_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld t1, 16(a1)
@ -539,6 +556,7 @@ func ff_flac_decorrelate_indep8_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_ls_32_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@ -560,6 +578,7 @@ func ff_flac_decorrelate_ls_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_rs_32_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
@ -581,6 +600,7 @@ func ff_flac_decorrelate_rs_32_rvv, zve32x, zba
endfunc
func ff_flac_decorrelate_ms_32_rvv, zve32x, zba
lpad 0
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_int32_to_float_fmul_scalar_rvv, zve32f, zba
lpad 0
NOHWF fmv.w.x fa0, a2
NOHWF mv a2, a3
1:
@ -38,6 +39,7 @@ NOHWF mv a2, a3
endfunc
func ff_int32_to_float_fmul_array8_rvv, zve32f, zba
lpad 0
srai a4, a4, 3
1: vsetvli t0, a4, e32, m1, ta, ma

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_g722_apply_qmf_rvv, zve32x
lpad 0
lla t0, qmf_coeffs
vsetivli zero, 12, e16, m2, ta, ma
vlseg2e16.v v28, (a0)

View File

@ -23,6 +23,7 @@
.option push
.option norelax
func ff_h263_h_loop_filter_rvv, zve32x
lpad 0
addi a0, a0, -2
vsetivli zero, 8, e8, mf2, ta, ma
vlsseg4e8.v v8, (a0), a1
@ -83,6 +84,7 @@ endfunc
.option pop
func ff_h263_v_loop_filter_rvv, zve32x
lpad 0
sub a4, a0, a1
vsetivli zero, 8, e8, mf2, ta, ma
vle8.v v10, (a0)

View File

@ -325,6 +325,7 @@
ret
.endm
.variant_cc h264_put_chroma_mc_rvv
func h264_put_chroma_mc_rvv, zve32x, zba
11:
li a7, 3
@ -334,6 +335,7 @@ func h264_put_chroma_mc_rvv, zve32x, zba
do_chroma_mc put 0
endfunc
.variant_cc h264_avg_chroma_mc_rvv
func h264_avg_chroma_mc_rvv, zve32x, zba
21:
li a7, 3
@ -344,31 +346,37 @@ func h264_avg_chroma_mc_rvv, zve32x, zba
endfunc
// Entry stub: 8-wide put chroma MC. Sets the block size, then joins the
// shared code in h264_put_chroma_mc_rvv at local label 11 (defined above).
func h264_put_chroma_mc8_rvv, zve32x
lpad 0          // forward-edge CFI landing pad (Zicfilp), label 0
li t6, 8        // t6 = block size for the shared routine
j 11b           // back-jump into h264_put_chroma_mc_rvv (label 11)
endfunc
// Entry stub: 4-wide put chroma MC; see h264_put_chroma_mc_rvv label 11.
func h264_put_chroma_mc4_rvv, zve32x
lpad 0          // forward-edge CFI landing pad (Zicfilp), label 0
li t6, 4        // t6 = block size for the shared routine
j 11b           // back-jump into h264_put_chroma_mc_rvv (label 11)
endfunc
// Entry stub: 2-wide put chroma MC; see h264_put_chroma_mc_rvv label 11.
func h264_put_chroma_mc2_rvv, zve32x
lpad 0          // forward-edge CFI landing pad (Zicfilp), label 0
li t6, 2        // t6 = block size for the shared routine
j 11b           // back-jump into h264_put_chroma_mc_rvv (label 11)
endfunc
// Entry stub: 8-wide avg chroma MC. Sets the block size, then joins the
// shared code in h264_avg_chroma_mc_rvv at local label 21 (defined above).
func h264_avg_chroma_mc8_rvv, zve32x
lpad 0          // forward-edge CFI landing pad (Zicfilp), label 0
li t6, 8        // t6 = block size for the shared routine
j 21b           // back-jump into h264_avg_chroma_mc_rvv (label 21)
endfunc
// Entry stub: 4-wide avg chroma MC; see h264_avg_chroma_mc_rvv label 21.
func h264_avg_chroma_mc4_rvv, zve32x
lpad 0          // forward-edge CFI landing pad (Zicfilp), label 0
li t6, 4        // t6 = block size for the shared routine
j 21b           // back-jump into h264_avg_chroma_mc_rvv (label 21)
endfunc
// Entry stub: 2-wide avg chroma MC; see h264_avg_chroma_mc_rvv label 21.
func h264_avg_chroma_mc2_rvv, zve32x
lpad 0          // forward-edge CFI landing pad (Zicfilp), label 0
li t6, 2        // t6 = block size for the shared routine
j 21b           // back-jump into h264_avg_chroma_mc_rvv (label 21)
endfunc

View File

@ -37,6 +37,7 @@
.endm
func ff_h264_add_pixels4_8_rvv, zve32x
lpad 0
vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v8, (a0), a2
vsetivli zero, 4 * 4, e8, m1, ta, ma
@ -54,6 +55,7 @@ func ff_h264_add_pixels4_8_rvv, zve32x
endfunc
func ff_h264_add_pixels4_16_rvv, zve64x
lpad 0
vsetivli zero, 4, e16, mf2, ta, ma
vlse64.v v8, (a0), a2
vsetivli zero, 4 * 4, e16, m2, ta, ma
@ -71,6 +73,7 @@ func ff_h264_add_pixels4_16_rvv, zve64x
endfunc
func ff_h264_add_pixels8_8_rvv, zve64x
lpad 0
li t0, 8 * 8
vsetivli zero, 8, e8, mf2, ta, ma
vlse64.v v8, (a0), a2
@ -89,6 +92,7 @@ func ff_h264_add_pixels8_8_rvv, zve64x
endfunc
func ff_h264_add_pixels8_16_rvv, zve32x
lpad 0
li t0, 8
vsetivli zero, 8, e16, m1, ta, ma
1:

View File

@ -164,6 +164,7 @@ endfunc
.irp w, 16, 8, 4, 2
func ff_h264_weight_pixels\w\()_8_rvv, zve32x
lpad 0
li a6, \w
.if \w == 16
j ff_h264_weight_pixels_simple_8_rvv
@ -173,6 +174,7 @@ func ff_h264_weight_pixels\w\()_8_rvv, zve32x
endfunc
func ff_h264_biweight_pixels\w\()_8_rvv, zve32x
lpad 0
li t6, \w
.if \w == 16
j ff_h264_biweight_pixels_simple_8_rvv
@ -272,6 +274,7 @@ func ff_h264_loop_filter_luma_8_rvv, zve32x
endfunc
func ff_h264_v_loop_filter_luma_8_rvv, zve32x
lpad 0
vsetivli zero, 4, e32, m1, ta, ma
vle8.v v4, (a4)
li t0, 0x01010101
@ -299,6 +302,7 @@ func ff_h264_v_loop_filter_luma_8_rvv, zve32x
endfunc
func ff_h264_h_loop_filter_luma_8_rvv, zve32x
lpad 0
vsetivli zero, 4, e32, m1, ta, ma
vle8.v v4, (a4)
li t0, 0x01010101
@ -313,6 +317,7 @@ func ff_h264_h_loop_filter_luma_8_rvv, zve32x
endfunc
func ff_h264_h_loop_filter_luma_mbaff_8_rvv, zve32x
lpad 0
vsetivli zero, 4, e16, mf2, ta, ma
vle8.v v4, (a4)
li t0, 0x0101

View File

@ -55,6 +55,7 @@ func ff_h264_idct4_rvv, zve32x
endfunc
func ff_h264_idct_add_8_rvv, zve32x
lpad 0
csrwi vxrm, 0
.Lidct_add4_8_rvv:
vsetivli zero, 4, e16, mf2, ta, ma
@ -213,6 +214,7 @@ func ff_h264_idct8_rvv, zve32x
endfunc
func ff_h264_idct8_add_8_rvv, zve32x
lpad 0
csrwi vxrm, 0
.Lidct8_add_8_rvv:
vsetivli zero, 8, e16, m1, ta, ma
@ -405,11 +407,13 @@ endfunc
.irp depth, 9, 10, 12, 14
// Per-depth stub (expanded by the enclosing .irp over 9/10/12/14 bits):
// load the maximal sample value, then tail-call the shared 16-bit code.
func ff_h264_idct_add_\depth\()_rvv, zve32x
lpad 0                      // forward-edge CFI landing pad (Zicfilp)
li a5, (1 << \depth) - 1    // a5 = (1 << depth) - 1; presumably the clamp limit — confirm in callee
j ff_h264_idct_add_16_rvv   // tail call; a5 carries the per-depth maximum
endfunc
// Per-depth 8x8 idct-add stub: same pattern as the 4x4 variant above.
func ff_h264_idct8_add_\depth\()_rvv, zve32x
lpad 0                      // forward-edge CFI landing pad (Zicfilp)
li a5, (1 << \depth) - 1    // a5 = per-depth maximal sample value
j ff_h264_idct8_add_16_rvv  // tail call into the shared 16-bit-depth code
endfunc
@ -417,6 +421,7 @@ endfunc
.macro idct_dc_add8 width
func ff_h264_idct\width\()_dc_add_8_rvv, zve64x
lpad 0
.if \width == 8
vsetivli zero, \width, e8, mf2, ta, ma
.else
@ -517,11 +522,13 @@ idct_dc_add 8
.irp depth,9,10,12,14
// Per-depth 4x4 DC-add stub (expanded by .irp depth,9,10,12,14).
func ff_h264_idct4_dc_add_\depth\()_rvv, zve64x
lpad 0                          // forward-edge CFI landing pad (Zicfilp)
li a5, (1 << \depth) - 1        // a5 = per-depth maximal sample value
j ff_h264_idct4_dc_add_16_rvv   // tail call into the shared 16-bit code
endfunc
// Per-depth 8x8 DC-add stub: same pattern as the 4x4 variant above.
func ff_h264_idct8_dc_add_\depth\()_rvv, zve64x
lpad 0                          // forward-edge CFI landing pad (Zicfilp)
li a5, (1 << \depth) - 1        // a5 = per-depth maximal sample value
j ff_h264_idct8_dc_add_16_rvv   // tail call into the shared 16-bit code
endfunc
@ -534,6 +541,9 @@ endconst
.macro idct4_adds type, depth
func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x, b
.if \depth == 8
lpad 0
.endif
csrwi vxrm, 0
lla t0, ff_h264_scan8
li t1, 32 * (\depth / 8)
@ -609,6 +619,9 @@ idct4_adds 16intra, \depth
#if (__riscv_xlen == 64)
func ff_h264_idct8_add4_\depth\()_rvv, zve32x, b
.if \depth == 8
lpad 0
.endif
csrwi vxrm, 0
addi sp, sp, -48
lla t0, ff_h264_scan8
@ -686,17 +699,20 @@ endfunc
.irp depth, 9, 10, 12, 14
// Per-depth add16 stub (expanded by .irp depth, 9, 10, 12, 14).
func ff_h264_idct_add16_\depth\()_rvv, zve32x
lpad 0                      // forward-edge CFI landing pad (Zicfilp)
li a5, (1 << \depth) - 1    // a5 = per-depth maximal sample value
j ff_h264_idct_add16_16_rvv // tail call into the shared 16-bit code
endfunc
// Per-depth add16intra stub: same pattern as add16 above.
func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
lpad 0                              // forward-edge CFI landing pad (Zicfilp)
li a5, (1 << \depth) - 1            // a5 = per-depth maximal sample value
j ff_h264_idct_add16intra_16_rvv    // tail call into the shared 16-bit code
endfunc
#if (__riscv_xlen == 64)
// Per-depth 8x8 add4 stub (RV64 only, per the enclosing #if).
func ff_h264_idct8_add4_\depth\()_rvv, zve32x
lpad 0                      // forward-edge CFI landing pad (Zicfilp)
li a5, (1 << \depth) - 1    // a5 = per-depth maximal sample value
j ff_h264_idct8_add4_16_rvv // tail call into the shared 16-bit code
endfunc

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_add_int16_rvv, zve32x, zba
lpad 0
1:
vsetvli t0, a3, e16, m8, ta, ma
vle16.v v16, (a0)
@ -37,6 +38,7 @@ func ff_add_int16_rvv, zve32x, zba
endfunc
func ff_add_hfyu_left_pred_bgr32_rvv, zve32x, b
lpad 0
vtype_ivli t1, 4, e8, ta, ma
li t0, 4
vsetvl zero, t0, t1

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_put_pixels_clamped_rvv, zve64x
lpad 0
li t0, 8 * 8
vsetvli zero, t0, e16, m8, ta, ma
vle16.v v24, (a0)
@ -35,6 +36,7 @@ func ff_put_pixels_clamped_rvv, zve64x
endfunc
func ff_put_signed_pixels_clamped_rvv, zve64x
lpad 0
li t0, 8 * 8
vsetvli zero, t0, e8, m4, ta, ma
vle16.v v24, (a0)
@ -47,6 +49,7 @@ func ff_put_signed_pixels_clamped_rvv, zve64x
endfunc
func ff_add_pixels_clamped_rvv, zve64x
lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
vlse64.v v16, (a1), a2

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_ict_float_rvv, zve32f, zba
lpad 0
lla t0, ff_jpeg2000_f_ict_params
flw ft0, 0(t0)
flw ft1, 4(t0)
@ -49,6 +50,7 @@ func ff_ict_float_rvv, zve32f, zba
endfunc
func ff_rct_int_rvv, zve32x, zba
lpad 0
1:
vsetvli t0, a3, e32, m8, ta, ma
vle32.v v16, (a1)

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_scalarproduct_and_madd_int16_rvv, zve32x, zba
lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v0, zero
1:
@ -45,6 +46,7 @@ func ff_scalarproduct_and_madd_int16_rvv, zve32x, zba
endfunc
func ff_scalarproduct_and_madd_int32_rvv, zve32x, zba
lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v0, zero
1:

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_llvid_add_bytes_rvv, zve32x
lpad 0
1:
vsetvli t0, a2, e8, m8, ta, ma
vle8.v v0, (a1)

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_llvidenc_diff_bytes_rvv, zve32x
lpad 0
1:
vsetvli t0, a3, e8, m8, ta, ma
vle8.v v0, (a1)

View File

@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_lpc_apply_welch_window_rvv, zve64d, zba
lpad 0
vsetvli t0, zero, e64, m8, ta, ma
vid.v v0
addi t2, a1, -1
@ -87,6 +88,7 @@ func ff_lpc_apply_welch_window_rvv, zve64d, zba
endfunc
func ff_lpc_compute_autocorr_rvv, zve64d, b
lpad 0
vtype_vli t1, a2, t2, e64, ta, ma, 1
addi a2, a2, 1
li t0, 1

View File

@ -27,6 +27,7 @@
.endm
func ff_pix_abs16_rvv, zve32x
lpad 0
vsetivli zero, 1, e32, m1, ta, ma
vmv.s.x v0, zero
1:
@ -47,6 +48,7 @@ func ff_pix_abs16_rvv, zve32x
endfunc
func ff_pix_abs8_rvv, zve32x
lpad 0
vsetivli zero, 1, e32, m1, ta, ma
vmv.s.x v0, zero
1:
@ -67,6 +69,7 @@ func ff_pix_abs8_rvv, zve32x
endfunc
func ff_pix_abs16_x2_rvv, zve32x
lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
li t5, 1
@ -92,6 +95,7 @@ func ff_pix_abs16_x2_rvv, zve32x
endfunc
func ff_pix_abs8_x2_rvv, zve32x
lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
li t5, 1
@ -117,6 +121,7 @@ func ff_pix_abs8_x2_rvv, zve32x
endfunc
func ff_pix_abs16_y2_rvv, zve32x
lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
add t1, a2, a3
@ -142,6 +147,7 @@ func ff_pix_abs16_y2_rvv, zve32x
endfunc
func ff_pix_abs8_y2_rvv, zve32x
lpad 0
csrwi vxrm, 0
vsetivli zero, 1, e32, m1, ta, ma
add t1, a2, a3
@ -167,6 +173,7 @@ func ff_pix_abs8_y2_rvv, zve32x
endfunc
func ff_sse16_rvv, zve32x
lpad 0
vsetivli t0, 16, e32, m4, ta, ma
vmv.v.x v24, zero
vmv.s.x v0, zero
@ -189,6 +196,7 @@ func ff_sse16_rvv, zve32x
endfunc
func ff_sse8_rvv, zve32x
lpad 0
vsetivli t0, 8, e32, m2, ta, ma
vmv.v.x v24, zero
vmv.s.x v0, zero
@ -211,6 +219,7 @@ func ff_sse8_rvv, zve32x
endfunc
func ff_sse4_rvv, zve32x
lpad 0
vsetivli t0, 4, e32, m1, ta, ma
vmv.v.x v24, zero
vmv.s.x v0, zero
@ -239,6 +248,7 @@ endfunc
.endm
.macro vsad_vsse16 type
lpad 0
vsetivli t0, 16, e32, m4, ta, ma
addi a4, a4, -1
add t1, a1, a3
@ -277,6 +287,7 @@ endfunc
.endm
.macro vsad_vsse8 type
lpad 0
vsetivli t0, 8, e32, m2, ta, ma
addi a4, a4, -1
add t1, a1, a3
@ -315,6 +326,7 @@ endfunc
.endm
.macro vsad_vsse_intra16 type
lpad 0
vsetivli t0, 16, e32, m4, ta, ma
addi a4, a4, -1
add t1, a1, a3
@ -346,6 +358,7 @@ endfunc
.endm
.macro vsad_vsse_intra8 type
lpad 0
vsetivli t0, 8, e32, m2, ta, ma
addi a4, a4, -1
add t1, a1, a3
@ -409,6 +422,8 @@ func ff_vsad_intra8_rvv, zve32x
endfunc
func ff_nsse16_rvv, zve32x
lpad 0
.macro squarediff16
vsetivli zero, 16, e8, m1, tu, ma
vle8.v v4, (a1)
@ -468,6 +483,8 @@ func ff_nsse16_rvv, zve32x
endfunc
func ff_nsse8_rvv, zve32x
lpad 0
.macro squarediff8
vsetivli zero, 8, e8, mf2, tu, ma
vle8.v v4, (a1)

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_opus_postfilter_rvv, zve32f, b
lpad 0
flw fa0, 0(a2) // g0
slli t1, a1, 2
flw fa1, 4(a2) // g1

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_get_pixels_8_rvi
lpad 0
.irp row, 0, 1, 2, 3, 4, 5, 6, 7
ld t0, (a1)
add a1, a1, a2
@ -47,6 +48,7 @@ func ff_get_pixels_8_rvi
endfunc
func ff_get_pixels_16_rvi
lpad 0
.irp row, 0, 1, 2, 3, 4, 5, 6, 7
ld t0, 0(a1)
ld t1, 8(a1)

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_get_pixels_8_rvv, zve64x
lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
1:
@ -32,6 +33,7 @@ func ff_get_pixels_8_rvv, zve64x
endfunc
func ff_get_pixels_unaligned_8_rvv, zve64x
lpad 0
andi t1, a1, 7
vsetivli zero, 8, e64, m4, ta, ma
li t0, 8 * 8
@ -52,6 +54,7 @@ func ff_get_pixels_unaligned_8_rvv, zve64x
endfunc
func ff_diff_pixels_rvv, zve64x
lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
li t0, 8 * 8
vlse64.v v16, (a1), a3
@ -63,6 +66,7 @@ func ff_diff_pixels_rvv, zve64x
endfunc
func ff_diff_pixels_unaligned_rvv, zve32x
lpad 0
vsetivli zero, 8, e8, mf2, ta, ma
vlsseg8e8.v v16, (a1), a3
vlsseg8e8.v v24, (a2), a3

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_rv34_inv_transform_dc_rvv, zve32x
lpad 0
lh t1, 0(a0)
li t0, 13 * 13 * 3
mul t2, t0, t1
@ -33,6 +34,7 @@ func ff_rv34_inv_transform_dc_rvv, zve32x
endfunc
func ff_rv34_idct_dc_add_rvv, zve32x
lpad 0
vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v0, (a0), a1
li t1, 169

View File

@ -351,21 +351,25 @@ func ff_avg_rv40_chroma_mc_rvv, zve32x, zba
endfunc
// Entry stub: 8-wide RV40 put chroma MC. Sets the block size, then
// back-jumps to shared label 11 defined earlier in this file (put path).
func ff_put_rv40_chroma_mc8_rvv, zve32x
lpad 0          // forward-edge CFI landing pad (Zicfilp), label 0
li t6, 8        // t6 = block size for the shared routine
j 11b           // join the shared rv40 put chroma MC code (label 11)
endfunc
// Entry stub: 4-wide RV40 put chroma MC; see shared label 11 above.
func ff_put_rv40_chroma_mc4_rvv, zve32x
lpad 0          // forward-edge CFI landing pad (Zicfilp), label 0
li t6, 4        // t6 = block size for the shared routine
j 11b           // join the shared rv40 put chroma MC code (label 11)
endfunc
// Entry stub: 8-wide RV40 avg chroma MC. Sets the block size, then
// back-jumps to shared label 21 in ff_avg_rv40_chroma_mc_rvv (above).
func ff_avg_rv40_chroma_mc8_rvv, zve32x
lpad 0          // forward-edge CFI landing pad (Zicfilp), label 0
li t6, 8        // t6 = block size for the shared routine
j 21b           // join the shared rv40 avg chroma MC code (label 21)
endfunc
// Entry stub: 4-wide RV40 avg chroma MC; see shared label 21 above.
func ff_avg_rv40_chroma_mc4_rvv, zve32x
lpad 0          // forward-edge CFI landing pad (Zicfilp), label 0
li t6, 4        // t6 = block size for the shared routine
j 21b           // join the shared rv40 avg chroma MC code (label 21)
endfunc

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_sbr_sum64x5_rvv, zve32f, zba
lpad 0
li a5, 64
addi a1, a0, 64 * 4
addi a2, a0, 128 * 4
@ -50,6 +51,7 @@ func ff_sbr_sum64x5_rvv, zve32f, zba
endfunc
func ff_sbr_sum_square_rvv, zve32f, zba
lpad 0
vsetvli t0, zero, e32, m8, ta, ma
slli a1, a1, 1
vmv.v.x v8, zero
@ -69,6 +71,7 @@ NOHWF fmv.x.w a0, fa0
endfunc
func ff_sbr_autocorrelate_rvv, zve32f
lpad 0
vsetvli t0, zero, e32, m4, ta, ma
vmv.v.x v0, zero
flw fa0, (a0)
@ -158,6 +161,7 @@ func ff_sbr_autocorrelate_rvv, zve32f
endfunc
func ff_sbr_hf_gen_rvv, zve32f, zba
lpad 0
NOHWF fmv.w.x fa0, a4
NOHWF mv a4, a5
NOHWF mv a5, a6
@ -208,6 +212,7 @@ NOHWF mv a5, a6
endfunc
func ff_sbr_hf_g_filt_rvv, zve32f, zba
lpad 0
li t1, 40 * 2 * 4
sh3add a1, a4, a1
1:
@ -273,15 +278,18 @@ endfunc
.endm
// SBR HF noise application, mode 0: the whole body is the shared
// hf_apply_noise macro (defined just above) expanded with parameter 0.
func ff_sbr_hf_apply_noise_0_rvv, zve32f, b
lpad 0              // forward-edge CFI landing pad (Zicfilp), label 0
hf_apply_noise 0    // macro expansion does all the work
endfunc
// SBR HF noise application, mode 3: flips the kx parity in a4 and then
// deliberately falls through into ff_sbr_hf_apply_noise_1_rvv below.
func ff_sbr_hf_apply_noise_3_rvv, zve32f, b
lpad 0              // forward-edge CFI landing pad (Zicfilp), label 0
not a4, a4 // invert parity of kx
// fall through
endfunc
func ff_sbr_hf_apply_noise_1_rvv, zve32f, b
lpad 0
vsetvli t0, zero, e32, m4, ta, ma
vid.v v4
vxor.vx v4, v4, a4
@ -290,5 +298,6 @@ func ff_sbr_hf_apply_noise_1_rvv, zve32f, b
endfunc
// SBR HF noise application, mode 2: shared hf_apply_noise macro, param 2.
func ff_sbr_hf_apply_noise_2_rvv, zve32f, b
lpad 0              // forward-edge CFI landing pad (Zicfilp), label 0
hf_apply_noise 2    // macro expansion does all the work
endfunc

View File

@ -37,6 +37,7 @@
.endm
func ff_startcode_find_candidate_rvb, zbb
lpad 0
add a1, a0, a1
// Potentially unaligned head

View File

@ -27,6 +27,7 @@
#include "libavutil/riscv/asm.S"
func ff_startcode_find_candidate_rvv, zve32x
lpad 0
mv t0, a0
1:
vsetvli t1, a1, e8, m8, ta, ma

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_ssd_int8_vs_int16_rvv, zve32x, zba
lpad 0
vsetvli t0, zero, e32, m8, ta, ma
vmv.v.x v24, zero
1:

View File

@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_decorrelate_ls_rvv, zve32x, zba
lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
sub a2, a2, t0
@ -36,6 +37,7 @@ func ff_decorrelate_ls_rvv, zve32x, zba
endfunc
func ff_decorrelate_sr_rvv, zve32x, zba
lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v0, (a0)
@ -50,6 +52,7 @@ func ff_decorrelate_sr_rvv, zve32x, zba
endfunc
func ff_decorrelate_sm_rvv, zve32x, zba
lpad 0
1:
vsetvli t0, a2, e32, m8, ta, ma
vle32.v v8, (a1)
@ -68,6 +71,7 @@ func ff_decorrelate_sm_rvv, zve32x, zba
endfunc
func ff_decorrelate_sf_rvv, zve32x, zba
lpad 0
csrwi vxrm, 0
1:
vsetvli t0, a2, e32, m8, ta, ma

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_restore_rgb_planes_rvv, zve32x, zba
lpad 0
li t1, -0x80
sub a3, a3, a6
sub a4, a4, a6
@ -53,6 +54,7 @@ func ff_restore_rgb_planes_rvv, zve32x, zba
endfunc
func ff_restore_rgb_planes10_rvv, zve32x, zba
lpad 0
li t1, -0x200
li t2, 0x3FF
sub a3, a3, a6

View File

@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_put_pixels8x8_rvi
lpad 0
.rept 8
ld t0, (a1)
sd t0, (a0)
@ -33,6 +34,7 @@ func ff_put_pixels8x8_rvi
endfunc
func ff_put_pixels16x16_rvi
lpad 0
.rept 16
ld t0, (a1)
ld t1, 8(a1)

View File

@ -22,6 +22,7 @@
#include "libavutil/riscv/asm.S"
func ff_vc1_inv_trans_8x8_dc_rvv, zve64x, zba
lpad 0
lh t2, (a2)
vsetivli zero, 8, e8, mf2, ta, ma
vlse64.v v0, (a0), a1
@ -44,6 +45,7 @@ func ff_vc1_inv_trans_8x8_dc_rvv, zve64x, zba
endfunc
func ff_vc1_inv_trans_4x8_dc_rvv, zve32x, zba
lpad 0
lh t2, (a2)
vsetivli zero, 8, e8, mf2, ta, ma
vlse32.v v0, (a0), a1
@ -68,6 +70,7 @@ func ff_vc1_inv_trans_4x8_dc_rvv, zve32x, zba
endfunc
func ff_vc1_inv_trans_8x4_dc_rvv, zve64x, zba
lpad 0
lh t2, (a2)
vsetivli zero, 4, e8, mf4, ta, ma
vlse64.v v0, (a0), a1
@ -91,6 +94,7 @@ func ff_vc1_inv_trans_8x4_dc_rvv, zve64x, zba
endfunc
func ff_vc1_inv_trans_4x4_dc_rvv, zve32x
lpad 0
lh t2, (a2)
vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v0, (a0), a1
@ -203,6 +207,7 @@ func ff_vc1_inv_trans_4_rvv, zve32x
endfunc
func ff_vc1_inv_trans_8x8_rvv, zve32x
lpad 0
csrwi vxrm, 0
vsetivli zero, 8, e16, m1, ta, ma
addi a1, a0, 1 * 8 * 2
@ -240,6 +245,7 @@ func ff_vc1_inv_trans_8x8_rvv, zve32x
endfunc
func ff_vc1_inv_trans_8x4_rvv, zve32x
lpad 0
csrwi vxrm, 0
vsetivli zero, 4, e16, mf2, ta, ma
vlseg8e16.v v0, (a2)
@ -285,6 +291,7 @@ func ff_vc1_inv_trans_8x4_rvv, zve32x
endfunc
func ff_vc1_inv_trans_4x8_rvv, zve32x
lpad 0
li a3, 8 * 2
csrwi vxrm, 0
vsetivli zero, 8, e16, m1, ta, ma
@ -359,6 +366,7 @@ func ff_vc1_inv_trans_4x8_rvv, zve32x
endfunc
func ff_vc1_inv_trans_4x4_rvv, zve32x
lpad 0
li a3, 8 * 2
csrwi vxrm, 0
vsetivli zero, 4, e16, mf2, ta, ma
@ -422,12 +430,14 @@ endfunc
.endm
// Entry stub: 16x16 pixel averaging. Sets the row count and vector
// config, then jumps forward into the shared loop (label 1 inside
// ff_avg_pixels8x8_rvv, which follows this function).
func ff_avg_pixels16x16_rvv, zve32x
lpad 0                              // forward-edge CFI landing pad (Zicfilp)
li t0, 16                           // t0 = number of rows
vsetivli zero, 16, e8, m1, ta, ma   // 16 bytes per row, e8/m1
j 1f                                // join the shared averaging loop
endfunc
func ff_avg_pixels8x8_rvv, zve32x
lpad 0
li t0, 8
vsetivli zero, 8, e8, mf2, ta, ma
1:
@ -446,6 +456,7 @@ func ff_avg_pixels8x8_rvv, zve32x
endfunc
func ff_vc1_unescape_buffer_rvv, zve32x
lpad 0
vsetivli zero, 2, e8, m1, ta, ma
vmv.v.i v8, -1
li t4, 1

View File

@ -21,6 +21,7 @@
#include "libavutil/riscv/asm.S"
func ff_vorbis_inverse_coupling_rvv, zve32f, zba
lpad 0
fmv.w.x ft0, zero
1:
vsetvli t0, a2, e32, m4, ta, ma

View File

@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_vp7_luma_dc_wht_rvv, zve32x, zba
lpad 0
li a2, 4 * 16 * 2
li a7, 16 * 2
jal t0, 1f
@ -99,6 +100,7 @@ func ff_vp7_luma_dc_wht_rvv, zve32x, zba
endfunc
func ff_vp7_idct_add_rvv, zve32x
lpad 0
jal t0, 1b
csrwi vxrm, 2
vsetvli zero, zero, e8, mf4, ta, ma
@ -130,6 +132,7 @@ endfunc
.irp type, y, uv
func ff_vp7_idct_dc_add4\type\()_rvv, zve32x
lpad 0
li t0, 32
vsetivli zero, 4, e16, mf2, ta, ma
li t1, 23170

View File

@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_put_vp8_pixels16_rvi
lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@ -36,6 +37,7 @@ func ff_put_vp8_pixels16_rvi
endfunc
func ff_put_vp8_pixels8_rvi
lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@ -49,6 +51,7 @@ endfunc
#endif
func ff_put_vp8_pixels4_rvi
lpad 0
1:
addi a4, a4, -1
lw t0, (a2)

View File

@ -45,6 +45,7 @@
#if __riscv_xlen >= 64
func ff_vp8_luma_dc_wht_rvv, zve64x
lpad 0
vsetivli zero, 1, e64, m1, ta, ma
vlseg4e64.v v4, (a1)
vsetivli zero, 4, e16, mf2, ta, ma
@ -99,6 +100,7 @@ endfunc
#endif
func ff_vp8_idct_add_rvv, zve32x
lpad 0
csrwi vxrm, 0
vsetivli zero, 4, e16, mf2, ta, ma
addi a3, a1, 1 * 4 * 2
@ -158,6 +160,7 @@ func ff_vp8_idct_add_rvv, zve32x
endfunc
func ff_vp8_idct_dc_add_rvv, zve32x
lpad 0
lh a3, (a1)
addi a3, a3, 4
srai a3, a3, 3
@ -182,6 +185,7 @@ func ff_vp78_idct_dc_add_rvv, zve32x
endfunc
func ff_vp8_idct_dc_add4y_rvv, zve32x
lpad 0
li t0, 32
vsetivli zero, 4, e16, mf2, ta, ma
li t1, 4 - (128 << 3)
@ -217,6 +221,7 @@ func ff_vp78_idct_dc_add4y_rvv, zve32x
endfunc
func ff_vp8_idct_dc_add4uv_rvv, zve32x
lpad 0
li t0, 32
vsetivli zero, 4, e16, mf2, ta, ma
li t1, 4 - (128 << 3)
@ -265,6 +270,7 @@ endfunc
.macro put_vp8_bilin_h_v type mn
func ff_put_vp8_bilin4_\type\()_rvv, zve32x
lpad 0
vsetvlstatic8 4
.Lbilin_\type:
li t1, 8
@ -310,6 +316,7 @@ put_vp8_bilin_h_v h a5
put_vp8_bilin_h_v v a6
func ff_put_vp8_bilin4_hv_rvv, zve32x
lpad 0
vsetvlstatic8 4
.Lbilin_hv:
li t3, 8
@ -335,16 +342,19 @@ endfunc
.irp len,16,8
// Per-length stub (expanded by the enclosing .irp len,16,8): configure
// the static vector length for \len-wide rows, then tail-jump into the
// shared horizontal bilinear code at .Lbilin_h (defined above via the
// put_vp8_bilin_h_v macro).
func ff_put_vp8_bilin\len\()_h_rvv, zve32x
lpad 0                  // forward-edge CFI landing pad (Zicfilp), label 0
vsetvlstatic8 \len      // file-local macro: set vl for \len elements
j .Lbilin_h             // join the shared h-bilinear implementation
endfunc
// Per-length stub: vertical bilinear; joins shared code at .Lbilin_v.
func ff_put_vp8_bilin\len\()_v_rvv, zve32x
lpad 0                  // forward-edge CFI landing pad (Zicfilp), label 0
vsetvlstatic8 \len      // file-local macro: set vl for \len elements
j .Lbilin_v             // join the shared v-bilinear implementation
endfunc
// Per-length stub: 2-D bilinear; joins shared code at .Lbilin_hv.
func ff_put_vp8_bilin\len\()_hv_rvv, zve32x
lpad 0                  // forward-edge CFI landing pad (Zicfilp), label 0
vsetvlstatic8 \len      // file-local macro: set vl for \len elements
j .Lbilin_hv            // join the shared hv-bilinear implementation
endfunc
@ -441,6 +451,7 @@ endconst
.macro epel len size type
func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x, zba
lpad 0
epel_filter \size \type t
vsetvlstatic8 \len
1:
@ -456,6 +467,7 @@ endfunc
.macro epel_hv len hsize vsize
func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x, zba
lpad 0
#if __riscv_xlen == 64
addi sp, sp, -48
.irp n,0,1,2,3,4,5

View File

@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_v_32x32_rvi, zba
lpad 0
ld t0, (a3)
ld t1, 8(a3)
ld t2, 16(a3)
@ -43,6 +44,7 @@ func ff_v_32x32_rvi, zba
endfunc
func ff_v_16x16_rvi, zba
lpad 0
ld t0, (a3)
ld t1, 8(a3)
.rept 8
@ -58,6 +60,7 @@ func ff_v_16x16_rvi, zba
endfunc
func ff_v_8x8_rvi, zba
lpad 0
ld t0, (a3)
.rept 4
add a7, a0, a1

View File

@ -91,6 +91,7 @@
.macro func_dc name size type n restore ext
func ff_\()\name\()_\()\size\()x\size\()_rvv, \ext
lpad 0
.if \size == 8
dc_e64 \type \size \n \restore
.else
@ -119,6 +120,7 @@ func_dc dc_top 16 top 4 1 zve32x
func_dc dc_top 8 top 3 0 zve64x
func ff_h_32x32_rvv, zve32x
lpad 0
li t0, 32
addi a2, a2, 31
vsetvli zero, t0, e8, m2, ta, ma
@ -139,6 +141,7 @@ func ff_h_32x32_rvv, zve32x
endfunc
func ff_h_16x16_rvv, zve32x
lpad 0
addi a2, a2, 15
vsetivli zero, 16, e8, m1, ta, ma
@ -157,6 +160,7 @@ func ff_h_16x16_rvv, zve32x
endfunc
func ff_h_8x8_rvv, zve32x
lpad 0
addi a2, a2, 7
vsetivli zero, 8, e8, mf2, ta, ma
@ -190,6 +194,7 @@ endfunc
.endm
func ff_tm_32x32_rvv, zve32x
lpad 0
lbu a4, -1(a3)
li t5, 32
@ -244,6 +249,7 @@ func ff_tm_16x16_rvv, zve32x
endfunc
func ff_tm_8x8_rvv, zve32x
lpad 0
vsetivli zero, 8, e16, m1, ta, ma
vle8.v v8, (a3)
vzext.vf2 v28, v8
@ -269,6 +275,7 @@ func ff_tm_8x8_rvv, zve32x
endfunc
func ff_tm_4x4_rvv, zve32x
lpad 0
vsetivli zero, 4, e16, mf2, ta, ma
vle8.v v8, (a3)
vzext.vf2 v28, v8

View File

@ -22,6 +22,7 @@
#if __riscv_xlen >= 64
func ff_copy64_rvi
lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@ -48,6 +49,7 @@ func ff_copy64_rvi
endfunc
func ff_copy32_rvi
lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@ -66,6 +68,7 @@ func ff_copy32_rvi
endfunc
func ff_copy16_rvi
lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@ -80,6 +83,7 @@ func ff_copy16_rvi
endfunc
func ff_copy8_rvi
lpad 0
1:
addi a4, a4, -1
ld t0, (a2)
@ -93,6 +97,7 @@ endfunc
#endif
func ff_copy4_rvi
lpad 0
1:
addi a4, a4, -1
lw t0, (a2)

View File

@ -38,6 +38,7 @@
.macro copy_avg len
func ff_vp9_avg\len\()_rvv, zve32x
lpad 0
csrwi vxrm, 0
vsetvlstatic8 \len, t0, 64
1: