From f2c30fe15aef2ed009941959333773f40a2b273e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Mon, 22 Jul 2024 22:17:40 +0300 Subject: [PATCH] lavc/riscv: add forward-edge CFI landing pads --- libavcodec/riscv/aacencdsp_rvv.S | 2 ++ libavcodec/riscv/aacpsdsp_rvv.S | 5 +++++ libavcodec/riscv/ac3dsp_rvb.S | 2 ++ libavcodec/riscv/ac3dsp_rvv.S | 4 ++++ libavcodec/riscv/ac3dsp_rvvb.S | 1 + libavcodec/riscv/alacdsp_rvv.S | 3 +++ libavcodec/riscv/audiodsp_rvf.S | 1 + libavcodec/riscv/audiodsp_rvv.S | 2 ++ libavcodec/riscv/blockdsp_rvv.S | 4 ++++ libavcodec/riscv/bswapdsp_rvb.S | 1 + libavcodec/riscv/bswapdsp_rvv.S | 1 + libavcodec/riscv/exrdsp_rvv.S | 1 + libavcodec/riscv/flacdsp_rvv.S | 20 ++++++++++++++++++++ libavcodec/riscv/fmtconvert_rvv.S | 2 ++ libavcodec/riscv/g722dsp_rvv.S | 1 + libavcodec/riscv/h263dsp_rvv.S | 2 ++ libavcodec/riscv/h264_mc_chroma.S | 8 ++++++++ libavcodec/riscv/h264addpx_rvv.S | 4 ++++ libavcodec/riscv/h264dsp_rvv.S | 5 +++++ libavcodec/riscv/h264idct_rvv.S | 16 ++++++++++++++++ libavcodec/riscv/huffyuvdsp_rvv.S | 2 ++ libavcodec/riscv/idctdsp_rvv.S | 3 +++ libavcodec/riscv/jpeg2000dsp_rvv.S | 2 ++ libavcodec/riscv/llauddsp_rvv.S | 2 ++ libavcodec/riscv/llviddsp_rvv.S | 1 + libavcodec/riscv/llvidencdsp_rvv.S | 1 + libavcodec/riscv/lpc_rvv.S | 2 ++ libavcodec/riscv/me_cmp_rvv.S | 17 +++++++++++++++++ libavcodec/riscv/opusdsp_rvv.S | 1 + libavcodec/riscv/pixblockdsp_rvi.S | 2 ++ libavcodec/riscv/pixblockdsp_rvv.S | 4 ++++ libavcodec/riscv/rv34dsp_rvv.S | 2 ++ libavcodec/riscv/rv40dsp_rvv.S | 4 ++++ libavcodec/riscv/sbrdsp_rvv.S | 9 +++++++++ libavcodec/riscv/startcode_rvb.S | 1 + libavcodec/riscv/startcode_rvv.S | 1 + libavcodec/riscv/svqenc_rvv.S | 1 + libavcodec/riscv/takdsp_rvv.S | 4 ++++ libavcodec/riscv/utvideodsp_rvv.S | 2 ++ libavcodec/riscv/vc1dsp_rvi.S | 2 ++ libavcodec/riscv/vc1dsp_rvv.S | 11 +++++++++++ libavcodec/riscv/vorbisdsp_rvv.S | 1 + libavcodec/riscv/vp7dsp_rvv.S | 3 +++ libavcodec/riscv/vp8dsp_rvi.S | 3 +++ libavcodec/riscv/vp8dsp_rvv.S | 12 ++++++++++++ libavcodec/riscv/vp9_intra_rvi.S | 3 +++ libavcodec/riscv/vp9_intra_rvv.S | 7 +++++++ libavcodec/riscv/vp9_mc_rvi.S | 5 +++++ libavcodec/riscv/vp9_mc_rvv.S | 1 + 49 files changed, 194 insertions(+) diff --git a/libavcodec/riscv/aacencdsp_rvv.S b/libavcodec/riscv/aacencdsp_rvv.S index 05a603b6f6..83d1868079 100644 --- a/libavcodec/riscv/aacencdsp_rvv.S +++ b/libavcodec/riscv/aacencdsp_rvv.S @@ -22,6 +22,7 @@ #include "libavutil/riscv/asm.S" func ff_abs_pow34_rvv, zve32f, zba + lpad 0 1: vsetvli t0, a2, e32, m8, ta, ma sub a2, a2, t0 @@ -39,6 +40,7 @@ func ff_abs_pow34_rvv, zve32f, zba endfunc func ff_aac_quant_bands_rvv, zve32f, zba + lpad 0 NOHWF fmv.w.x fa0, a6 NOHWF fmv.w.x fa1, a7 fcvt.s.w ft0, a5 diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S index 72e2103c22..5f169dd6d4 100644 --- a/libavcodec/riscv/aacpsdsp_rvv.S +++ b/libavcodec/riscv/aacpsdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_ps_add_squares_rvv, zve64f, zba + lpad 0 li t1, 32 1: vsetvli t0, a2, e32, m4, ta, ma @@ -40,6 +41,7 @@ func ff_ps_add_squares_rvv, zve64f, zba endfunc func ff_ps_mul_pair_single_rvv, zve32f, zba + lpad 0 1: vsetvli t0, a3, e32, m4, ta, ma vlseg2e32.v v24, (a1) @@ -57,6 +59,7 @@ func ff_ps_mul_pair_single_rvv, zve32f, zba endfunc func ff_ps_hybrid_analysis_rvv, zve32f + lpad 0 /* We need 26 FP registers, for 20 scratch ones. Spill fs0-fs5. */ addi sp, sp, -48 .irp n, 0, 1, 2, 3, 4, 5 @@ -135,6 +138,7 @@ NOHWD flw fs\n, (4 * \n)(sp) endfunc func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no zve32f here */, zba + lpad 0 slli t0, a2, 5 + 1 + 2 // ctz(32 * 2 * 4) sh2add a1, a2, a1 add a0, a0, t0 @@ -208,6 +212,7 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve64x, zba endfunc func ff_ps_stereo_interpolate_rvv, zve32f, b + lpad 0 vsetvli t0, zero, e32, m2, ta, ma vid.v v24 flw ft0, (a2) diff --git a/libavcodec/riscv/ac3dsp_rvb.S b/libavcodec/riscv/ac3dsp_rvb.S index 0ca56466e1..a3c5187cfe 100644 --- a/libavcodec/riscv/ac3dsp_rvb.S +++ b/libavcodec/riscv/ac3dsp_rvb.S @@ -22,6 +22,7 @@ #include "libavutil/riscv/asm.S" func ff_ac3_exponent_min_rvb, zbb + lpad 0 beqz a1, 3f 1: addi a2, a2, -1 @@ -43,6 +44,7 @@ func ff_ac3_exponent_min_rvb, zbb endfunc func ff_extract_exponents_rvb, zbb + lpad 0 1: lw t0, (a1) addi a0, a0, 1 diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S index c733733286..261cb9628b 100644 --- a/libavcodec/riscv/ac3dsp_rvv.S +++ b/libavcodec/riscv/ac3dsp_rvv.S @@ -22,6 +22,7 @@ #include "libavutil/riscv/asm.S" func ff_ac3_exponent_min_rvv, zve32x + lpad 0 beqz a1, 3f 1: vsetvli t2, a2, e8, m8, ta, ma @@ -44,6 +45,7 @@ func ff_ac3_exponent_min_rvv, zve32x endfunc func ff_float_to_fixed24_rvv, zve32f, zba + lpad 0 li t1, 1 << 24 fcvt.s.w f0, t1 1: @@ -62,6 +64,7 @@ endfunc #if __riscv_xlen >= 64 func ff_sum_square_butterfly_int32_rvv, zve64x, zba + lpad 0 vsetvli t0, zero, e64, m8, ta, ma vmv.v.x v0, zero vmv.v.x v8, zero @@ -102,6 +105,7 @@ endfunc #endif func ff_sum_square_butterfly_float_rvv, zve32f, zba + lpad 0 vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v0, zero vmv.v.x v8, zero diff --git a/libavcodec/riscv/ac3dsp_rvvb.S b/libavcodec/riscv/ac3dsp_rvvb.S index 5bffb40bba..2f4e644553 100644 --- a/libavcodec/riscv/ac3dsp_rvvb.S +++ b/libavcodec/riscv/ac3dsp_rvvb.S @@ -22,6 +22,7 @@ #include "libavutil/riscv/asm.S" func ff_extract_exponents_rvvb, zve32x, zvbb, zba + lpad 0 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v8, (a1) diff --git a/libavcodec/riscv/alacdsp_rvv.S b/libavcodec/riscv/alacdsp_rvv.S index 19714bd6e3..118a1f2918 100644 --- a/libavcodec/riscv/alacdsp_rvv.S +++ b/libavcodec/riscv/alacdsp_rvv.S @@ -22,6 +22,7 @@ #if (__riscv_xlen == 64) func ff_alac_decorrelate_stereo_rvv, zve32x, zba + lpad 0 ld a4, 8(a0) ld a0, 0(a0) 1: @@ -44,6 +45,7 @@ func ff_alac_decorrelate_stereo_rvv, zve32x, zba endfunc func ff_alac_append_extra_bits_mono_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a1, (a1) 1: @@ -62,6 +64,7 @@ func ff_alac_append_extra_bits_mono_rvv, zve32x, zba endfunc func ff_alac_append_extra_bits_stereo_rvv, zve32x, zba + lpad 0 ld a6, 8(a0) ld a0, (a0) ld a7, 8(a1) diff --git a/libavcodec/riscv/audiodsp_rvf.S b/libavcodec/riscv/audiodsp_rvf.S index 2ec8a11691..97aa930ab5 100644 --- a/libavcodec/riscv/audiodsp_rvf.S +++ b/libavcodec/riscv/audiodsp_rvf.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_vector_clipf_rvf, f + lpad 0 NOHWF fmv.w.x fa0, a3 NOHWF fmv.w.x fa1, a4 1: diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S index b7134de523..7ba64bae79 100644 --- a/libavcodec/riscv/audiodsp_rvv.S +++ b/libavcodec/riscv/audiodsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_scalarproduct_int16_rvv, zve32x, zba + lpad 0 vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v8, zero vmv.s.x v0, zero @@ -56,6 +57,7 @@ func ff_vector_clip_int32_rvv, zve32x, zba endfunc func ff_vector_clipf_rvv, zve32f, zba + lpad 0 NOHWF fmv.w.x fa0, a3 NOHWF fmv.w.x fa1, a4 1: diff --git a/libavcodec/riscv/blockdsp_rvv.S b/libavcodec/riscv/blockdsp_rvv.S index 18ab17da00..04da265417 100644 --- a/libavcodec/riscv/blockdsp_rvv.S +++ b/libavcodec/riscv/blockdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_clear_block_rvv, zve64x + lpad 0 vsetivli zero, 16, e64, m8, ta, ma vmv.v.i v0, 0 vse64.v v0, (a0) @@ -29,6 +30,7 @@ func ff_clear_block_rvv, zve64x endfunc func ff_clear_blocks_rvv, zve64x + lpad 0 vsetivli zero, 16, e64, m8, ta, ma vmv.v.i v0, 0 @@ -42,6 +44,7 @@ func ff_clear_blocks_rvv, zve64x endfunc func ff_fill_block16_rvv, zve32x + lpad 0 vsetivli t0, 16, e8, m1, ta, ma vmv.v.x v8, a1 1: @@ -54,6 +57,7 @@ func ff_fill_block16_rvv, zve32x endfunc func ff_fill_block8_rvv, zve64x + lpad 0 vsetvli t0, zero, e8, m4, ta, ma vmv.v.x v8, a1 vsetvli t0, a3, e64, m4, ta, ma diff --git a/libavcodec/riscv/bswapdsp_rvb.S b/libavcodec/riscv/bswapdsp_rvb.S index 17cfd5d7ef..52b6cd0d7b 100644 --- a/libavcodec/riscv/bswapdsp_rvb.S +++ b/libavcodec/riscv/bswapdsp_rvb.S @@ -24,6 +24,7 @@ #if (__riscv_xlen >= 64) func ff_bswap32_buf_rvb, zba, zbb + lpad 0 bswap32_rvb a0, a1, a2 endfunc #endif diff --git a/libavcodec/riscv/bswapdsp_rvv.S b/libavcodec/riscv/bswapdsp_rvv.S index 14484a772d..b4911bf0ef 100644 --- a/libavcodec/riscv/bswapdsp_rvv.S +++ b/libavcodec/riscv/bswapdsp_rvv.S @@ -22,6 +22,7 @@ #include "libavutil/riscv/asm.S" func ff_bswap16_buf_rvv, zve32x, zba + lpad 0 1: vsetvli t0, a2, e16, m8, ta, ma vle16.v v8, (a1) diff --git a/libavcodec/riscv/exrdsp_rvv.S b/libavcodec/riscv/exrdsp_rvv.S index c1d7dfcb86..e58ed70b9e 100644 --- a/libavcodec/riscv/exrdsp_rvv.S +++ b/libavcodec/riscv/exrdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_reorder_pixels_rvv, zve32x, zba + lpad 0 srai a2, a2, 1 add t1, a1, a2 1: diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S index 69505c694a..a927f188d3 100644 --- a/libavcodec/riscv/flacdsp_rvv.S +++ b/libavcodec/riscv/flacdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_flac_lpc16_rvv, zve32x, b + lpad 0 vtype_vli t0, a2, t2, e32, ta, ma vsetvl zero, a2, t0 vle32.v v8, (a1) @@ -46,6 +47,7 @@ endfunc #if (__riscv_xlen == 64) func ff_flac_lpc32_rvv, zve64x, zba + lpad 0 addi t2, a2, -16 ble t2, zero, ff_flac_lpc32_rvv_simple vsetivli zero, 1, e64, m1, ta, ma @@ -77,6 +79,7 @@ func ff_flac_lpc32_rvv, zve64x, zba endfunc func ff_flac_lpc32_rvv_simple, zve64x, b + lpad 0 vtype_vli t3, a2, t1, e64, ta, ma vntypei t2, t3 vsetvl zero, a2, t3 // e64 @@ -105,6 +108,7 @@ func ff_flac_lpc32_rvv_simple, zve64x, b endfunc func ff_flac_lpc33_rvv, zve64x, b + lpad 0 vtype_vli t0, a3, t1, e64, ta, ma vsetvl zero, a3, t0 vmv.s.x v0, zero @@ -133,6 +137,7 @@ endfunc #endif func ff_flac_wasted32_rvv, zve32x, zba + lpad 0 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v8, (a0) @@ -146,6 +151,7 @@ func ff_flac_wasted32_rvv, zve32x, zba endfunc func ff_flac_wasted33_rvv, zve64x, zba + lpad 0 srli t0, a2, 5 li t1, 1 bnez t0, 2f @@ -178,6 +184,7 @@ endfunc #if (__riscv_xlen == 64) func ff_flac_decorrelate_indep2_16_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -201,6 +208,7 @@ func ff_flac_decorrelate_indep2_16_rvv, zve32x, zba endfunc func ff_flac_decorrelate_indep4_16_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -234,6 +242,7 @@ func ff_flac_decorrelate_indep4_16_rvv, zve32x, zba endfunc func ff_flac_decorrelate_indep6_16_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -279,6 +288,7 @@ func ff_flac_decorrelate_indep6_16_rvv, zve32x, zba endfunc func ff_flac_decorrelate_indep8_16_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -333,6 +343,7 @@ func ff_flac_decorrelate_indep8_16_rvv, zve32x, zba endfunc func ff_flac_decorrelate_ls_16_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -357,6 +368,7 @@ func ff_flac_decorrelate_ls_16_rvv, zve32x, zba endfunc func ff_flac_decorrelate_rs_16_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -381,6 +393,7 @@ func ff_flac_decorrelate_rs_16_rvv, zve32x, zba endfunc func ff_flac_decorrelate_ms_16_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -407,6 +420,7 @@ func ff_flac_decorrelate_ms_16_rvv, zve32x, zba endfunc func ff_flac_decorrelate_indep2_32_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -427,6 +441,7 @@ func ff_flac_decorrelate_indep2_32_rvv, zve32x, zba endfunc func ff_flac_decorrelate_indep4_32_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -456,6 +471,7 @@ func ff_flac_decorrelate_indep4_32_rvv, zve32x, zba endfunc func ff_flac_decorrelate_indep6_32_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -494,6 +510,7 @@ func ff_flac_decorrelate_indep6_32_rvv, zve32x, zba endfunc func ff_flac_decorrelate_indep8_32_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -539,6 +556,7 @@ func ff_flac_decorrelate_indep8_32_rvv, zve32x, zba endfunc func ff_flac_decorrelate_ls_32_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -560,6 +578,7 @@ func ff_flac_decorrelate_ls_32_rvv, zve32x, zba endfunc func ff_flac_decorrelate_rs_32_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -581,6 +600,7 @@ func ff_flac_decorrelate_rs_32_rvv, zve32x, zba endfunc func ff_flac_decorrelate_ms_32_rvv, zve32x, zba + lpad 0 ld a0, (a0) ld a2, 8(a1) ld a1, (a1) diff --git a/libavcodec/riscv/fmtconvert_rvv.S b/libavcodec/riscv/fmtconvert_rvv.S index 05cd3b38a5..cedeab4e5b 100644 --- a/libavcodec/riscv/fmtconvert_rvv.S +++ b/libavcodec/riscv/fmtconvert_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_int32_to_float_fmul_scalar_rvv, zve32f, zba + lpad 0 NOHWF fmv.w.x fa0, a2 NOHWF mv a2, a3 1: @@ -38,6 +39,7 @@ NOHWF mv a2, a3 endfunc func ff_int32_to_float_fmul_array8_rvv, zve32f, zba + lpad 0 srai a4, a4, 3 1: vsetvli t0, a4, e32, m1, ta, ma diff --git a/libavcodec/riscv/g722dsp_rvv.S b/libavcodec/riscv/g722dsp_rvv.S index 981d5cecd8..6ceb70fde1 100644 --- a/libavcodec/riscv/g722dsp_rvv.S +++ b/libavcodec/riscv/g722dsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_g722_apply_qmf_rvv, zve32x + lpad 0 lla t0, qmf_coeffs vsetivli zero, 12, e16, m2, ta, ma vlseg2e16.v v28, (a0) diff --git a/libavcodec/riscv/h263dsp_rvv.S b/libavcodec/riscv/h263dsp_rvv.S index 97503d527c..c78483926a 100644 --- a/libavcodec/riscv/h263dsp_rvv.S +++ b/libavcodec/riscv/h263dsp_rvv.S @@ -23,6 +23,7 @@ .option push .option norelax func ff_h263_h_loop_filter_rvv, zve32x + lpad 0 addi a0, a0, -2 vsetivli zero, 8, e8, mf2, ta, ma vlsseg4e8.v v8, (a0), a1 @@ -83,6 +84,7 @@ endfunc .option pop func ff_h263_v_loop_filter_rvv, zve32x + lpad 0 sub a4, a0, a1 vsetivli zero, 8, e8, mf2, ta, ma vle8.v v10, (a0) diff --git a/libavcodec/riscv/h264_mc_chroma.S b/libavcodec/riscv/h264_mc_chroma.S index b6c0e1c635..79394b987d 100644 --- a/libavcodec/riscv/h264_mc_chroma.S +++ b/libavcodec/riscv/h264_mc_chroma.S @@ -325,6 +325,7 @@ ret .endm + .variant_cc h264_put_chroma_mc_rvv func h264_put_chroma_mc_rvv, zve32x, zba 11: li a7, 3 @@ -334,6 +335,7 @@ func h264_put_chroma_mc_rvv, zve32x, zba do_chroma_mc put 0 endfunc + .variant_cc h264_avg_chroma_mc_rvv func h264_avg_chroma_mc_rvv, zve32x, zba 21: li a7, 3 @@ -344,31 +346,37 @@ func h264_avg_chroma_mc_rvv, zve32x, zba endfunc func h264_put_chroma_mc8_rvv, zve32x + lpad 0 li t6, 8 j 11b endfunc func h264_put_chroma_mc4_rvv, zve32x + lpad 0 li t6, 4 j 11b endfunc func h264_put_chroma_mc2_rvv, zve32x + lpad 0 li t6, 2 j 11b endfunc func h264_avg_chroma_mc8_rvv, zve32x + lpad 0 li t6, 8 j 21b endfunc func h264_avg_chroma_mc4_rvv, zve32x + lpad 0 li t6, 4 j 21b endfunc func h264_avg_chroma_mc2_rvv, zve32x + lpad 0 li t6, 2 j 21b endfunc diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S index 3c0700d1d9..82739881d9 100644 --- a/libavcodec/riscv/h264addpx_rvv.S +++ b/libavcodec/riscv/h264addpx_rvv.S @@ -37,6 +37,7 @@ .endm func ff_h264_add_pixels4_8_rvv, zve32x + lpad 0 vsetivli zero, 4, e8, mf4, ta, ma vlse32.v v8, (a0), a2 vsetivli zero, 4 * 4, e8, m1, ta, ma @@ -54,6 +55,7 @@ func ff_h264_add_pixels4_8_rvv, zve32x endfunc func ff_h264_add_pixels4_16_rvv, zve64x + lpad 0 vsetivli zero, 4, e16, mf2, ta, ma vlse64.v v8, (a0), a2 vsetivli zero, 4 * 4, e16, m2, ta, ma @@ -71,6 +73,7 @@ func ff_h264_add_pixels4_16_rvv, zve64x endfunc func ff_h264_add_pixels8_8_rvv, zve64x + lpad 0 li t0, 8 * 8 vsetivli zero, 8, e8, mf2, ta, ma vlse64.v v8, (a0), a2 @@ -89,6 +92,7 @@ func ff_h264_add_pixels8_8_rvv, zve64x endfunc func ff_h264_add_pixels8_16_rvv, zve32x + lpad 0 li t0, 8 vsetivli zero, 8, e16, m1, ta, ma 1: diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S index ed6a16a9c4..a38bf7ef1d 100644 --- a/libavcodec/riscv/h264dsp_rvv.S +++ b/libavcodec/riscv/h264dsp_rvv.S @@ -164,6 +164,7 @@ endfunc .irp w, 16, 8, 4, 2 func ff_h264_weight_pixels\w\()_8_rvv, zve32x + lpad 0 li a6, \w .if \w == 16 j ff_h264_weight_pixels_simple_8_rvv @@ -173,6 +174,7 @@ func ff_h264_weight_pixels\w\()_8_rvv, zve32x endfunc func ff_h264_biweight_pixels\w\()_8_rvv, zve32x + lpad 0 li t6, \w .if \w == 16 j ff_h264_biweight_pixels_simple_8_rvv @@ -272,6 +274,7 @@ func ff_h264_loop_filter_luma_8_rvv, zve32x endfunc func ff_h264_v_loop_filter_luma_8_rvv, zve32x + lpad 0 vsetivli zero, 4, e32, m1, ta, ma vle8.v v4, (a4) li t0, 0x01010101 @@ -299,6 +302,7 @@ func ff_h264_v_loop_filter_luma_8_rvv, zve32x endfunc func ff_h264_h_loop_filter_luma_8_rvv, zve32x + lpad 0 vsetivli zero, 4, e32, m1, ta, ma vle8.v v4, (a4) li t0, 0x01010101 @@ -313,6 +317,7 @@ func ff_h264_h_loop_filter_luma_8_rvv, zve32x endfunc func ff_h264_h_loop_filter_luma_mbaff_8_rvv, zve32x + lpad 0 vsetivli zero, 4, e16, mf2, ta, ma vle8.v v4, (a4) li t0, 0x0101 diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S index a49a32c47e..5dd92de77d 100644 --- a/libavcodec/riscv/h264idct_rvv.S +++ b/libavcodec/riscv/h264idct_rvv.S @@ -55,6 +55,7 @@ func ff_h264_idct4_rvv, zve32x endfunc func ff_h264_idct_add_8_rvv, zve32x + lpad 0 csrwi vxrm, 0 .Lidct_add4_8_rvv: vsetivli zero, 4, e16, mf2, ta, ma @@ -213,6 +214,7 @@ func ff_h264_idct8_rvv, zve32x endfunc func ff_h264_idct8_add_8_rvv, zve32x + lpad 0 csrwi vxrm, 0 .Lidct8_add_8_rvv: vsetivli zero, 8, e16, m1, ta, ma @@ -405,11 +407,13 @@ endfunc .irp depth, 9, 10, 12, 14 func ff_h264_idct_add_\depth\()_rvv, zve32x + lpad 0 li a5, (1 << \depth) - 1 j ff_h264_idct_add_16_rvv endfunc func ff_h264_idct8_add_\depth\()_rvv, zve32x + lpad 0 li a5, (1 << \depth) - 1 j ff_h264_idct8_add_16_rvv endfunc @@ -417,6 +421,7 @@ endfunc .macro idct_dc_add8 width func ff_h264_idct\width\()_dc_add_8_rvv, zve64x + lpad 0 .if \width == 8 vsetivli zero, \width, e8, mf2, ta, ma .else @@ -517,11 +522,13 @@ idct_dc_add 8 .irp depth,9,10,12,14 func ff_h264_idct4_dc_add_\depth\()_rvv, zve64x + lpad 0 li a5, (1 << \depth) - 1 j ff_h264_idct4_dc_add_16_rvv endfunc func ff_h264_idct8_dc_add_\depth\()_rvv, zve64x + lpad 0 li a5, (1 << \depth) - 1 j ff_h264_idct8_dc_add_16_rvv endfunc @@ -534,6 +541,9 @@ endconst .macro idct4_adds type, depth func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x, b +.if \depth == 8 + lpad 0 +.endif csrwi vxrm, 0 lla t0, ff_h264_scan8 li t1, 32 * (\depth / 8) @@ -609,6 +619,9 @@ idct4_adds 16intra, \depth #if (__riscv_xlen == 64) func ff_h264_idct8_add4_\depth\()_rvv, zve32x, b +.if \depth == 8 + lpad 0 +.endif csrwi vxrm, 0 addi sp, sp, -48 lla t0, ff_h264_scan8 @@ -686,17 +699,20 @@ endfunc .irp depth, 9, 10, 12, 14 func ff_h264_idct_add16_\depth\()_rvv, zve32x + lpad 0 li a5, (1 << \depth) - 1 j ff_h264_idct_add16_16_rvv endfunc func ff_h264_idct_add16intra_\depth\()_rvv, zve32x + lpad 0 li a5, (1 << \depth) - 1 j ff_h264_idct_add16intra_16_rvv endfunc #if (__riscv_xlen == 64) func ff_h264_idct8_add4_\depth\()_rvv, zve32x + lpad 0 li a5, (1 << \depth) - 1 j ff_h264_idct8_add4_16_rvv endfunc diff --git a/libavcodec/riscv/huffyuvdsp_rvv.S b/libavcodec/riscv/huffyuvdsp_rvv.S index 54d1d94059..02b0224705 100644 --- a/libavcodec/riscv/huffyuvdsp_rvv.S +++ b/libavcodec/riscv/huffyuvdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_add_int16_rvv, zve32x, zba + lpad 0 1: vsetvli t0, a3, e16, m8, ta, ma vle16.v v16, (a0) @@ -37,6 +38,7 @@ func ff_add_int16_rvv, zve32x, zba endfunc func ff_add_hfyu_left_pred_bgr32_rvv, zve32x, b + lpad 0 vtype_ivli t1, 4, e8, ta, ma li t0, 4 vsetvl zero, t0, t1 diff --git a/libavcodec/riscv/idctdsp_rvv.S b/libavcodec/riscv/idctdsp_rvv.S index e93e6b5e7a..de229a9ae7 100644 --- a/libavcodec/riscv/idctdsp_rvv.S +++ b/libavcodec/riscv/idctdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_put_pixels_clamped_rvv, zve64x + lpad 0 li t0, 8 * 8 vsetvli zero, t0, e16, m8, ta, ma vle16.v v24, (a0) @@ -35,6 +36,7 @@ func ff_put_pixels_clamped_rvv, zve64x endfunc func ff_put_signed_pixels_clamped_rvv, zve64x + lpad 0 li t0, 8 * 8 vsetvli zero, t0, e8, m4, ta, ma vle16.v v24, (a0) @@ -47,6 +49,7 @@ func ff_put_signed_pixels_clamped_rvv, zve64x endfunc func ff_add_pixels_clamped_rvv, zve64x + lpad 0 vsetivli zero, 8, e8, mf2, ta, ma li t0, 8 * 8 vlse64.v v16, (a1), a2 diff --git a/libavcodec/riscv/jpeg2000dsp_rvv.S b/libavcodec/riscv/jpeg2000dsp_rvv.S index 77c6fd2d32..5079df69cc 100644 --- a/libavcodec/riscv/jpeg2000dsp_rvv.S +++ b/libavcodec/riscv/jpeg2000dsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_ict_float_rvv, zve32f, zba + lpad 0 lla t0, ff_jpeg2000_f_ict_params flw ft0, 0(t0) flw ft1, 4(t0) @@ -49,6 +50,7 @@ func ff_ict_float_rvv, zve32f, zba endfunc func ff_rct_int_rvv, zve32x, zba + lpad 0 1: vsetvli t0, a3, e32, m8, ta, ma vle32.v v16, (a1) diff --git a/libavcodec/riscv/llauddsp_rvv.S b/libavcodec/riscv/llauddsp_rvv.S index 6af2e6a882..594e553b5d 100644 --- a/libavcodec/riscv/llauddsp_rvv.S +++ b/libavcodec/riscv/llauddsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_scalarproduct_and_madd_int16_rvv, zve32x, zba + lpad 0 vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v0, zero 1: @@ -45,6 +46,7 @@ func ff_scalarproduct_and_madd_int16_rvv, zve32x, zba endfunc func ff_scalarproduct_and_madd_int32_rvv, zve32x, zba + lpad 0 vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v0, zero 1: diff --git a/libavcodec/riscv/llviddsp_rvv.S b/libavcodec/riscv/llviddsp_rvv.S index a4814837b9..9572e92dce 100644 --- a/libavcodec/riscv/llviddsp_rvv.S +++ b/libavcodec/riscv/llviddsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_llvid_add_bytes_rvv, zve32x + lpad 0 1: vsetvli t0, a2, e8, m8, ta, ma vle8.v v0, (a1) diff --git a/libavcodec/riscv/llvidencdsp_rvv.S b/libavcodec/riscv/llvidencdsp_rvv.S index 0342165127..44bf3ac7e5 100644 --- a/libavcodec/riscv/llvidencdsp_rvv.S +++ b/libavcodec/riscv/llvidencdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_llvidenc_diff_bytes_rvv, zve32x + lpad 0 1: vsetvli t0, a3, e8, m8, ta, ma vle8.v v0, (a1) diff --git a/libavcodec/riscv/lpc_rvv.S b/libavcodec/riscv/lpc_rvv.S index 8fd1ddbbf0..cc6d6ddd29 100644 --- a/libavcodec/riscv/lpc_rvv.S +++ b/libavcodec/riscv/lpc_rvv.S @@ -22,6 +22,7 @@ #if __riscv_xlen >= 64 func ff_lpc_apply_welch_window_rvv, zve64d, zba + lpad 0 vsetvli t0, zero, e64, m8, ta, ma vid.v v0 addi t2, a1, -1 @@ -87,6 +88,7 @@ func ff_lpc_apply_welch_window_rvv, zve64d, zba endfunc func ff_lpc_compute_autocorr_rvv, zve64d, b + lpad 0 vtype_vli t1, a2, t2, e64, ta, ma, 1 addi a2, a2, 1 li t0, 1 diff --git a/libavcodec/riscv/me_cmp_rvv.S b/libavcodec/riscv/me_cmp_rvv.S index c9ae5bb6fc..8989c91dde 100644 --- a/libavcodec/riscv/me_cmp_rvv.S +++ b/libavcodec/riscv/me_cmp_rvv.S @@ -27,6 +27,7 @@ .endm func ff_pix_abs16_rvv, zve32x + lpad 0 vsetivli zero, 1, e32, m1, ta, ma vmv.s.x v0, zero 1: @@ -47,6 +48,7 @@ func ff_pix_abs16_rvv, zve32x endfunc func ff_pix_abs8_rvv, zve32x + lpad 0 vsetivli zero, 1, e32, m1, ta, ma vmv.s.x v0, zero 1: @@ -67,6 +69,7 @@ func ff_pix_abs8_rvv, zve32x endfunc func ff_pix_abs16_x2_rvv, zve32x + lpad 0 csrwi vxrm, 0 vsetivli zero, 1, e32, m1, ta, ma li t5, 1 @@ -92,6 +95,7 @@ func ff_pix_abs16_x2_rvv, zve32x endfunc func ff_pix_abs8_x2_rvv, zve32x + lpad 0 csrwi vxrm, 0 vsetivli zero, 1, e32, m1, ta, ma li t5, 1 @@ -117,6 +121,7 @@ func ff_pix_abs8_x2_rvv, zve32x endfunc func ff_pix_abs16_y2_rvv, zve32x + lpad 0 csrwi vxrm, 0 vsetivli zero, 1, e32, m1, ta, ma add t1, a2, a3 @@ -142,6 +147,7 @@ func ff_pix_abs16_y2_rvv, zve32x endfunc func ff_pix_abs8_y2_rvv, zve32x + lpad 0 csrwi vxrm, 0 vsetivli zero, 1, e32, m1, ta, ma add t1, a2, a3 @@ -167,6 +173,7 @@ func ff_pix_abs8_y2_rvv, zve32x endfunc func ff_sse16_rvv, zve32x + lpad 0 vsetivli t0, 16, e32, m4, ta, ma vmv.v.x v24, zero vmv.s.x v0, zero @@ -189,6 +196,7 @@ func ff_sse16_rvv, zve32x endfunc func ff_sse8_rvv, zve32x + lpad 0 vsetivli t0, 8, e32, m2, ta, ma vmv.v.x v24, zero vmv.s.x v0, zero @@ -211,6 +219,7 @@ func ff_sse8_rvv, zve32x endfunc func ff_sse4_rvv, zve32x + lpad 0 vsetivli t0, 4, e32, m1, ta, ma vmv.v.x v24, zero vmv.s.x v0, zero @@ -239,6 +248,7 @@ endfunc .endm .macro vsad_vsse16 type + lpad 0 vsetivli t0, 16, e32, m4, ta, ma addi a4, a4, -1 add t1, a1, a3 @@ -277,6 +287,7 @@ endfunc .endm .macro vsad_vsse8 type + lpad 0 vsetivli t0, 8, e32, m2, ta, ma addi a4, a4, -1 add t1, a1, a3 @@ -315,6 +326,7 @@ endfunc .endm .macro vsad_vsse_intra16 type + lpad 0 vsetivli t0, 16, e32, m4, ta, ma addi a4, a4, -1 add t1, a1, a3 @@ -346,6 +358,7 @@ endfunc .endm .macro vsad_vsse_intra8 type + lpad 0 vsetivli t0, 8, e32, m2, ta, ma addi a4, a4, -1 add t1, a1, a3 @@ -409,6 +422,8 @@ func ff_vsad_intra8_rvv, zve32x endfunc func ff_nsse16_rvv, zve32x + lpad 0 + .macro squarediff16 vsetivli zero, 16, e8, m1, tu, ma vle8.v v4, (a1) @@ -468,6 +483,8 @@ func ff_nsse16_rvv, zve32x endfunc func ff_nsse8_rvv, zve32x + lpad 0 + .macro squarediff8 vsetivli zero, 8, e8, mf2, tu, ma vle8.v v4, (a1) diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S index 389dc744f5..f9a1d9d633 100644 --- a/libavcodec/riscv/opusdsp_rvv.S +++ b/libavcodec/riscv/opusdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_opus_postfilter_rvv, zve32f, b + lpad 0 flw fa0, 0(a2) // g0 slli t1, a1, 2 flw fa1, 4(a2) // g1 diff --git a/libavcodec/riscv/pixblockdsp_rvi.S b/libavcodec/riscv/pixblockdsp_rvi.S index efdd422228..ed1af70251 100644 --- a/libavcodec/riscv/pixblockdsp_rvi.S +++ b/libavcodec/riscv/pixblockdsp_rvi.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_get_pixels_8_rvi + lpad 0 .irp row, 0, 1, 2, 3, 4, 5, 6, 7 ld t0, (a1) add a1, a1, a2 @@ -47,6 +48,7 @@ func ff_get_pixels_8_rvi endfunc func ff_get_pixels_16_rvi + lpad 0 .irp row, 0, 1, 2, 3, 4, 5, 6, 7 ld t0, 0(a1) ld t1, 8(a1) diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S index 4213cd1b85..85233470cf 100644 --- a/libavcodec/riscv/pixblockdsp_rvv.S +++ b/libavcodec/riscv/pixblockdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_get_pixels_8_rvv, zve64x + lpad 0 vsetivli zero, 8, e8, mf2, ta, ma li t0, 8 * 8 1: @@ -32,6 +33,7 @@ func ff_get_pixels_8_rvv, zve64x endfunc func ff_get_pixels_unaligned_8_rvv, zve64x + lpad 0 andi t1, a1, 7 vsetivli zero, 8, e64, m4, ta, ma li t0, 8 * 8 @@ -52,6 +54,7 @@ func ff_get_pixels_unaligned_8_rvv, zve64x endfunc func ff_diff_pixels_rvv, zve64x + lpad 0 vsetivli zero, 8, e8, mf2, ta, ma li t0, 8 * 8 vlse64.v v16, (a1), a3 @@ -63,6 +66,7 @@ func ff_diff_pixels_rvv, zve64x endfunc func ff_diff_pixels_unaligned_rvv, zve32x + lpad 0 vsetivli zero, 8, e8, mf2, ta, ma vlsseg8e8.v v16, (a1), a3 vlsseg8e8.v v24, (a2), a3 diff --git a/libavcodec/riscv/rv34dsp_rvv.S b/libavcodec/riscv/rv34dsp_rvv.S index 8eda01665d..478bc0a860 100644 --- a/libavcodec/riscv/rv34dsp_rvv.S +++ b/libavcodec/riscv/rv34dsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_rv34_inv_transform_dc_rvv, zve32x + lpad 0 lh t1, 0(a0) li t0, 13 * 13 * 3 mul t2, t0, t1 @@ -33,6 +34,7 @@ func ff_rv34_inv_transform_dc_rvv, zve32x endfunc func ff_rv34_idct_dc_add_rvv, zve32x + lpad 0 vsetivli zero, 4, e8, mf4, ta, ma vlse32.v v0, (a0), a1 li t1, 169 diff --git a/libavcodec/riscv/rv40dsp_rvv.S b/libavcodec/riscv/rv40dsp_rvv.S index 53d3d1d6f9..ca431eb8ab 100644 --- a/libavcodec/riscv/rv40dsp_rvv.S +++ b/libavcodec/riscv/rv40dsp_rvv.S @@ -351,21 +351,25 @@ func ff_avg_rv40_chroma_mc_rvv, zve32x, zba endfunc func ff_put_rv40_chroma_mc8_rvv, zve32x + lpad 0 li t6, 8 j 11b endfunc func ff_put_rv40_chroma_mc4_rvv, zve32x + lpad 0 li t6, 4 j 11b endfunc func ff_avg_rv40_chroma_mc8_rvv, zve32x + lpad 0 li t6, 8 j 21b endfunc func ff_avg_rv40_chroma_mc4_rvv, zve32x + lpad 0 li t6, 4 j 21b endfunc diff --git a/libavcodec/riscv/sbrdsp_rvv.S b/libavcodec/riscv/sbrdsp_rvv.S index 7c90a8addf..23e1a8f9c2 100644 --- a/libavcodec/riscv/sbrdsp_rvv.S +++ b/libavcodec/riscv/sbrdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_sbr_sum64x5_rvv, zve32f, zba + lpad 0 li a5, 64 addi a1, a0, 64 * 4 addi a2, a0, 128 * 4 @@ -50,6 +51,7 @@ func ff_sbr_sum64x5_rvv, zve32f, zba endfunc func ff_sbr_sum_square_rvv, zve32f, zba + lpad 0 vsetvli t0, zero, e32, m8, ta, ma slli a1, a1, 1 vmv.v.x v8, zero @@ -69,6 +71,7 @@ NOHWF fmv.x.w a0, fa0 endfunc func ff_sbr_autocorrelate_rvv, zve32f + lpad 0 vsetvli t0, zero, e32, m4, ta, ma vmv.v.x v0, zero flw fa0, (a0) @@ -158,6 +161,7 @@ func ff_sbr_autocorrelate_rvv, zve32f endfunc func ff_sbr_hf_gen_rvv, zve32f, zba + lpad 0 NOHWF fmv.w.x fa0, a4 NOHWF mv a4, a5 NOHWF mv a5, a6 @@ -208,6 +212,7 @@ NOHWF mv a5, a6 endfunc func ff_sbr_hf_g_filt_rvv, zve32f, zba + lpad 0 li t1, 40 * 2 * 4 sh3add a1, a4, a1 1: @@ -273,15 +278,18 @@ endfunc .endm func ff_sbr_hf_apply_noise_0_rvv, zve32f, b + lpad 0 hf_apply_noise 0 endfunc func ff_sbr_hf_apply_noise_3_rvv, zve32f, b + lpad 0 not a4, a4 // invert parity of kx // fall through endfunc func ff_sbr_hf_apply_noise_1_rvv, zve32f, b + lpad 0 vsetvli t0, zero, e32, m4, ta, ma vid.v v4 vxor.vx v4, v4, a4 @@ -290,5 +298,6 @@ func ff_sbr_hf_apply_noise_1_rvv, zve32f, b endfunc func ff_sbr_hf_apply_noise_2_rvv, zve32f, b + lpad 0 hf_apply_noise 2 endfunc diff --git a/libavcodec/riscv/startcode_rvb.S b/libavcodec/riscv/startcode_rvb.S index c043d59809..eec92d3340 100644 --- a/libavcodec/riscv/startcode_rvb.S +++ b/libavcodec/riscv/startcode_rvb.S @@ -37,6 +37,7 @@ .endm func ff_startcode_find_candidate_rvb, zbb + lpad 0 add a1, a0, a1 // Potentially unaligned head diff --git a/libavcodec/riscv/startcode_rvv.S b/libavcodec/riscv/startcode_rvv.S index 36a3369431..f4d0a0f087 100644 --- a/libavcodec/riscv/startcode_rvv.S +++ b/libavcodec/riscv/startcode_rvv.S @@ -27,6 +27,7 @@ #include "libavutil/riscv/asm.S" func ff_startcode_find_candidate_rvv, zve32x + lpad 0 mv t0, a0 1: vsetvli t1, a1, e8, m8, ta, ma diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S index 8b7a8b0400..d37c319db7 100644 --- a/libavcodec/riscv/svqenc_rvv.S +++ b/libavcodec/riscv/svqenc_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_ssd_int8_vs_int16_rvv, zve32x, zba + lpad 0 vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v24, zero 1: diff --git a/libavcodec/riscv/takdsp_rvv.S b/libavcodec/riscv/takdsp_rvv.S index f3a230ccec..a914ab8189 100644 --- a/libavcodec/riscv/takdsp_rvv.S +++ b/libavcodec/riscv/takdsp_rvv.S @@ -22,6 +22,7 @@ #include "libavutil/riscv/asm.S" func ff_decorrelate_ls_rvv, zve32x, zba + lpad 0 1: vsetvli t0, a2, e32, m8, ta, ma sub a2, a2, t0 @@ -36,6 +37,7 @@ func ff_decorrelate_ls_rvv, zve32x, zba endfunc func ff_decorrelate_sr_rvv, zve32x, zba + lpad 0 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v0, (a0) @@ -50,6 +52,7 @@ func ff_decorrelate_sr_rvv, zve32x, zba endfunc func ff_decorrelate_sm_rvv, zve32x, zba + lpad 0 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v8, (a1) @@ -68,6 +71,7 @@ func ff_decorrelate_sm_rvv, zve32x, zba endfunc func ff_decorrelate_sf_rvv, zve32x, zba + lpad 0 csrwi vxrm, 0 1: vsetvli t0, a2, e32, m8, ta, ma diff --git a/libavcodec/riscv/utvideodsp_rvv.S b/libavcodec/riscv/utvideodsp_rvv.S index 5e833eeb3c..30e195120b 100644 --- a/libavcodec/riscv/utvideodsp_rvv.S +++ b/libavcodec/riscv/utvideodsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_restore_rgb_planes_rvv, zve32x, zba + lpad 0 li t1, -0x80 sub a3, a3, a6 sub a4, a4, a6 @@ -53,6 +54,7 @@ func ff_restore_rgb_planes_rvv, zve32x, zba endfunc func ff_restore_rgb_planes10_rvv, zve32x, zba + lpad 0 li t1, -0x200 li t2, 0x3FF sub a3, a3, a6 diff --git a/libavcodec/riscv/vc1dsp_rvi.S b/libavcodec/riscv/vc1dsp_rvi.S index d4a1b5bf49..7725bfb628 100644 --- a/libavcodec/riscv/vc1dsp_rvi.S +++ b/libavcodec/riscv/vc1dsp_rvi.S @@ -22,6 +22,7 @@ #if __riscv_xlen >= 64 func ff_put_pixels8x8_rvi + lpad 0 .rept 8 ld t0, (a1) sd t0, (a0) @@ -33,6 +34,7 @@ func ff_put_pixels8x8_rvi endfunc func ff_put_pixels16x16_rvi + lpad 0 .rept 16 ld t0, (a1) ld t1, 8(a1) diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S index aede87ccc0..5189d5e855 100644 --- a/libavcodec/riscv/vc1dsp_rvv.S +++ b/libavcodec/riscv/vc1dsp_rvv.S @@ -22,6 +22,7 @@ #include "libavutil/riscv/asm.S" func ff_vc1_inv_trans_8x8_dc_rvv, zve64x, zba + lpad 0 lh t2, (a2) vsetivli zero, 8, e8, mf2, ta, ma vlse64.v v0, (a0), a1 @@ -44,6 +45,7 @@ func ff_vc1_inv_trans_8x8_dc_rvv, zve64x, zba endfunc func ff_vc1_inv_trans_4x8_dc_rvv, zve32x, zba + lpad 0 lh t2, (a2) vsetivli zero, 8, e8, mf2, ta, ma vlse32.v v0, (a0), a1 @@ -68,6 +70,7 @@ func ff_vc1_inv_trans_4x8_dc_rvv, zve32x, zba endfunc func ff_vc1_inv_trans_8x4_dc_rvv, zve64x, zba + lpad 0 lh t2, (a2) vsetivli zero, 4, e8, mf4, ta, ma vlse64.v v0, (a0), a1 @@ -91,6 +94,7 @@ func ff_vc1_inv_trans_8x4_dc_rvv, zve64x, zba endfunc func ff_vc1_inv_trans_4x4_dc_rvv, zve32x + lpad 0 lh t2, (a2) vsetivli zero, 4, e8, mf4, ta, ma vlse32.v v0, (a0), a1 @@ -203,6 +207,7 @@ func ff_vc1_inv_trans_4_rvv, zve32x endfunc func ff_vc1_inv_trans_8x8_rvv, zve32x + lpad 0 csrwi vxrm, 0 vsetivli zero, 8, e16, m1, ta, ma addi a1, a0, 1 * 8 * 2 @@ -240,6 +245,7 @@ func ff_vc1_inv_trans_8x8_rvv, zve32x endfunc func ff_vc1_inv_trans_8x4_rvv, zve32x + lpad 0 csrwi vxrm, 0 vsetivli zero, 4, e16, mf2, ta, ma vlseg8e16.v v0, (a2) @@ -285,6 +291,7 @@ func ff_vc1_inv_trans_8x4_rvv, zve32x endfunc func ff_vc1_inv_trans_4x8_rvv, zve32x + lpad 0 li a3, 8 * 2 csrwi vxrm, 0 vsetivli zero, 8, e16, m1, ta, ma @@ -359,6 +366,7 @@ func ff_vc1_inv_trans_4x8_rvv, zve32x endfunc func ff_vc1_inv_trans_4x4_rvv, zve32x + lpad 0 li a3, 8 * 2 csrwi vxrm, 0 vsetivli zero, 4, e16, mf2, ta, ma @@ -422,12 +430,14 @@ endfunc .endm func ff_avg_pixels16x16_rvv, zve32x + lpad 0 li t0, 16 vsetivli zero, 16, e8, m1, ta, ma j 1f endfunc func ff_avg_pixels8x8_rvv, zve32x + lpad 0 li t0, 8 vsetivli zero, 8, e8, mf2, ta, ma 1: @@ -446,6 +456,7 @@ func ff_avg_pixels8x8_rvv, zve32x endfunc func ff_vc1_unescape_buffer_rvv, zve32x + lpad 0 vsetivli zero, 2, e8, m1, ta, ma vmv.v.i v8, -1 li t4, 1 diff --git a/libavcodec/riscv/vorbisdsp_rvv.S b/libavcodec/riscv/vorbisdsp_rvv.S index d136188d2e..82e5779955 100644 --- a/libavcodec/riscv/vorbisdsp_rvv.S +++ b/libavcodec/riscv/vorbisdsp_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_vorbis_inverse_coupling_rvv, zve32f, zba + lpad 0 fmv.w.x ft0, zero 1: vsetvli t0, a2, e32, m4, ta, ma diff --git a/libavcodec/riscv/vp7dsp_rvv.S b/libavcodec/riscv/vp7dsp_rvv.S index bfcc220273..aa0223bcb7 100644 --- a/libavcodec/riscv/vp7dsp_rvv.S +++ b/libavcodec/riscv/vp7dsp_rvv.S @@ -22,6 +22,7 @@ #if __riscv_xlen >= 64 func ff_vp7_luma_dc_wht_rvv, zve32x, zba + lpad 0 li a2, 4 * 16 * 2 li a7, 16 * 2 jal t0, 1f @@ -99,6 +100,7 @@ func ff_vp7_luma_dc_wht_rvv, zve32x, zba endfunc func ff_vp7_idct_add_rvv, zve32x + lpad 0 jal t0, 1b csrwi vxrm, 2 vsetvli zero, zero, e8, mf4, ta, ma @@ -130,6 +132,7 @@ endfunc .irp type, y, uv func ff_vp7_idct_dc_add4\type\()_rvv, zve32x + lpad 0 li t0, 32 vsetivli zero, 4, e16, mf2, ta, ma li t1, 23170 diff --git a/libavcodec/riscv/vp8dsp_rvi.S b/libavcodec/riscv/vp8dsp_rvi.S index 50ba4f293f..07d5c85032 100644 --- a/libavcodec/riscv/vp8dsp_rvi.S +++ b/libavcodec/riscv/vp8dsp_rvi.S @@ -22,6 +22,7 @@ #if __riscv_xlen >= 64 func ff_put_vp8_pixels16_rvi + lpad 0 1: addi a4, a4, -1 ld t0, (a2) @@ -36,6 +37,7 @@ func ff_put_vp8_pixels16_rvi endfunc func ff_put_vp8_pixels8_rvi + lpad 0 1: addi a4, a4, -1 ld t0, (a2) @@ -49,6 +51,7 @@ endfunc #endif func ff_put_vp8_pixels4_rvi + lpad 0 1: addi a4, a4, -1 lw t0, (a2) diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S index eb1492a3b9..839d228b0f 100644 --- a/libavcodec/riscv/vp8dsp_rvv.S +++ b/libavcodec/riscv/vp8dsp_rvv.S @@ -45,6 +45,7 @@ #if __riscv_xlen >= 64 func ff_vp8_luma_dc_wht_rvv, zve64x + lpad 0 vsetivli zero, 1, e64, m1, ta, ma vlseg4e64.v v4, (a1) vsetivli zero, 4, e16, mf2, ta, ma @@ -99,6 +100,7 @@ endfunc #endif func ff_vp8_idct_add_rvv, zve32x + lpad 0 csrwi vxrm, 0 vsetivli zero, 4, e16, mf2, ta, ma addi a3, a1, 1 * 4 * 2 @@ -158,6 +160,7 @@ func ff_vp8_idct_add_rvv, zve32x endfunc func ff_vp8_idct_dc_add_rvv, zve32x + lpad 0 lh a3, (a1) addi a3, a3, 4 srai a3, a3, 3 @@ -182,6 +185,7 @@ func ff_vp78_idct_dc_add_rvv, zve32x endfunc func ff_vp8_idct_dc_add4y_rvv, zve32x + lpad 0 li t0, 32 vsetivli zero, 4, e16, mf2, ta, ma li t1, 4 - (128 << 3) @@ -217,6 +221,7 @@ func ff_vp78_idct_dc_add4y_rvv, zve32x endfunc func ff_vp8_idct_dc_add4uv_rvv, zve32x + lpad 0 li t0, 32 vsetivli zero, 4, e16, mf2, ta, ma li t1, 4 - (128 << 3) @@ -265,6 +270,7 @@ endfunc .macro put_vp8_bilin_h_v type mn func ff_put_vp8_bilin4_\type\()_rvv, zve32x + lpad 0 vsetvlstatic8 4 .Lbilin_\type: li t1, 8 @@ -310,6 +316,7 @@ put_vp8_bilin_h_v h a5 put_vp8_bilin_h_v v a6 func ff_put_vp8_bilin4_hv_rvv, zve32x + lpad 0 vsetvlstatic8 4 .Lbilin_hv: li t3, 8 @@ -335,16 +342,19 @@ endfunc .irp len,16,8 func ff_put_vp8_bilin\len\()_h_rvv, zve32x + lpad 0 vsetvlstatic8 \len j .Lbilin_h endfunc func ff_put_vp8_bilin\len\()_v_rvv, zve32x + lpad 0 vsetvlstatic8 \len j .Lbilin_v endfunc func ff_put_vp8_bilin\len\()_hv_rvv, zve32x + lpad 0 vsetvlstatic8 \len j .Lbilin_hv endfunc @@ -441,6 +451,7 @@ endconst .macro epel len size type func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x, zba + lpad 0 epel_filter \size \type t vsetvlstatic8 \len 1: @@ -456,6 +467,7 @@ endfunc .macro epel_hv len hsize vsize func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x, zba + lpad 0 #if __riscv_xlen == 64 addi sp, sp, -48 .irp n,0,1,2,3,4,5 diff --git a/libavcodec/riscv/vp9_intra_rvi.S b/libavcodec/riscv/vp9_intra_rvi.S index dadd4be194..c294b6c551 100644 --- a/libavcodec/riscv/vp9_intra_rvi.S +++ b/libavcodec/riscv/vp9_intra_rvi.S @@ -22,6 +22,7 @@ #if __riscv_xlen >= 64 func ff_v_32x32_rvi, zba + lpad 0 ld t0, (a3) ld t1, 8(a3) ld t2, 16(a3) @@ -43,6 +44,7 @@ func ff_v_32x32_rvi, zba endfunc func ff_v_16x16_rvi, zba + lpad 0 ld t0, (a3) ld t1, 8(a3) .rept 8 @@ -58,6 +60,7 @@ func ff_v_16x16_rvi, zba endfunc func ff_v_8x8_rvi, zba + lpad 0 ld t0, (a3) .rept 4 add a7, a0, a1 diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S index beeb1ff88c..13d695c831 100644 --- a/libavcodec/riscv/vp9_intra_rvv.S +++ b/libavcodec/riscv/vp9_intra_rvv.S @@ -91,6 +91,7 @@ .macro func_dc name size type n restore ext func ff_\()\name\()_\()\size\()x\size\()_rvv, \ext + lpad 0 .if \size == 8 dc_e64 \type \size \n \restore .else @@ -119,6 +120,7 @@ func_dc dc_top 16 top 4 1 zve32x func_dc dc_top 8 top 3 0 zve64x func ff_h_32x32_rvv, zve32x + lpad 0 li t0, 32 addi a2, a2, 31 vsetvli zero, t0, e8, m2, ta, ma @@ -139,6 +141,7 @@ func ff_h_32x32_rvv, zve32x endfunc func ff_h_16x16_rvv, zve32x + lpad 0 addi a2, a2, 15 vsetivli zero, 16, e8, m1, ta, ma @@ -157,6 +160,7 @@ func ff_h_16x16_rvv, zve32x endfunc func ff_h_8x8_rvv, zve32x + lpad 0 addi a2, a2, 7 vsetivli zero, 8, e8, mf2, ta, ma @@ -190,6 +194,7 @@ endfunc .endm func ff_tm_32x32_rvv, zve32x + lpad 0 lbu a4, -1(a3) li t5, 32 @@ -244,6 +249,7 @@ func ff_tm_16x16_rvv, zve32x endfunc func ff_tm_8x8_rvv, zve32x + lpad 0 vsetivli zero, 8, e16, m1, ta, ma vle8.v v8, (a3) vzext.vf2 v28, v8 @@ -269,6 +275,7 @@ func ff_tm_8x8_rvv, zve32x endfunc func ff_tm_4x4_rvv, zve32x + lpad 0 vsetivli zero, 4, e16, mf2, ta, ma vle8.v v8, (a3) vzext.vf2 v28, v8 diff --git a/libavcodec/riscv/vp9_mc_rvi.S b/libavcodec/riscv/vp9_mc_rvi.S index 0db14e83c7..4a8371b232 100644 --- a/libavcodec/riscv/vp9_mc_rvi.S +++ b/libavcodec/riscv/vp9_mc_rvi.S @@ -22,6 +22,7 @@ #if __riscv_xlen >= 64 func ff_copy64_rvi + lpad 0 1: addi a4, a4, -1 ld t0, (a2) @@ -48,6 +49,7 @@ func ff_copy64_rvi endfunc func ff_copy32_rvi + lpad 0 1: addi a4, a4, -1 ld t0, (a2) @@ -66,6 +68,7 @@ func ff_copy32_rvi endfunc func ff_copy16_rvi + lpad 0 1: addi a4, a4, -1 ld t0, (a2) @@ -80,6 +83,7 @@ func ff_copy16_rvi endfunc func ff_copy8_rvi + lpad 0 1: addi a4, a4, -1 ld t0, (a2) @@ -93,6 +97,7 @@ endfunc #endif func ff_copy4_rvi + lpad 0 1: addi a4, a4, -1 lw t0, (a2) diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S index 7cb38ec94a..8d776661d9 100644 --- a/libavcodec/riscv/vp9_mc_rvv.S +++ b/libavcodec/riscv/vp9_mc_rvv.S @@ -38,6 +38,7 @@ .macro copy_avg len func ff_vp9_avg\len\()_rvv, zve32x + lpad 0 csrwi vxrm, 0 vsetvlstatic8 \len, t0, 64 1: