diff --git a/libswscale/riscv/input_rvv.S b/libswscale/riscv/input_rvv.S index d07db43b55..759f157713 100644 --- a/libswscale/riscv/input_rvv.S +++ b/libswscale/riscv/input_rvv.S @@ -21,12 +21,14 @@ #include "libavutil/riscv/asm.S" func ff_bgr24ToY_rvv, zve32x + lpad 0 lw t1, 8(a5) # BY lw t3, 0(a5) # RY j 1f endfunc func ff_rgb24ToY_rvv, zve32x, zba + lpad 0 lw t1, 0(a5) # RY lw t3, 8(a5) # BY 1: @@ -55,6 +57,7 @@ func ff_rgb24ToY_rvv, zve32x, zba endfunc func ff_bgr24ToUV_rvv, zve32x + lpad 0 lw t1, 20(a6) # BU lw t4, 32(a6) # BV lw t3, 12(a6) # RU @@ -63,6 +66,7 @@ func ff_bgr24ToUV_rvv, zve32x endfunc func ff_rgb24ToUV_rvv, zve32x, zba + lpad 0 lw t1, 12(a6) # RU lw t4, 24(a6) # RV lw t3, 20(a6) # BU @@ -101,6 +105,7 @@ func ff_rgb24ToUV_rvv, zve32x, zba endfunc func ff_bgr24ToUV_half_rvv, zve32x + lpad 0 lw t1, 20(a6) # BU lw t4, 32(a6) # BV lw t3, 12(a6) # RU @@ -109,6 +114,7 @@ func ff_bgr24ToUV_half_rvv, zve32x endfunc func ff_rgb24ToUV_half_rvv, zve32x, zba + lpad 0 lw t1, 12(a6) # RU lw t4, 24(a6) # RV lw t3, 20(a6) # BU @@ -152,12 +158,14 @@ endfunc .macro rgba_input chr0, chr1, high func ff_\chr1\()ToY_rvv, zve32x + lpad 0 lw t1, 8(a5) # BY lw t3, 0(a5) # RY j 1f endfunc func ff_\chr0\()ToY_rvv, zve32x, zba + lpad 0 lw t1, 0(a5) # RY lw t3, 8(a5) # BY 1: @@ -192,6 +200,7 @@ func ff_\chr0\()ToY_rvv, zve32x, zba endfunc func ff_\chr1\()ToUV_rvv, zve32x + lpad 0 lw t1, 20(a6) # BU lw t4, 32(a6) # BV lw t3, 12(a6) # RU @@ -200,6 +209,7 @@ func ff_\chr1\()ToUV_rvv, zve32x endfunc func ff_\chr0\()ToUV_rvv, zve32x, zba + lpad 0 lw t1, 12(a6) # RU lw t4, 24(a6) # RV lw t3, 20(a6) # BU @@ -244,6 +254,7 @@ func ff_\chr0\()ToUV_rvv, zve32x, zba endfunc func ff_\chr1\()ToUV_half_rvv, zve32x + lpad 0 lw t1, 20(a6) # BU lw t4, 32(a6) # BV lw t3, 12(a6) # RU @@ -252,6 +263,7 @@ func ff_\chr1\()ToUV_half_rvv, zve32x endfunc func ff_\chr0\()ToUV_half_rvv, zve32x, zba + lpad 0 lw t1, 12(a6) # RU lw t4, 24(a6) # RV lw t3, 20(a6) # BU diff --git a/libswscale/riscv/range_rvv.S b/libswscale/riscv/range_rvv.S index 19a74eba79..24005170af 100644 --- a/libswscale/riscv/range_rvv.S +++ b/libswscale/riscv/range_rvv.S @@ -21,6 +21,7 @@ #include "libavutil/riscv/asm.S" func ff_range_lum_to_jpeg_16_rvv, zve32x, zba + lpad 0 li t1, 30189 li t2, 19077 li t3, -39057361 @@ -42,6 +43,7 @@ func ff_range_lum_to_jpeg_16_rvv, zve32x, zba endfunc func ff_range_lum_from_jpeg_16_rvv, zve32x, zba + lpad 0 li t1, 14071 li t2, 33561947 1: @@ -61,6 +63,7 @@ func ff_range_lum_from_jpeg_16_rvv, zve32x, zba endfunc func ff_range_chr_to_jpeg_16_rvv, zve32x, zba + lpad 0 li t1, 30775 li t2, 4663 li t3, -9289992 @@ -89,6 +92,7 @@ func ff_range_chr_to_jpeg_16_rvv, zve32x, zba endfunc func ff_range_chr_from_jpeg_16_rvv, zve32x, zba + lpad 0 li t1, 1799 li t2, 4081085 1: diff --git a/libswscale/riscv/rgb2rgb_rvb.S b/libswscale/riscv/rgb2rgb_rvb.S index d18e5ba01b..af9ce2d215 100644 --- a/libswscale/riscv/rgb2rgb_rvb.S +++ b/libswscale/riscv/rgb2rgb_rvb.S @@ -24,6 +24,7 @@ #if (__riscv_xlen >= 64) func ff_shuffle_bytes_3210_rvb, zba, zbb + lpad 0 srli a2, a2, 2 bswap32_rvb a1, a0, a2 endfunc diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S index e1270ac0df..a1a927b00f 100644 --- a/libswscale/riscv/rgb2rgb_rvv.S +++ b/libswscale/riscv/rgb2rgb_rvv.S @@ -21,11 +21,13 @@ #include "libavutil/riscv/asm.S" func ff_shuffle_bytes_0321_rvv, zve32x + lpad 0 li t1, 0x00ff00ff j 1f endfunc func ff_shuffle_bytes_2103_rvv, zve32x, zba + lpad 0 li t1, ~0x00ff00ff 1: not t2, t1 @@ -49,12 +51,14 @@ func ff_shuffle_bytes_2103_rvv, zve32x, zba endfunc func ff_shuffle_bytes_1230_rvv, zve32x + lpad 0 li t1, 24 li t2, 8 j 3f endfunc func ff_shuffle_bytes_3012_rvv, zve32x, zba + lpad 0 li t1, 8 li t2, 24 3: @@ -75,6 +79,7 @@ func ff_shuffle_bytes_3012_rvv, zve32x, zba endfunc func ff_interleave_bytes_rvv, zve32x, zba + lpad 0 1: mv t0, a0 mv t1, a1 @@ -101,6 +106,7 @@ func ff_interleave_bytes_rvv, zve32x, zba endfunc func ff_deinterleave_bytes_rvv, zve32x, zba + lpad 0 1: mv t0, a0 mv t1, a1 @@ -127,6 +133,7 @@ func ff_deinterleave_bytes_rvv, zve32x, zba endfunc .macro yuy2_to_i422p luma, chroma + lpad 0 srai t4, a4, 1 // pixel width -> chroma width lw t6, (sp) slli t5, a4, 1 // pixel width -> (source) byte width