lavc/vp8dsp: add R-V V vp7_idct_dc_add

This just computes the DC coefficient in C and hands over to code shared
with VP8. Accordingly, the bulk of the changes just rewrite the VP8
code so that it can be shared.

Nothing to write home about:
vp7_idct_dc_add_c:       1.7
vp7_idct_dc_add_rvv_i32: 1.2
This commit is contained in:
Rémi Denis-Courmont 2024-06-01 18:55:44 +03:00
parent d866f49791
commit 30797e4ff6
2 changed files with 34 additions and 8 deletions

View File

@ -27,6 +27,15 @@
void ff_vp7_luma_dc_wht_rvv(int16_t block[4][4][16], int16_t dc[16]);
void ff_vp7_idct_add_rvv(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp78_idct_dc_add_rvv(uint8_t *, int16_t block[16], ptrdiff_t, int dc);
/*
 * VP7 wrapper: derive the DC term from block[0] in C, then hand over to
 * ff_vp78_idct_dc_add_rvv(), which takes the precomputed DC as its last
 * argument.
 */
static void ff_vp7_idct_dc_add_rvv(uint8_t *dst, int16_t block[16],
ptrdiff_t stride)
{
    /* Two successive multiplications by 23170, with intermediate scaling. */
    const int scaled = 23170 * block[0] >> 14;
    const int dc     = (23170 * scaled + 0x20000) >> 18;

    ff_vp78_idct_dc_add_rvv(dst, block, stride, dc);
}
av_cold void ff_vp7dsp_init_riscv(VP8DSPContext *c)
{
@ -37,8 +46,9 @@ av_cold void ff_vp7dsp_init_riscv(VP8DSPContext *c)
ff_rv_vlen_least(128)) {
#if __riscv_xlen >= 64
c->vp8_luma_dc_wht = ff_vp7_luma_dc_wht_rvv;
#endif
c->vp8_idct_add = ff_vp7_idct_add_rvv;
#endif
c->vp8_idct_dc_add = ff_vp7_idct_dc_add_rvv;
}
#endif
}

View File

@ -98,6 +98,29 @@ func ff_vp8_luma_dc_wht_rvv, zve64x
endfunc
#endif
/* VP8 entry point. Loads the 16-bit value at a1 (presumably block[0], going
 * by the C prototypes of the sibling routines; confirm against the caller),
 * computes (value + 4) >> 3 into a3, and contains no ret: execution
 * continues into the code placed right after this function. */
func ff_vp8_idct_dc_add_rvv, zve32x
lh a3, (a1) # a3 = sign-extended halfword read from a1
addi a3, a3, 4 # rounding bias for the shift below
srai a3, a3, 3 # arithmetic shift: a3 = (value + 4) >> 3
# fall through to ff_vp78_idct_dc_add_rvv, which reads the DC term from a3
endfunc
/* Adds the scalar in a3 to a 4x4 block of bytes and stores it back with
 * clamping to the unsigned 8-bit range.
 *
 * Registers as used below (the C prototype is
 * (uint8_t *, int16_t block[16], ptrdiff_t, int dc), so under the standard
 * calling convention these should be dst, block, stride and dc):
 *   a0 - base address of the 4 rows that are read and written back
 *   a1 - address of a halfword that gets cleared to zero
 *   a2 - byte distance between consecutive rows
 *   a3 - value added to each of the 16 bytes
 */
func ff_vp78_idct_dc_add_rvv, zve32x
csrwi vxrm, 0 # fixed-point rounding mode CSR = 0
vsetivli zero, 4, e8, mf4, ta, ma # vl = 4, SEW = 8, LMUL = 1/4
sh zero, (a1) # clear the halfword at a1
vlse32.v v8, (a0), a2 # strided load: 4 rows of 32 bits (4 bytes) each
vsetivli zero, 16, e16, m2, ta, ma # view the data as 16 elements of 16 bits
vzext.vf2 v16, v8 # zero-extend the 16 bytes to 16 bits
vadd.vx v16, v16, a3 # add the scalar to every element
vmax.vx v16, v16, zero # signed max with 0: negative sums become 0
vsetvli zero, zero, e8, m1, ta, ma # keep vl = 16, switch to 8-bit elements
vnclipu.wi v8, v16, 0 # narrow to 8 bits, unsigned saturation, shift 0
vsetivli zero, 4, e8, mf4, ta, ma # same configuration as for the load
vsse32.v v8, (a0), a2 # strided store of the 4 rows back to a0
ret
endfunc
.macro vp8_idct_dc_add
vlse32.v v0, (a0), a2
lh a5, 0(a1)
@ -120,13 +143,6 @@ endfunc
addi a1, a1, 32
.endm
/* Macro-based definition of ff_vp8_idct_dc_add_rvv: configures the vector
 * unit, expands the vp8_idct_dc_add macro once and returns.
 * NOTE(review): the same symbol is also defined earlier in this diff, so
 * these are presumably the lines removed by this hunk; confirm. */
func ff_vp8_idct_dc_add_rvv, zve32x
vsetivli zero, 4, e8, mf4, ta, ma # vl = 4, SEW = 8, LMUL = 1/4
vp8_idct_dc_add # macro body is only partly visible in this view
ret
endfunc
func ff_vp8_idct_dc_add4y_rvv, zve32x
vsetivli zero, 4, e8, mf4, ta, ma
.rept 3