From 884078d2df0fecd769afdd916fac8727dbd7d632 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet Date: Wed, 28 May 2014 01:03:23 +0200 Subject: [PATCH] x86: huffyuvdsp: add SSE2 median prediction From 5010c to 4566 on lagarith YUY2. Signed-off-by: Michael Niedermayer --- libavcodec/x86/huffyuvdsp.asm | 98 +++++++++++++++++++------------- libavcodec/x86/huffyuvdsp_init.c | 4 ++ 2 files changed, 64 insertions(+), 38 deletions(-) diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm index a923e70e1e..7ebb07c14e 100644 --- a/libavcodec/x86/huffyuvdsp.asm +++ b/libavcodec/x86/huffyuvdsp.asm @@ -33,64 +33,86 @@ SECTION_TEXT ; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top, ; const uint8_t *diff, int w, ; int *left, int *left_top) -INIT_MMX mmxext -cglobal add_hfyu_median_pred, 6,6,0, dst, top, diff, w, left, left_top - movq mm0, [topq] - movq mm2, mm0 - movd mm4, [left_topq] - psllq mm2, 8 - movq mm1, mm0 - por mm4, mm2 - movd mm3, [leftq] - psubb mm0, mm4 ; t-tl +%macro LSHIFT 2 +%if mmsize > 8 + pslldq %1, %2 +%else + psllq %1, 8*(%2) +%endif +%endmacro + +%macro RSHIFT 2 +%if mmsize > 8 + psrldq %1, %2 +%else + psrlq %1, 8*(%2) +%endif +%endmacro + +%macro HFYU_MEDIAN 0 +cglobal add_hfyu_median_pred, 6,6,8, dst, top, diff, w, left, left_top + movu m0, [topq] + mova m2, m0 + movd m4, [left_topq] + LSHIFT m2, 1 + mova m1, m0 + por m4, m2 + movd m3, [leftq] + psubb m0, m4 ; t-tl add dstq, wq add topq, wq add diffq, wq neg wq jmp .skip .loop: - movq mm4, [topq+wq] - movq mm0, mm4 - psllq mm4, 8 - por mm4, mm1 - movq mm1, mm0 ; t - psubb mm0, mm4 ; t-tl + movu m4, [topq+wq] + mova m0, m4 + LSHIFT m4, 1 + por m4, m1 + mova m1, m0 ; t + psubb m0, m4 ; t-tl .skip: - movq mm2, [diffq+wq] + movu m2, [diffq+wq] %assign i 0 -%rep 8 - movq mm4, mm0 - paddb mm4, mm3 ; t-tl+l - movq mm5, mm3 - pmaxub mm3, mm1 - pminub mm5, mm1 - pminub mm3, mm4 - pmaxub mm3, mm5 ; median - paddb mm3, mm2 ; +residual +%rep mmsize + mova m4, m0 + paddb m4, m3 ; t-tl+l + mova m5, m3 + pmaxub m3, m1 + pminub m5, m1 + pminub m3, m4 + pmaxub m3, m5 ; median + paddb m3, m2 ; +residual %if i==0 - movq mm7, mm3 - psllq mm7, 56 + mova m7, m3 + LSHIFT m7, mmsize-1 %else - movq mm6, mm3 - psrlq mm7, 8 - psllq mm6, 56 - por mm7, mm6 + mova m6, m3 + RSHIFT m7, 1 + LSHIFT m6, mmsize-1 + por m7, m6 %endif -%if i<7 - psrlq mm0, 8 - psrlq mm1, 8 - psrlq mm2, 8 +%if iadd_bytes = ff_add_bytes_sse2; + c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2; } if (EXTERNAL_SSSE3(cpu_flags)) {