Convert deinterlacing MMX code to YASM

Originally committed as revision 24615 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Vitor Sessak 2010-07-31 14:50:51 +00:00
parent c2eae137e9
commit de4bc44abb
4 changed files with 110 additions and 84 deletions

View File

@ -39,7 +39,6 @@
#include "libavcore/imgutils.h"
#if HAVE_MMX
#include "x86/mmx.h"
#include "x86/dsputil_mmx.h"
#endif
@ -55,6 +54,14 @@
#define FF_PIXEL_PACKED 1 /**< only one components containing all the channels */
#define FF_PIXEL_PALETTE 2 /**< one components containing indexes for a palette */
/*
 * Select the deinterlacing line filters used under the generic names
 * deinterlace_line / deinterlace_line_inplace: the external ff_*_mmx
 * routines when HAVE_MMX is set, the C versions (*_c) otherwise.
 *
 * NOTE(review): the build rule in this change lists x86/deinterlace.o under
 * MMX-OBJS-$(HAVE_YASM). Confirm that a build with HAVE_MMX set but without
 * yasm still gets a definition for the ff_*_mmx symbols; otherwise this
 * condition (and the guard around the C versions) probably needs to test
 * HAVE_YASM as well.
 */
#if HAVE_MMX
#define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx
#define deinterlace_line ff_deinterlace_line_mmx
#else
#define deinterlace_line_inplace deinterlace_line_inplace_c
#define deinterlace_line deinterlace_line_c
#endif
typedef struct PixFmtInfo {
uint8_t nb_channels; /**< number of channels (including alpha) */
uint8_t color_type; /**< color type (see FF_COLOR_xxx constants) */
@ -1119,61 +1126,14 @@ int img_get_alpha_info(const AVPicture *src,
return ret;
}
#if HAVE_MMX
/*
 * One step of the in-place deinterlacing filter on 4 bytes per line.
 *
 * Reads lum_m4[0], lum_m3[0], lum_m2[0], lum_m1[0] and lum[0], widens the
 * bytes to 16-bit words by interleaving with mm7, and computes
 *   (4 * (lum_m3 + lum_m1) + 2 * lum_m2 + mm6 - (lum_m4 + lum)) >> 3
 * with an unsigned-saturating subtract and an unsigned-saturating pack.
 * The result is stored to lum_m2[0]; the bytes originally loaded from
 * lum_m2[0] are stored to lum_m4[0] before that.
 *
 * Expects the caller to have cleared mm7 and loaded mm6 (the code using this
 * macro loads it from ff_pw_4). Clobbers mm0-mm4.
 */
#define DEINT_INPLACE_LINE_LUM \
movd_m2r(lum_m4[0],mm0);\
movd_m2r(lum_m3[0],mm1);\
movd_m2r(lum_m2[0],mm2);\
movd_m2r(lum_m1[0],mm3);\
movd_m2r(lum[0],mm4);\
punpcklbw_r2r(mm7,mm0);\
movd_r2m(mm2,lum_m4[0]);\
punpcklbw_r2r(mm7,mm1);\
punpcklbw_r2r(mm7,mm2);\
punpcklbw_r2r(mm7,mm3);\
punpcklbw_r2r(mm7,mm4);\
paddw_r2r(mm3,mm1);\
psllw_i2r(1,mm2);\
paddw_r2r(mm4,mm0);\
psllw_i2r(2,mm1);\
paddw_r2r(mm6,mm2);\
paddw_r2r(mm2,mm1);\
psubusw_r2r(mm0,mm1);\
psrlw_i2r(3,mm1);\
packuswb_r2r(mm7,mm1);\
movd_r2m(mm1,lum_m2[0]);
/*
 * One step of the deinterlacing filter on 4 bytes per line, writing to a
 * separate destination.
 *
 * Same arithmetic as DEINT_INPLACE_LINE_LUM, but none of the input lines is
 * modified: the 4 result bytes are stored to dst[0] only.
 *
 * Expects the caller to have cleared mm7 and loaded mm6 (the code using this
 * macro loads it from ff_pw_4). Clobbers mm0-mm4.
 */
#define DEINT_LINE_LUM \
movd_m2r(lum_m4[0],mm0);\
movd_m2r(lum_m3[0],mm1);\
movd_m2r(lum_m2[0],mm2);\
movd_m2r(lum_m1[0],mm3);\
movd_m2r(lum[0],mm4);\
punpcklbw_r2r(mm7,mm0);\
punpcklbw_r2r(mm7,mm1);\
punpcklbw_r2r(mm7,mm2);\
punpcklbw_r2r(mm7,mm3);\
punpcklbw_r2r(mm7,mm4);\
paddw_r2r(mm3,mm1);\
psllw_i2r(1,mm2);\
paddw_r2r(mm4,mm0);\
psllw_i2r(2,mm1);\
paddw_r2r(mm6,mm2);\
paddw_r2r(mm2,mm1);\
psubusw_r2r(mm0,mm1);\
psrlw_i2r(3,mm1);\
packuswb_r2r(mm7,mm1);\
movd_r2m(mm1,dst[0]);
#endif
#if !HAVE_MMX
/* filter parameters: [-1 4 2 4 -1] // 8 */
static void deinterlace_line(uint8_t *dst,
static void deinterlace_line_c(uint8_t *dst,
const uint8_t *lum_m4, const uint8_t *lum_m3,
const uint8_t *lum_m2, const uint8_t *lum_m1,
const uint8_t *lum,
int size)
{
#if !HAVE_MMX
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int sum;
@ -1191,27 +1151,12 @@ static void deinterlace_line(uint8_t *dst,
lum++;
dst++;
}
#else
{
pxor_r2r(mm7,mm7);
movq_m2r(ff_pw_4,mm6);
}
for (;size > 3; size-=4) {
DEINT_LINE_LUM
lum_m4+=4;
lum_m3+=4;
lum_m2+=4;
lum_m1+=4;
lum+=4;
dst+=4;
}
#endif
}
static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum,
int size)
static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3,
uint8_t *lum_m2, uint8_t *lum_m1,
uint8_t *lum, int size)
{
#if !HAVE_MMX
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int sum;
@ -1229,22 +1174,8 @@ static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *
lum_m1++;
lum++;
}
#else
{
pxor_r2r(mm7,mm7);
movq_m2r(ff_pw_4,mm6);
}
for (;size > 3; size-=4) {
DEINT_INPLACE_LINE_LUM
lum_m4+=4;
lum_m3+=4;
lum_m2+=4;
lum_m1+=4;
lum+=4;
}
#endif
}
#endif
/* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
top field is copied as is, but the bottom field is deinterlaced

View File

@ -35,6 +35,7 @@ MMX-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp_mmx.o \
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
x86/deinterlace.o \
$(YASM-OBJS-yes)
MMX-OBJS-$(CONFIG_FFT) += x86/fft.o

View File

@ -0,0 +1,81 @@
;******************************************************************************
;* MMX optimized deinterlacing functions
;* Copyright (c) 2010 Vitor Sessak
;* Copyright (c) 2002 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include "x86inc.asm"
%include "x86util.asm"
SECTION_RODATA

; pw_4 is defined in another object; it is only declared here.
cextern pw_4

; Switch to the code section before any function is emitted. Without this,
; everything generated by cglobal below would be assembled into the read-only
; data section selected above.
SECTION .text
; DEINTERLACE variant
;
; Emits one deinterlacing line-filter function. With %1 == inplace the
; function deinterlace_line_inplace_mmx is generated, otherwise
; deinterlace_line_mmx (with an extra leading dst argument).
;
; Every loop iteration handles 4 bytes from each of the five input lines.
; The bytes are widened to 16-bit words and combined as
;   (4 * (lum_m3 + lum_m1) + 2 * lum_m2 + [pw_4] - (lum_m4 + lum)) >> 3
; using an unsigned-saturating subtract and an unsigned-saturating pack back
; to bytes.
;
; The loop condition is tested after the body, so 4 bytes per line are
; processed even when size is <= 4, and the loop repeats while size is still
; positive after subtracting 4.
%macro DEINTERLACE 1
%ifidn %1, inplace
; NOTE: despite the const qualifiers in the prototype comment below, this
; variant stores through lum_m4 and lum_m2.
;void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size)
cglobal deinterlace_line_inplace_mmx, 6,6,7, lum_m4, lum_m3, lum_m2, lum_m1, lum, size
%else
;void ff_deinterlace_line_mmx(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum, int size)
cglobal deinterlace_line_mmx, 7,7,7, dst, lum_m4, lum_m3, lum_m2, lum_m1, lum, size
%endif
; mm7 = 0, used as the high half when widening bytes and when packing.
pxor mm7, mm7
; mm6 = constant added before the final shift (presumably four words of 4,
; i.e. the rounding term for >> 3 -- confirm against the definition of pw_4).
movq mm6, [pw_4]
.nextrow
; Load 4 bytes from each input line.
movd mm0, [lum_m4q]
movd mm1, [lum_m3q]
movd mm2, [lum_m2q]
%ifidn %1, inplace
; In-place only: overwrite lum_m4 with the unfiltered lum_m2 bytes. lum_m4
; has already been loaded into mm0 above.
movd [lum_m4q], mm2
%endif
movd mm3, [lum_m1q]
movd mm4, [lumq]
; Widen all five inputs from bytes to words.
punpcklbw mm0, mm7
punpcklbw mm1, mm7
punpcklbw mm2, mm7
punpcklbw mm3, mm7
punpcklbw mm4, mm7
; mm1 = lum_m3 + lum_m1
paddw mm1, mm3
; mm2 = 2 * lum_m2
psllw mm2, 1
; mm0 = lum_m4 + lum
paddw mm0, mm4
; mm1 = 4 * (lum_m3 + lum_m1)
psllw mm1, 2
; mm2 = 2 * lum_m2 + [pw_4]
paddw mm2, mm6
; mm1 = sum of the positive terms
paddw mm1, mm2
; Subtract the outer taps; saturates at 0 instead of wrapping.
psubusw mm1, mm0
psrlw mm1, 3
; Narrow back to bytes with unsigned saturation.
packuswb mm1, mm7
%ifidn %1, inplace
; In-place only: the filtered bytes replace lum_m2.
movd [lum_m2q], mm1
%else
movd [dstq], mm1
add dstq, 4
%endif
; Advance every line pointer by the 4 bytes just processed.
add lum_m4q, 4
add lum_m3q, 4
add lum_m2q, 4
add lum_m1q, 4
add lumq, 4
sub sized, 4
jg .nextrow
REP_RET
%endmacro
; Instantiate both variants of the macro: the "" argument does not match
; "inplace" and therefore selects the branch that writes to dst; the second
; invocation generates the in-place function.
DEINTERLACE ""
DEINTERLACE inplace

View File

@ -179,4 +179,17 @@ void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
void ff_mmx_idct(DCTELEM *block);
void ff_mmxext_idct(DCTELEM *block);
/**
 * Deinterlacing line filter, implemented in assembly.
 *
 * Combines five input lines into one output line.
 *
 * @param dst    output line; receives the filtered bytes
 * @param lum_m4 input line (outer tap), read only
 * @param lum_m3 input line, read only
 * @param lum_m2 input line (centre tap), read only
 * @param lum_m1 input line, read only
 * @param lum    input line (outer tap), read only
 * @param size   number of bytes to process per line
 */
void ff_deinterlace_line_mmx(
    uint8_t *dst,
    const uint8_t *lum_m4,
    const uint8_t *lum_m3,
    const uint8_t *lum_m2,
    const uint8_t *lum_m1,
    const uint8_t *lum,
    int size);
/**
 * In-place deinterlacing line filter, implemented in assembly.
 *
 * The routine stores through lum_m4 (which receives the previous contents of
 * lum_m2) and through lum_m2 (which receives the filtered bytes), so these
 * two pointers must not be const-qualified. The remaining lines are only
 * read.
 *
 * @param lum_m4 input line (outer tap); overwritten
 * @param lum_m3 input line, read only
 * @param lum_m2 input line (centre tap); overwritten with the result
 * @param lum_m1 input line, read only
 * @param lum    input line (outer tap), read only
 * @param size   number of bytes to process per line
 */
void ff_deinterlace_line_inplace_mmx(uint8_t *lum_m4,
                                     const uint8_t *lum_m3,
                                     uint8_t *lum_m2,
                                     const uint8_t *lum_m1,
                                     const uint8_t *lum, int size);
#endif /* AVCODEC_X86_DSPUTIL_MMX_H */