FFmpeg/libavcodec/x86/dsputil_mmx.h

/*
 * MMX optimized DSP utils
 * Copyright (c) 2007  Aurelien Jacobs <aurel@gnuage.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_X86_DSPUTIL_MMX_H
#define AVCODEC_X86_DSPUTIL_MMX_H

#include <stddef.h>
#include <stdint.h>

#include "libavcodec/dsputil.h"
#include "libavutil/x86/asm.h"
#include "constants.h"

// Set every 16-bit word of MMX register regd to 0x0001.
// pcmpeqd of a register with itself sets all bits; the logical right shift
// of each word by 15 then leaves only bit 0 of each word set.
// regd is the bare register name; the macro supplies the "%%" prefix itself.
// Expands to a standalone asm statement with no operands and no clobber
// list, so the compiler is not informed that regd is modified.
#define MOVQ_WONE(regd) \
    __asm__ volatile ( \
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
    "psrlw $15, %%" #regd ::)

// Emit a ".p2align 3" assembler directive at the point of use, i.e. pad the
// output up to the next 2^3 = 8-byte boundary.
#define JUMPALIGN()     __asm__ volatile (".p2align 3"::)
// Clear MMX register regd: XOR of a register with itself yields all zeros.
// regd is the bare register name; the macro supplies the "%%" prefix itself.
#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)

// Set every byte of MMX register regd to 0xfe.
// pcmpeqd of a register with itself gives 0xff in every byte; adding the
// register to itself byte-wise (wrapping) turns each 0xff into 0xfe.
// The result is the 0xfefefefefefefefe mask the PAVGB*_MMX macros expect.
// regd is the bare register name; the macro supplies the "%%" prefix itself.
#define MOVQ_BFE(regd)                                  \
    __asm__ volatile (                                  \
        "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
        "paddb   %%"#regd", %%"#regd"   \n\t" ::)

// MOVQ_WTWO(regd): load a per-word constant into MMX register regd.
// Two variants are selected on PIC:
//  - non-PIC: a single movq from the memory operand ff_wtwo.
//    ff_wtwo is not declared in this header (presumably it comes from
//    constants.h and holds 0x0002 in each word -- TODO confirm).
//  - PIC: the value is synthesized in-register with no memory access.
#ifndef PIC
#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
#else
// for shared library it's better to use this way for accessing constants
// pcmpeqd -> -1 (all bits set)
// psrlw $15 -> 0x0001 in every 16-bit word
// psllw $1  -> 0x0002 in every 16-bit word
#define MOVQ_WTWO(regd)                                 \
    __asm__ volatile (                                  \
        "pcmpeqd %%"#regd", %%"#regd"   \n\t"           \
        "psrlw         $15, %%"#regd"   \n\t"           \
        "psllw          $1, %%"#regd"   \n\t"::)

#endif

// Byte-wise average of rega and regb, rounding down:
//     regr = (rega & regb) + (((rega ^ regb) & regfe) >> 1)
// using regr as temporary and for the output result
// first argument is unmodified and second is trashed
// regfe is supposed to contain 0xfefefefefefefefe; masking with it clears
// bit 0 of every byte so the 64-bit psrlq cannot shift a bit into the
// neighbouring byte.
// Expands to string literals only: it is meant to be pasted inside an
// existing __asm__ template, and the arguments are inserted verbatim
// (no "%%" prefix is added here).
#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe)                \
    "movq   "#rega", "#regr"            \n\t"                    \
    "pand   "#regb", "#regr"            \n\t"                    \
    "pxor   "#rega", "#regb"            \n\t"                    \
    "pand  "#regfe", "#regb"            \n\t"                    \
    "psrlq       $1, "#regb"            \n\t"                    \
    "paddb  "#regb", "#regr"            \n\t"

// Byte-wise average of rega and regb, rounding up:
//     regr = (rega | regb) - (((rega ^ regb) & regfe) >> 1)
// regr receives the result, rega is left unmodified, regb is trashed.
// regfe is expected to hold 0xfefefefefefefefe so that the 64-bit psrlq
// does not shift bits across byte boundaries.
// Expands to string literals only: it is meant to be pasted inside an
// existing __asm__ template, and the arguments are inserted verbatim.
#define PAVGB_MMX(rega, regb, regr, regfe)                       \
    "movq   "#rega", "#regr"            \n\t"                    \
    "por    "#regb", "#regr"            \n\t"                    \
    "pxor   "#rega", "#regb"            \n\t"                    \
    "pand  "#regfe", "#regb"            \n\t"                    \
    "psrlq       $1, "#regb"            \n\t"                    \
    "psubb  "#regb", "#regr"            \n\t"

// Two interleaved byte-wise averages, both rounding down:
//     regr = (rega & regb) + (((rega ^ regb) & mm6) >> 1)
//     regp = (regc & regd) + (((regc ^ regd) & mm6) >> 1)
// rega and regc are left unmodified; regb and regd are trashed.
// mm6 is supposed to contain 0xfefefefefefefefe (the mask register is
// hard-coded here rather than passed as an argument).
// Expands to string literals only: it is meant to be pasted inside an
// existing __asm__ template, and the arguments are inserted verbatim.
#define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp)   \
    "movq  "#rega", "#regr"             \n\t"                    \
    "movq  "#regc", "#regp"             \n\t"                    \
    "pand  "#regb", "#regr"             \n\t"                    \
    "pand  "#regd", "#regp"             \n\t"                    \
    "pxor  "#rega", "#regb"             \n\t"                    \
    "pxor  "#regc", "#regd"             \n\t"                    \
    "pand    %%mm6, "#regb"             \n\t"                    \
    "pand    %%mm6, "#regd"             \n\t"                    \
    "psrlq      $1, "#regb"             \n\t"                    \
    "psrlq      $1, "#regd"             \n\t"                    \
    "paddb "#regb", "#regr"             \n\t"                    \
    "paddb "#regd", "#regp"             \n\t"

// Two interleaved byte-wise averages, both rounding up:
//     regr = (rega | regb) - (((rega ^ regb) & mm6) >> 1)
//     regp = (regc | regd) - (((regc ^ regd) & mm6) >> 1)
// rega and regc are left unmodified; regb and regd are trashed.
// mm6 is expected to contain 0xfefefefefefefefe (hard-coded mask register).
// Expands to string literals only: it is meant to be pasted inside an
// existing __asm__ template, and the arguments are inserted verbatim.
#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp)           \
    "movq  "#rega", "#regr"             \n\t"                    \
    "movq  "#regc", "#regp"             \n\t"                    \
    "por   "#regb", "#regr"             \n\t"                    \
    "por   "#regd", "#regp"             \n\t"                    \
    "pxor  "#rega", "#regb"             \n\t"                    \
    "pxor  "#regc", "#regd"             \n\t"                    \
    "pand    %%mm6, "#regb"             \n\t"                    \
    "pand    %%mm6, "#regd"             \n\t"                    \
    "psrlq      $1, "#regd"             \n\t"                    \
    "psrlq      $1, "#regb"             \n\t"                    \
    "psubb "#regb", "#regr"             \n\t"                    \
    "psubb "#regd", "#regp"             \n\t"

/*
 * Prototypes only; the definitions live outside this header.
 * Both take the DSPContext and the AVCodecContext by pointer.
 * NOTE(review): judging by the names these are the x86 setup hooks for the
 * encoder and pixel parts of DSPContext -- confirm against the definitions.
 */
void ff_dsputilenc_init_mmx(DSPContext *c,
                            AVCodecContext *avctx);
void ff_dsputil_init_pix_mmx(DSPContext *c,
                             AVCodecContext *avctx);

/*
 * Prototypes only; the definitions live outside this header.
 * Each routine reads int16_t values through block and takes a writable
 * uint8_t destination plus an int line_size.
 * NOTE(review): the names suggest add / store / signed-store with clamping
 * to the 8-bit range -- confirm against the implementations.
 */
void ff_add_pixels_clamped_mmx(const int16_t *block,
                               uint8_t *pixels,
                               int line_size);
void ff_put_pixels_clamped_mmx(const int16_t *block,
                               uint8_t *pixels,
                               int line_size);
void ff_put_signed_pixels_clamped_mmx(const int16_t *block,
                                      uint8_t *pixels,
                                      int line_size);


/*
 * Prototypes only; the definitions live outside this header.
 * All eight share one signature: a writable block pointer, a read-only
 * pixels pointer, a ptrdiff_t line_size and an int h.
 * NOTE(review): by name these are put/avg copies of 8- or 16-pixel-wide
 * blocks over h rows for the MMX, MMXEXT and SSE2 instruction sets --
 * confirm against the implementations.
 */
void ff_avg_pixels8_mmx(uint8_t *block,
                        const uint8_t *pixels,
                        ptrdiff_t line_size,
                        int h);
void ff_avg_pixels16_mmx(uint8_t *block,
                         const uint8_t *pixels,
                         ptrdiff_t line_size,
                         int h);
void ff_put_pixels8_mmx(uint8_t *block,
                        const uint8_t *pixels,
                        ptrdiff_t line_size,
                        int h);
void ff_put_pixels16_mmx(uint8_t *block,
                         const uint8_t *pixels,
                         ptrdiff_t line_size,
                         int h);
void ff_avg_pixels8_mmxext(uint8_t *block,
                           const uint8_t *pixels,
                           ptrdiff_t line_size,
                           int h);
void ff_put_pixels8_mmxext(uint8_t *block,
                           const uint8_t *pixels,
                           ptrdiff_t line_size,
                           int h);
void ff_avg_pixels16_sse2(uint8_t *block,
                          const uint8_t *pixels,
                          ptrdiff_t line_size,
                          int h);
void ff_put_pixels16_sse2(uint8_t *block,
                          const uint8_t *pixels,
                          ptrdiff_t line_size,
                          int h);

/*
 * Prototype only; the definition lives outside this header.
 * Takes a writable block pointer, a read-only pixels pointer, a ptrdiff_t
 * line_size and an int h.
 * NOTE(review): "_x2" presumably denotes horizontal half-pel
 * interpolation -- confirm against the implementation.
 */
void ff_avg_pixels8_x2_mmx(uint8_t *block,
                           const uint8_t *pixels,
                           ptrdiff_t line_size,
                           int h);

/*
 * Prototypes only; the definitions live outside this header.
 * Both take a writable block pointer, a read-only pixels pointer, a
 * ptrdiff_t line_size and an int h.
 * NOTE(review): "_xy2" presumably denotes half-pel interpolation in both
 * directions, averaged into block -- confirm against the implementations.
 */
void ff_avg_pixels8_xy2_mmx(uint8_t *block,
                            const uint8_t *pixels,
                            ptrdiff_t line_size,
                            int h);
void ff_avg_pixels16_xy2_mmx(uint8_t *block,
                             const uint8_t *pixels,
                             ptrdiff_t line_size,
                             int h);

/*
 * Prototypes only; the definitions live outside this header.
 * Both take a writable block pointer, a read-only pixels pointer, a
 * ptrdiff_t line_size and an int h.
 * NOTE(review): "_xy2" presumably denotes half-pel interpolation in both
 * directions, stored into block -- confirm against the implementations.
 */
void ff_put_pixels8_xy2_mmx(uint8_t *block,
                            const uint8_t *pixels,
                            ptrdiff_t line_size,
                            int h);
void ff_put_pixels16_xy2_mmx(uint8_t *block,
                             const uint8_t *pixels,
                             ptrdiff_t line_size,
                             int h);

/*
 * Prototype only; the definition lives outside this header.
 * Takes one writable destination pointer, five read-only line pointers
 * (lum_m4, lum_m3, lum_m2, lum_m1, lum) and an int size.
 * NOTE(review): presumably a deinterlacing filter over five input lines
 * producing size output bytes -- confirm against the implementation.
 */
void ff_deinterlace_line_mmx(uint8_t *dst,
                             const uint8_t *lum_m4,
                             const uint8_t *lum_m3,
                             const uint8_t *lum_m2,
                             const uint8_t *lum_m1,
                             const uint8_t *lum,
                             int size);

/*
 * Prototype only; the definition lives outside this header.
 * Takes five line pointers (lum_m4, lum_m3, lum_m2, lum_m1, lum) and an
 * int size; there is no separate destination parameter.
 * NOTE(review): every pointer is declared const although the name says
 * "inplace"; which buffer(s) the implementation writes cannot be seen
 * from this header -- confirm before relying on the const qualifiers.
 */
void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, const uint8_t *lum_m3,
                                     const uint8_t *lum_m2, const uint8_t *lum_m1,
                                     const uint8_t *lum,
                                     int size);

#endif /* AVCODEC_X86_DSPUTIL_MMX_H */
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 22:23:34 +00:00			`/*`
			`* MMX optimized DSP utils`
			`* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>`
			`*`
Replace FFmpeg with Libav in licence headers Signed-off-by: Mans Rullgard <mans@mansr.com> 2011-03-18 17:35:10 +00:00			`* This file is part of Libav.`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 22:23:34 +00:00			`*`
Replace FFmpeg with Libav in licence headers Signed-off-by: Mans Rullgard <mans@mansr.com> 2011-03-18 17:35:10 +00:00			`* Libav is free software; you can redistribute it and/or`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 22:23:34 +00:00			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
Replace FFmpeg with Libav in licence headers Signed-off-by: Mans Rullgard <mans@mansr.com> 2011-03-18 17:35:10 +00:00			`* Libav is distributed in the hope that it will be useful,`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 22:23:34 +00:00			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
Replace FFmpeg with Libav in licence headers Signed-off-by: Mans Rullgard <mans@mansr.com> 2011-03-18 17:35:10 +00:00			`* License along with Libav; if not, write to the Free Software`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 22:23:34 +00:00			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

Rename libavcodec/i386/ --> libavcodec/x86/. It contains optimizations that are not specific to i386 and libavutil uses this naming scheme already. Originally committed as revision 16270 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-22 09:12:42 +00:00			`#ifndef AVCODEC_X86_DSPUTIL_MMX_H`
			`#define AVCODEC_X86_DSPUTIL_MMX_H`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 22:23:34 +00:00
x86: vc1dsp: Move ff_avg_vc1_mspel_mc00_mmxext out of dsputil_mmx.c 2013-02-26 16:29:48 +00:00			`#include <stddef.h>`
add required include to make this file self-contained Originally committed as revision 11211 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-12-12 22:45:03 +00:00			`#include <stdint.h>`
x86: vc1dsp: Move ff_avg_vc1_mspel_mc00_mmxext out of dsputil_mmx.c 2013-02-26 16:29:48 +00:00
Use full path for #includes from another directory. Originally committed as revision 13098 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-05-09 11:56:36 +00:00			`#include "libavcodec/dsputil.h"`
x86: rename libavutil/x86_cpu.h to libavutil/x86/asm.h This puts x86-specific things in the x86/ subdirectory where they belong. Signed-off-by: Mans Rullgard <mans@mansr.com> 2012-08-08 12:51:52 +00:00			`#include "libavutil/x86/asm.h"`
x86: dsputil: Move constant declarations into separate header 2013-05-07 22:50:17 +00:00			`#include "constants.h"`
make ff_p* vars extern so that they can be used in various *_mmx.c files Originally committed as revision 11100 to svn://svn.ffmpeg.org/ffmpeg/trunk 2007-11-27 22:23:34 +00:00
split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-25 23:14:22 +00:00			`#define MOVQ_WONE(regd) \`
Convert asm keyword into __asm__. Neither the asm() nor the __asm__() keyword is part of the C99 standard, but while GCC accepts the former in C89 syntax, it is not accepted in C99 unless GNU extensions are turned on (with -fasm). The latter form is accepted in any syntax as an extension (without requiring further command-line options). Sun Studio C99 compiler also does not accept asm() while accepting __asm__(), albeit reporting warnings that it's not valid C99 syntax. Originally committed as revision 15627 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-10-16 13:34:09 +00:00			`__asm__ volatile ( \`
split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-25 23:14:22 +00:00			`"pcmpeqd %%" #regd ", %%" #regd " \n\t" \`
			`"psrlw $15, %%" #regd ::)`

x86: Factorize duplicated inline assembly snippets Signed-off-by: Diego Biurrun <diego@biurrun.de> 2013-04-22 09:23:47 +00:00			`#define JUMPALIGN() __asm__ volatile (".p2align 3"::)`
			`#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)`

			`#define MOVQ_BFE(regd) \`
			`__asm__ volatile ( \`
			`"pcmpeqd %%"#regd", %%"#regd" \n\t" \`
			`"paddb %%"#regd", %%"#regd" \n\t" ::)`

			`#ifndef PIC`
			`#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))`
			`#else`
			`// for shared library it's better to use this way for accessing constants`
			`// pcmpeqd -> -1`
			`#define MOVQ_WTWO(regd) \`
			`__asm__ volatile ( \`
			`"pcmpeqd %%"#regd", %%"#regd" \n\t" \`
			`"psrlw $15, %%"#regd" \n\t" \`
			`"psllw $1, %%"#regd" \n\t"::)`

			`#endif`

			`// using regr as temporary and for the output result`
			`// first argument is unmodified and second is trashed`
			`// regfe is supposed to contain 0xfefefefefefefefe`
			`#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \`
			`"movq "#rega", "#regr" \n\t" \`
			`"pand "#regb", "#regr" \n\t" \`
			`"pxor "#rega", "#regb" \n\t" \`
			`"pand "#regfe", "#regb" \n\t" \`
			`"psrlq $1, "#regb" \n\t" \`
			`"paddb "#regb", "#regr" \n\t"`

			`#define PAVGB_MMX(rega, regb, regr, regfe) \`
			`"movq "#rega", "#regr" \n\t" \`
			`"por "#regb", "#regr" \n\t" \`
			`"pxor "#rega", "#regb" \n\t" \`
			`"pand "#regfe", "#regb" \n\t" \`
			`"psrlq $1, "#regb" \n\t" \`
			`"psubb "#regb", "#regr" \n\t"`

			`// mm6 is supposed to contain 0xfefefefefefefefe`
			`#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \`
			`"movq "#rega", "#regr" \n\t" \`
			`"movq "#regc", "#regp" \n\t" \`
			`"pand "#regb", "#regr" \n\t" \`
			`"pand "#regd", "#regp" \n\t" \`
			`"pxor "#rega", "#regb" \n\t" \`
			`"pxor "#regc", "#regd" \n\t" \`
			`"pand %%mm6, "#regb" \n\t" \`
			`"pand %%mm6, "#regd" \n\t" \`
			`"psrlq $1, "#regb" \n\t" \`
			`"psrlq $1, "#regd" \n\t" \`
			`"paddb "#regb", "#regr" \n\t" \`
			`"paddb "#regd", "#regp" \n\t"`

			`#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \`
			`"movq "#rega", "#regr" \n\t" \`
			`"movq "#regc", "#regp" \n\t" \`
			`"por "#regb", "#regr" \n\t" \`
			`"por "#regd", "#regp" \n\t" \`
			`"pxor "#rega", "#regb" \n\t" \`
			`"pxor "#regc", "#regd" \n\t" \`
			`"pand %%mm6, "#regb" \n\t" \`
			`"pand %%mm6, "#regd" \n\t" \`
			`"psrlq $1, "#regd" \n\t" \`
			`"psrlq $1, "#regb" \n\t" \`
			`"psubb "#regb", "#regr" \n\t" \`
			`"psubb "#regd", "#regp" \n\t"`

dsputil: Add ff_ prefix to the dsputil_init functions Signed-off-by: Martin Storsjö <martin@martin.st> 2012-02-15 10:06:44 +00:00			`void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);`
			`void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);`
Move declarations of some mmx functions to dsputil_mmx.h Originally committed as revision 19739 to svn://svn.ffmpeg.org/ffmpeg/trunk 2009-08-29 16:55:50 +00:00
Drop DCTELEM typedef It does not help as an abstraction and adds dsputil dependencies. Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com> 2013-01-20 00:02:29 +00:00			`void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);`
			`void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);`
			`void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, int line_size);`
split encoding part of dsputil_mmx into its own file Originally committed as revision 12223 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-02-25 23:14:22 +00:00
x86: Move duplicated put_pixels{8|16}_mmx functions into their own file 2013-04-23 15:10:59 +00:00
x86: dsputil: Move avg_pixels8_mmx() out of rnd_template.c The function is only instantiated once, so there is no point in keeping it in a template file. 2013-04-27 20:01:07 +00:00			`void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
x86: dsputil: Move avg_pixels16_mmx() out of rnd_template.c The function does not do any rounding, so there is no point in keeping it in a round template file. 2013-04-27 20:52:26 +00:00			`void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
x86: Move duplicated put_pixels{8|16}_mmx functions into their own file 2013-04-23 15:10:59 +00:00			`void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
			`void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
x86: vc1dsp: Move ff_avg_vc1_mspel_mc00_mmxext out of dsputil_mmx.c 2013-02-26 16:29:48 +00:00			`void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
x86: dsputil: Refactor some ff_{avg|put}_pixels function declarations 2013-04-11 00:31:09 +00:00			`void ff_put_pixels8_mmxext(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
			`void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
			`void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
x86: hpeldsp: Move avg_pixels8_x2_mmx() out of hpeldsp_rnd_template.c The function is only instantiated once, so there is no point in keeping it in a template file. 2013-04-23 16:36:25 +00:00
			`void ff_avg_pixels8_x2_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
x86: dsputil hpeldsp: Move shared template functions into separate object 2013-05-06 22:42:22 +00:00
			`void ff_avg_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
			`void ff_avg_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`

			`void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
			`void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,`
			`ptrdiff_t line_size, int h);`
x86: vc1dsp: Move ff_avg_vc1_mspel_mc00_mmxext out of dsputil_mmx.c 2013-02-26 16:29:48 +00:00
Convert deinterlacing MMX code to YASM Originally committed as revision 24615 to svn://svn.ffmpeg.org/ffmpeg/trunk 2010-07-31 14:50:51 +00:00			`void ff_deinterlace_line_mmx(uint8_t *dst,`
			`const uint8_t *lum_m4, const uint8_t *lum_m3,`
			`const uint8_t *lum_m2, const uint8_t *lum_m1,`
			`const uint8_t *lum,`
			`int size);`

			`void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,`
			`const uint8_t *lum_m3,`
			`const uint8_t *lum_m2,`
			`const uint8_t *lum_m1,`
			`const uint8_t *lum, int size);`

Rename libavcodec/i386/ --> libavcodec/x86/. It contains optimizations that are not specific to i386 and libavutil uses this naming scheme already. Originally committed as revision 16270 to svn://svn.ffmpeg.org/ffmpeg/trunk 2008-12-22 09:12:42 +00:00			`#endif /* AVCODEC_X86_DSPUTIL_MMX_H */`