swscale/x86/rgb2rgb: disable rgb24toyv12_mmxext for x86_64

The mmxext implementation is slower than the C version on x86_64.

                                m32               m64
rgb24toyv12_16_200_c:       24942.7           14812.6
rgb24toyv12_16_200_mmxext:  17857.2 ( 1.40x)  17400.4 ( 0.85x)
rgb24toyv12_128_60_c:       56892.9           35616.9
rgb24toyv12_128_60_mmxext:  40730.9 ( 1.40x)  39610.4 ( 0.90x)
rgb24toyv12_512_16_c:       58402.7           37209.4
rgb24toyv12_512_16_mmxext:  44842.4 ( 1.30x)  41136.2 ( 0.90x)
rgb24toyv12_1920_4_c:       54827.4           34737.4
rgb24toyv12_1920_4_mmxext:  51169.9 ( 1.07x)  34818.9 ( 1.00x)
This commit is contained in:
Ramiro Polla 2024-08-28 20:03:39 +02:00
parent e0cc06184c
commit caaec2ea95

View File

@@ -1480,7 +1480,7 @@ static inline void planar2x_mmxext(const uint8_t *src, uint8_t *dst, int srcWidt
* others are ignored in the C version.
* FIXME: Write HQ version.
*/
-#if HAVE_7REGS
+#if ARCH_X86_32 && HAVE_7REGS
static inline void rgb24toyv12_mmxext(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
int width, int height,
int lumStride, int chromStride, int srcStride,
@@ -2257,9 +2257,9 @@ static av_cold void rgb2rgb_init_mmxext(void)
yuyvtoyuv422 = yuyvtoyuv422_mmxext;
planar2x = planar2x_mmxext;
-#if HAVE_7REGS
+#if ARCH_X86_32 && HAVE_7REGS
ff_rgb24toyv12 = rgb24toyv12_mmxext;
-#endif /* HAVE_7REGS */
+#endif /* ARCH_X86_32 && HAVE_7REGS */
yuyvtoyuv420 = yuyvtoyuv420_mmxext;
uyvytoyuv420 = uyvytoyuv420_mmxext;