swscale/x86/rgb2rgb: disable rgb24toyv12_mmxext for x86_64

The mmxext implementation is slower than the C version on x86_64.

                              m32                  m64
rgb24toyv12_16_200_c:         24942.7              14812.6
rgb24toyv12_16_200_mmxext:    17857.2 ( 1.40x)     17400.4 ( 0.85x)
rgb24toyv12_128_60_c:         56892.9              35616.9
rgb24toyv12_128_60_mmxext:    40730.9 ( 1.40x)     39610.4 ( 0.90x)
rgb24toyv12_512_16_c:         58402.7              37209.4
rgb24toyv12_512_16_mmxext:    44842.4 ( 1.30x)     41136.2 ( 0.90x)
rgb24toyv12_1920_4_c:         54827.4              34737.4
rgb24toyv12_1920_4_mmxext:    51169.9 ( 1.07x)     34818.9 ( 1.00x)
2024-09-16 19:43:37 +00:00 · 2024-08-28 20:03:39 +02:00 · 2024-08-28 20:03:39 +02:00 · caaec2ea95
commit caaec2ea95
parent e0cc06184c
1 changed file with 3 additions and 3 deletions
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -1480,7 +1480,7 @@ static inline void planar2x_mmxext(const uint8_t *src, uint8_t *dst, int srcWidt
 * others are ignored in the C version.
 * FIXME: Write HQ version.
 */
-#if HAVE_7REGS
+#if ARCH_X86_32 && HAVE_7REGS
 static inline void rgb24toyv12_mmxext(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                                       int width, int height,
                                       int lumStride, int chromStride, int srcStride,
@@ -2257,9 +2257,9 @@ static av_cold void rgb2rgb_init_mmxext(void)
    yuyvtoyuv422       = yuyvtoyuv422_mmxext;

    planar2x           = planar2x_mmxext;
-#if HAVE_7REGS
+#if ARCH_X86_32 && HAVE_7REGS
    ff_rgb24toyv12     = rgb24toyv12_mmxext;
-#endif /* HAVE_7REGS */
+#endif /* ARCH_X86_32 && HAVE_7REGS */

    yuyvtoyuv420       = yuyvtoyuv420_mmxext;
    uyvytoyuv420       = uyvytoyuv420_mmxext;