reorder a few more paddws to reduce dependency chains

chroma mc4 put 2480 -> 2460 dezicycles on duron

Originally committed as revision 8098 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
Michael Niedermayer 2007-02-23 15:44:56 +00:00
parent b4fe97696c
commit 58e31fb1d5

View File

@ -218,8 +218,8 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*
"movq %%mm1, %%mm0 \n\t"
"pmullw %%mm5, %%mm6 \n\t"
"pmullw %%mm3, %%mm1 \n\t"
"paddw %4, %%mm6 \n\t"
"paddw %%mm6, %%mm1 \n\t"
"paddw %4, %%mm1 \n\t"
"psrlw $6, %%mm1 \n\t"
"packuswb %%mm1, %%mm1 \n\t"
H264_CHROMA_OP4((%0), %%mm1, %%mm6)
@ -236,8 +236,8 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*
"movq %%mm1, %%mm6 \n\t"
"pmullw %%mm5, %%mm0 \n\t"
"pmullw %%mm3, %%mm1 \n\t"
"paddw %4, %%mm0 \n\t"
"paddw %%mm0, %%mm1 \n\t"
"paddw %4, %%mm1 \n\t"
"psrlw $6, %%mm1 \n\t"
"packuswb %%mm1, %%mm1 \n\t"
H264_CHROMA_OP4((%0), %%mm1, %%mm0)