aligned bitstream writer (1% slower on p3 but perhaps it's faster on p4?)

Originally committed as revision 296 to svn://svn.ffmpeg.org/ffmpeg/trunk
2024-09-20 13:26:39 +00:00 · 2002-02-12 22:43:26 +00:00 · 2002-02-12 22:43:26 +00:00 · 4f0f8bfce3
commit 4f0f8bfce3
parent 17592475b3
1 changed files with 35 additions and 1 deletions
--- a/libavcodec/common.h
+++ b/libavcodec/common.h
@ -9,6 +9,7 @@
 #endif

 //#define ALT_BITSTREAM_WRITER
+//#define ALIGNED_BITSTREAM_WRITER
 //#define ALT_BITSTREAM_READER
 //#define ALIGNED_BITSTREAM
 #define FAST_GET_FIRST_VLC
@ -238,8 +239,40 @@ static inline uint32_t unaligned32(const void *v) {
 #endif //!ARCH_X86

 #ifdef ALT_BITSTREAM_WRITER
-static inline void put_bits(PutBitContext *s, int n, int value)
+static inline void put_bits(PutBitContext *s, int n, unsigned int value)
 {
+#ifdef ALIGNED_BITSTREAM_WRITER
+#ifdef ARCH_X86
+    asm volatile(
+	"movl %0, %%ecx			\n\t"
+	"xorl %%eax, %%eax		\n\t"
+	"shrdl %%cl, %1, %%eax		\n\t"
+	"shrl %%cl, %1			\n\t"
+	"movl %0, %%ecx			\n\t"
+	"shrl $3, %%ecx			\n\t"
+	"andl $0xFFFFFFFC, %%ecx	\n\t"
+	"bswapl %1			\n\t"
+	"orl %1, (%2, %%ecx)		\n\t"
+	"bswapl %%eax			\n\t"
+	"addl %3, %0			\n\t"
+	"movl %%eax, 4(%2, %%ecx)	\n\t"
+	: "=&r" (s->index), "=&r" (value)
+	: "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
+	: "%eax", "%ecx"
+    );
+#else
+    int index= s->index;
+    uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5);
+    
+    value<<= 32-n; 
+    
+    ptr[0] |= be2me_32(value>>(index&31));
+    ptr[1]  = be2me_32(value<<(32-(index&31)));
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#endif
+#else //ALIGNED_BITSTREAM_WRITER
 #ifdef ARCH_X86
    asm volatile(
 	"movl $7, %%ecx			\n\t"
@ -267,6 +300,7 @@ static inline void put_bits(PutBitContext *s, int n, int value)
    index+= n;
    s->index= index;
 #endif
+#endif //!ALIGNED_BITSTREAM_WRITER
 }
 #endif