From 4704097a2b557fa9e58def0956dbdef7842a3fd3 Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Tue, 13 May 2003 00:46:42 +0000 Subject: [PATCH] optimizations Originally committed as revision 1867 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/golomb.c | 57 ++++++++++++++++++++++++++++++++++++++++ libavcodec/golomb.h | 64 ++++++++++++++++++++++++++++++--------------- libavcodec/h264.c | 49 ++++++++++++++++++++++------------ libavcodec/svq3.c | 7 +++++ 4 files changed, 139 insertions(+), 38 deletions(-) diff --git a/libavcodec/golomb.c b/libavcodec/golomb.c index c8c200c30a..a696b2a766 100644 --- a/libavcodec/golomb.c +++ b/libavcodec/golomb.c @@ -95,3 +95,60 @@ const uint8_t ff_ue_golomb_len[256]={ 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,17, }; + +const uint8_t ff_interleaved_golomb_vlc_len[256]={ +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +}; + +const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={ + 15,16,7, 7, 17,18,8, 8, 3, 3, 3, 3, 3, 3, 3, 3, + 19,20,9, 9, 21,22,10,10,4, 4, 4, 4, 4, 4, 4, 4, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 23,24,11,11,25,26,12,12,5, 5, 5, 5, 5, 5, 5, 5, + 27,28,13,13,29,30,14,14,6, 6, 6, 6, 6, 6, 6, 6, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; + +const int8_t ff_interleaved_se_golomb_vlc_code[256]={ + 8, -8, 4, 4, 9, -9, -4, -4, 2, 2, 2, 2, 2, 2, 2, 2, + 10,-10, 5, 5, 11,-11, -5, -5, -2, -2, -2, -2, -2, -2, -2, -2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 12,-12, 6, 6, 13,-13, -6, -6, 3, 3, 3, 3, 3, 3, 3, 3, + 14,-14, 7, 7, 15,-15, -7, -7, -3, -3, -3, -3, -3, -3, -3, -3, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; diff --git a/libavcodec/golomb.h b/libavcodec/golomb.h index 510d913ae1..f1e6d6d987 100644 --- a/libavcodec/golomb.h +++ b/libavcodec/golomb.h @@ -32,6 +32,10 @@ extern const uint8_t ff_ue_golomb_vlc_code[512]; extern const int8_t ff_se_golomb_vlc_code[512]; extern const uint8_t ff_ue_golomb_len[256]; +extern const uint8_t ff_interleaved_golomb_vlc_len[256]; +extern const uint8_t ff_interleaved_ue_golomb_vlc_code[256]; +extern const int8_t ff_interleaved_se_golomb_vlc_code[256]; + /** * read unsigned exp golomb code. @@ -62,24 +66,33 @@ static inline int get_ue_golomb(GetBitContext *gb){ } static inline int svq3_get_ue_golomb(GetBitContext *gb){ - unsigned int buf; + uint32_t buf; int log; OPEN_READER(re, gb); UPDATE_CACHE(re, gb); - buf=GET_CACHE(re, gb)|1; + buf=GET_CACHE(re, gb); + + if(buf&0xAA800000){ + buf >>= 32 - 8; + LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]); + CLOSE_READER(re, gb); + + return ff_interleaved_ue_golomb_vlc_code[buf]; + }else{ + buf|=1; + if((buf & 0xAAAAAAAA) == 0) + return INVALID_VLC; - if((buf & 0xAAAAAAAA) == 0) - return INVALID_VLC; + for(log=31; (buf & 0x80000000) == 0; log--){ + buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30); + } - for(log=31; (buf & 0x80000000) == 0; log--){ - buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30); + LAST_SKIP_BITS(re, gb, 63 - 2*log); + CLOSE_READER(re, gb); + + return ((buf << log) >> log) - 1; } - - LAST_SKIP_BITS(re, gb, 63 - 2*log); - CLOSE_READER(re, gb); - - return ((buf << log) >> log) - 1; } /** @@ -141,19 +154,28 @@ static inline int svq3_get_se_golomb(GetBitContext *gb){ OPEN_READER(re, gb); UPDATE_CACHE(re, gb); - buf=GET_CACHE(re, gb)|1; + buf=GET_CACHE(re, gb); - if((buf & 0xAAAAAAAA) == 0) - return INVALID_VLC; + if(buf&0xAA800000){ + buf >>= 32 - 8; + LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]); + CLOSE_READER(re, gb); + + return ff_interleaved_se_golomb_vlc_code[buf]; + }else{ + buf |=1; + if((buf & 0xAAAAAAAA) == 0) + return INVALID_VLC; - for(log=31; (buf & 0x80000000) == 0; log--){ - buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30); + for(log=31; (buf & 0x80000000) == 0; log--){ + buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30); + } + + LAST_SKIP_BITS(re, gb, 63 - 2*log); + CLOSE_READER(re, gb); + + return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1; } - - LAST_SKIP_BITS(re, gb, 63 - 2*log); - CLOSE_READER(re, gb); - - return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1; } #ifdef TRACE diff --git a/libavcodec/h264.c b/libavcodec/h264.c index c52758ed92..63d1af1746 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -2278,13 +2278,19 @@ static void hl_decode_mb(H264Context *h){ if(!IS_INTRA4x4(mb_type)){ - for(i=0; i<16; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below - uint8_t * const ptr= dest_y + h->block_offset[i]; - if(s->codec_id == CODEC_ID_H264) + if(s->codec_id == CODEC_ID_H264){ + for(i=0; i<16; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below + uint8_t * const ptr= dest_y + h->block_offset[i]; h264_add_idct_c(ptr, h->mb + i*16, linesize); - else + } + } + }else{ + for(i=0; i<16; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below + uint8_t * const ptr= dest_y + h->block_offset[i]; svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0); + } } } } @@ -2292,22 +2298,31 @@ static void hl_decode_mb(H264Context *h){ if(!(s->flags&CODEC_FLAG_GRAY)){ chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp); chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp); - for(i=16; i<16+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cb + h->block_offset[i]; - if(s->codec_id == CODEC_ID_H264) + if(s->codec_id == CODEC_ID_H264){ + for(i=16; i<16+4; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ + uint8_t * const ptr= dest_cb + h->block_offset[i]; h264_add_idct_c(ptr, h->mb + i*16, uvlinesize); - else - svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); + } } - } - for(i=20; i<20+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cr + h->block_offset[i]; - if(s->codec_id == CODEC_ID_H264) + for(i=20; i<20+4; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ + uint8_t * const ptr= dest_cr + h->block_offset[i]; h264_add_idct_c(ptr, h->mb + i*16, uvlinesize); - else + } + } + }else{ + for(i=16; i<16+4; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ + uint8_t * const ptr= dest_cb + h->block_offset[i]; svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); + } + } + for(i=20; i<20+4; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ + uint8_t * const ptr= dest_cr + h->block_offset[i]; + svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); + } } } } diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c index 0cd927956c..969e45d81f 100644 --- a/libavcodec/svq3.c +++ b/libavcodec/svq3.c @@ -729,6 +729,13 @@ static int svq3_decode_frame (AVCodecContext *avctx, while (get_bits (&s->gb, 1)) { get_bits (&s->gb, 8); } + + if(avctx->debug&FF_DEBUG_PICT_INFO){ + printf("%c hpel:%d, tpel:%d aqp:%d qp:%d\n", + ff_get_pict_type_char(s->pict_type), h->halfpel_flag, h->thirdpel_flag, + s->adaptive_quant, s->qscale + ); + } /* B-frames are not supported */ if (s->pict_type == B_TYPE/* && avctx->hurry_up*/)