From c62c07d3d7d204527bfa991bac1204cbb593747b Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 23 Feb 2004 20:56:56 +0000 Subject: [PATCH] multithreaded mpeg2 decoding Originally committed as revision 2810 to svn://svn.ffmpeg.org/ffmpeg/trunk --- ffmpeg.c | 5 + ffplay.c | 19 +++- libavcodec/error_resilience.c | 2 +- libavcodec/mpeg12.c | 191 +++++++++++++++++++++++----------- libavcodec/mpegvideo.c | 8 +- libavcodec/mpegvideo.h | 1 + 6 files changed, 158 insertions(+), 68 deletions(-) diff --git a/ffmpeg.c b/ffmpeg.c index 8d2264bffa..138d531215 100644 --- a/ffmpeg.c +++ b/ffmpeg.c @@ -2256,6 +2256,11 @@ static void opt_input_file(const char *filename) /* update the current parameters so that they match the one of the input stream */ for(i=0;i<ic->nb_streams;i++) { AVCodecContext *enc = &ic->streams[i]->codec; +#if defined(HAVE_PTHREADS) || defined(HAVE_W32THREADS) + if(thread_count>1) + avcodec_thread_init(enc, thread_count); +#endif + enc->thread_count= thread_count; switch(enc->codec_type) { case CODEC_TYPE_AUDIO: //fprintf(stderr, "\nInput Audio channels: %d", enc->channels); diff --git a/ffplay.c b/ffplay.c index 0e76aeaef2..f39e09243f 100644 --- a/ffplay.c +++ b/ffplay.c @@ -168,6 +168,7 @@ static int64_t start_time = AV_NOPTS_VALUE; static int debug = 0; static int debug_mv = 0; static int step = 0; +static int thread_count = 1; /* current context */ static int is_full_screen; @@ -1169,6 +1170,12 @@ static int stream_component_open(VideoState *is, int stream_index) if (!codec || avcodec_open(enc, codec) < 0) return -1; + enc->debug = debug; +#if defined(HAVE_PTHREADS) || defined(HAVE_W32THREADS) + if(thread_count>1) + avcodec_thread_init(enc, thread_count); +#endif + enc->thread_count= thread_count; switch(enc->codec_type) { case CODEC_TYPE_AUDIO: is->audio_stream = stream_index; @@ -1197,7 +1204,6 @@ static int stream_component_open(VideoState *is, int stream_index) packet_queue_init(&is->videoq); is->video_tid = SDL_CreateThread(video_thread, is); 
- enc->debug = debug; enc->debug_mv = debug_mv; break; default: @@ -1793,6 +1799,14 @@ static void opt_vismv(const char *arg) { debug_mv = atoi(arg); } + +static void opt_thread_count(const char *arg) +{ + thread_count= atoi(arg); +#if !defined(HAVE_PTHREADS) && !defined(HAVE_W32THREADS) + fprintf(stderr, "Warning: not compiled with thread support, using thread emulation\n"); +#endif +} const OptionDef options[] = { { "h", 0, {(void*)show_help}, "show help" }, @@ -1814,7 +1828,8 @@ const OptionDef options[] = { #ifdef CONFIG_NETWORK { "rtp_tcp", OPT_EXPERT, {(void*)&opt_rtp_tcp}, "force RTP/TCP protocol usage", "" }, #endif - { "sync", HAS_ARG | OPT_EXPERT, {(void*)&opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" }, + { "sync", HAS_ARG | OPT_EXPERT, {(void*)opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" }, + { "threads", HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" }, { NULL, }, }; diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c index 5067a248f5..c6b10a79c7 100644 --- a/libavcodec/error_resilience.c +++ b/libavcodec/error_resilience.c @@ -652,7 +652,7 @@ void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int en s->error_status_table[start_xy] |= VP_START; - if(start_xy > 0){ + if(start_xy > 0 && s->avctx->thread_count <= 1){ int prev_status= s->error_status_table[ s->mb_index2xy[start_i - 1] ]; prev_status &= ~ VP_START; diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c index 748c0fb9bb..be2209ce13 100644 --- a/libavcodec/mpeg12.c +++ b/libavcodec/mpeg12.c @@ -1707,6 +1707,7 @@ typedef struct Mpeg1Context { int mpeg_enc_ctx_allocated; /* true if decoding context allocated */ int repeat_field; /* true if we must repeat the field */ AVPanScan pan_scan; /** some temporary storage for the panscan */ + int slice_count; } Mpeg1Context; static int mpeg_decode_init(AVCodecContext *avctx) @@ -2015,43 +2016,14 @@ short * tmp; s->pblocks[5] = tmp; 
} -#define DECODE_SLICE_FATAL_ERROR -2 -#define DECODE_SLICE_ERROR -1 -#define DECODE_SLICE_OK 0 +static int mpeg_field_start(MpegEncContext *s){ + AVCodecContext *avctx= s->avctx; + Mpeg1Context *s1 = (Mpeg1Context*)s; -/** - * decodes a slice. - * @return DECODE_SLICE_FATAL_ERROR if a non recoverable error occured
- * DECODE_SLICE_ERROR if the slice is damaged
- * DECODE_SLICE_OK if this slice is ok
- */ -static int mpeg_decode_slice(AVCodecContext *avctx, - AVFrame *pict, - int start_code, - uint8_t **buf, int buf_size) -{ - Mpeg1Context *s1 = avctx->priv_data; - MpegEncContext *s = &s1->mpeg_enc_ctx; - int ret; - const int field_pic= s->picture_structure != PICT_FRAME; - - s->resync_mb_x= s->mb_x = - s->resync_mb_y= s->mb_y = -1; - - start_code = (start_code - 1) & 0xff; - if (start_code >= s->mb_height){ - av_log(s->avctx, AV_LOG_ERROR, "slice below image (%d >= %d)\n", start_code, s->mb_height); - return -1; - } - - ff_mpeg1_clean_buffers(s); - s->interlaced_dct = 0; - /* start frame decoding */ - if (s->first_slice) { - if(s->first_field || s->picture_structure==PICT_FRAME){ + if(s->first_field || s->picture_structure==PICT_FRAME){ if(MPV_frame_start(s, avctx) < 0) - return DECODE_SLICE_FATAL_ERROR; + return -1; ff_er_frame_start(s); @@ -2069,7 +2041,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx, } *s->current_picture_ptr->pan_scan= s1->pan_scan; - }else{ //second field + }else{ //second field int i; if(!s->current_picture_ptr){ @@ -2083,30 +2055,48 @@ static int mpeg_decode_slice(AVCodecContext *avctx, s->current_picture.data[i] += s->current_picture_ptr->linesize[i]; } } - } + } #ifdef HAVE_XVMC // MPV_frame_start will call this function too, // but we need to call it on every field - if(s->avctx->xvmc_acceleration) + if(s->avctx->xvmc_acceleration) XVMC_field_start(s,avctx); #endif - }//fi(s->first_slice) + return 0; +} + +#define DECODE_SLICE_ERROR -1 +#define DECODE_SLICE_OK 0 + +/** + * decodes a slice. MpegEncContext.mb_y must be set to the MB row from the startcode + * @return DECODE_SLICE_ERROR if the slice is damaged
+ * DECODE_SLICE_OK if this slice is ok
+ */ +static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y, + uint8_t **buf, int buf_size) +{ + MpegEncContext *s = &s1->mpeg_enc_ctx; + AVCodecContext *avctx= s->avctx; + int ret; + const int field_pic= s->picture_structure != PICT_FRAME; + + s->resync_mb_x= + s->resync_mb_y= -1; + + if (mb_y >= s->mb_height){ + av_log(s->avctx, AV_LOG_ERROR, "slice below image (%d >= %d)\n", s->mb_y, s->mb_height); + return -1; + } + init_get_bits(&s->gb, *buf, buf_size*8); - s->qscale = get_qscale(s); - if (s->first_slice && (s->first_field || s->picture_structure==PICT_FRAME)) { - if(s->avctx->debug&FF_DEBUG_PICT_INFO){ - av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", - s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1], - s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), - s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", - s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors, - s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? 
"420" :""); - } - } + ff_mpeg1_clean_buffers(s); + s->interlaced_dct = 0; + + s->qscale = get_qscale(s); - s->first_slice = 0; if(s->qscale == 0){ av_log(s->avctx, AV_LOG_ERROR, "qscale == 0\n"); return -1; @@ -2135,12 +2125,23 @@ static int mpeg_decode_slice(AVCodecContext *avctx, break; } } - + s->resync_mb_x= s->mb_x; - s->resync_mb_y= s->mb_y = start_code; + s->resync_mb_y= s->mb_y= mb_y; s->mb_skip_run= 0; ff_init_block_index(s); + if (s->mb_y==0 && s->mb_x==0 && (s->first_field || s->picture_structure==PICT_FRAME)) { + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", + s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1], + s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), + s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", + s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors, + s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? 
"420" :""); + } + } + for(;;) { #ifdef HAVE_XVMC //one 1 we memcpy blocks in xvmcvideo @@ -2268,6 +2269,39 @@ eos: // end of slice return 0; } +static int slice_decode_thread(AVCodecContext *c, void *arg){ + MpegEncContext *s= arg; + uint8_t *buf= s->gb.buffer; + int mb_y= s->start_mb_y; + + s->error_count= 3*(s->end_mb_y - s->start_mb_y)*s->mb_width; + + for(;;){ + int start_code, ret; + + ret= mpeg_decode_slice((Mpeg1Context*)s, mb_y, &buf, s->gb.buffer_end - buf); + emms_c(); +//av_log(c, AV_LOG_DEBUG, "ret:%d resync:%d/%d mb:%d/%d ts:%d/%d ec:%d\n", +//ret, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, s->start_mb_y, s->end_mb_y, s->error_count); + if(ret < 0){ + if(s->resync_mb_x>=0 && s->resync_mb_y>=0) + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, AC_ERROR|DC_ERROR|MV_ERROR); + }else{ + ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END); + } + + if(s->mb_y == s->end_mb_y) + return 0; + + start_code = find_start_code(&buf, s->gb.buffer_end); + mb_y= start_code - SLICE_MIN_START_CODE; + if(mb_y < 0 || mb_y >= s->end_mb_y) + return -1; + } + + return 0; //not reached +} + /** * handles slice ends. 
* @return 1 if it seems to be the last slice of @@ -2621,12 +2655,21 @@ static int mpeg_decode_frame(AVCodecContext *avctx, if(s->mpeg_enc_ctx_allocated==0 && avctx->codec_tag == ff_get_fourcc("VCR2")) vcr2_init_sequence(avctx); - + + s->slice_count= 0; + for(;;) { /* find start next code */ start_code = find_start_code(&buf_ptr, buf_end); if (start_code < 0){ if(s2->pict_type != B_TYPE || avctx->hurry_up==0){ + if(avctx->thread_count > 1){ + int i; + + avctx->execute(avctx, slice_decode_thread, (void**)&(s2->thread_context[0]), NULL, s->slice_count); + for(i=0; i<s->slice_count; i++) + s2->error_count += s2->thread_context[i]->error_count; + } if (slice_end(avctx, picture)) { if(s2->last_picture_ptr || s2->low_delay) //FIXME merge with the stuff in mpeg_decode_slice *data_size = sizeof(AVPicture); @@ -2667,6 +2710,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx, default: if (start_code >= SLICE_MIN_START_CODE && start_code <= SLICE_MAX_START_CODE) { + int mb_y= start_code - SLICE_MIN_START_CODE; /* skip b frames if we dont have reference frames */ if(s2->last_picture_ptr==NULL && s2->pict_type==B_TYPE) break; @@ -2676,17 +2720,38 @@ static int mpeg_decode_frame(AVCodecContext *avctx, if(avctx->hurry_up>=5) break; if (!s->mpeg_enc_ctx_allocated) break; - - ret = mpeg_decode_slice(avctx, picture, - start_code, &buf_ptr, input_size); - emms_c(); - - if(ret < 0){ - if(s2->resync_mb_x>=0 && s2->resync_mb_y>=0) - ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x, s2->mb_y, AC_ERROR|DC_ERROR|MV_ERROR); - if(ret==DECODE_SLICE_FATAL_ERROR) return -1; + + if(s2->first_slice){ + s2->first_slice=0; + if(mpeg_field_start(s2) < 0) + return -1; + } + + if(avctx->thread_count > 1){ + int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count; + if(threshold <= mb_y){ + MpegEncContext *thread_context= s2->thread_context[s->slice_count]; + + thread_context->start_mb_y= mb_y; + thread_context->end_mb_y = s2->mb_height; + 
if(s->slice_count){ + s2->thread_context[s->slice_count-1]->end_mb_y= mb_y; + ff_update_duplicate_context(thread_context, s2); + } + init_get_bits(&thread_context->gb, buf_ptr, input_size*8); + s->slice_count++; + } + buf_ptr += 2; //FIXME add minimum num of bytes per slice }else{ - ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x-1, s2->mb_y, AC_END|DC_END|MV_END); + ret = mpeg_decode_slice(s, mb_y, &buf_ptr, input_size); + emms_c(); + + if(ret < 0){ + if(s2->resync_mb_x>=0 && s2->resync_mb_y>=0) + ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x, s2->mb_y, AC_ERROR|DC_ERROR|MV_ERROR); + }else{ + ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x-1, s2->mb_y, AC_END|DC_END|MV_END); + } } } break; diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index e92022b0cc..3b36f97244 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -468,13 +468,17 @@ static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){ #undef COPY } -static void update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){ +void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){ MpegEncContext bak; + int i; //FIXME copy only needed parts //START_TIMER backup_duplicate_context(&bak, dst); memcpy(dst, src, sizeof(MpegEncContext)); backup_duplicate_context(dst, &bak); + for(i=0;i<12;i++){ + dst->pblocks[i] = (short *)(&dst->block[i]); + } //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads } @@ -4632,7 +4636,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mb_intra=0; //for the rate distoration & bit compare functions for(i=1; i<s->avctx->thread_count; i++){ - update_duplicate_context(s->thread_context[i], s); + ff_update_duplicate_context(s->thread_context[i], s); } /* Estimate motion for every MB */ diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index 6eb3a0fcff..e6e242323e 100644 --- 
a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -736,6 +736,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict); void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix); int ff_find_unused_picture(MpegEncContext *s, int shared); void ff_denoise_dct(MpegEncContext *s, DCTELEM *block); +void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src); void ff_er_frame_start(MpegEncContext *s); void ff_er_frame_end(MpegEncContext *s);