From c62c07d3d7d204527bfa991bac1204cbb593747b Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Mon, 23 Feb 2004 20:56:56 +0000
Subject: [PATCH] multithreaded mpeg2 decoding

Originally committed as revision 2810 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 ffmpeg.c                      |   5 +
 ffplay.c                      |  19 +++-
 libavcodec/error_resilience.c |   2 +-
 libavcodec/mpeg12.c           | 191 +++++++++++++++++++++++-----------
 libavcodec/mpegvideo.c        |   8 +-
 libavcodec/mpegvideo.h        |   1 +
 6 files changed, 158 insertions(+), 68 deletions(-)
diff --git a/ffmpeg.c b/ffmpeg.c
index 8d2264bffa..138d531215 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -2256,6 +2256,11 @@ static void opt_input_file(const char *filename)
     /* update the current parameters so that they match the one of the input stream */
     for(i=0;i<ic->nb_streams;i++) {
         AVCodecContext *enc = &ic->streams[i]->codec;
+#if defined(HAVE_PTHREADS) || defined(HAVE_W32THREADS)
+        if(thread_count>1)
+            avcodec_thread_init(enc, thread_count);
+#endif
+        enc->thread_count= thread_count;
         switch(enc->codec_type) {
         case CODEC_TYPE_AUDIO:
             //fprintf(stderr, "\nInput Audio channels: %d", enc->channels);
diff --git a/ffplay.c b/ffplay.c
index 0e76aeaef2..f39e09243f 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -168,6 +168,7 @@ static int64_t start_time = AV_NOPTS_VALUE;
 static int debug = 0;
 static int debug_mv = 0;
 static int step = 0;
+static int thread_count = 1;
 
 /* current context */
 static int is_full_screen;
@@ -1169,6 +1170,12 @@ static int stream_component_open(VideoState *is, int stream_index)
     if (!codec ||
         avcodec_open(enc, codec) < 0)
         return -1;
+    enc->debug = debug;
+#if defined(HAVE_PTHREADS) || defined(HAVE_W32THREADS)
+    if(thread_count>1)
+        avcodec_thread_init(enc, thread_count);
+#endif
+    enc->thread_count= thread_count;
     switch(enc->codec_type) {
     case CODEC_TYPE_AUDIO:
         is->audio_stream = stream_index;
@@ -1197,7 +1204,6 @@ static int stream_component_open(VideoState *is, int stream_index)
 
         packet_queue_init(&is->videoq);
         is->video_tid = SDL_CreateThread(video_thread, is);
-        enc->debug = debug;
         enc->debug_mv = debug_mv;
         break;
     default:
@@ -1793,6 +1799,14 @@ static void opt_vismv(const char *arg)
 {
     debug_mv = atoi(arg);
 }
+
+static void opt_thread_count(const char *arg) //handler of the "threads" command line option
+{
+    thread_count= atoi(arg); //stored as parsed, the value is not range checked here
+#if !defined(HAVE_PTHREADS) && !defined(HAVE_W32THREADS)
+    fprintf(stderr, "Warning: not compiled with thread support, using thread emulation\n"); //emitted on every use of the option in such builds
+#endif
+}
     
 const OptionDef options[] = {
     { "h", 0, {(void*)show_help}, "show help" },    
@@ -1814,7 +1828,8 @@ const OptionDef options[] = {
 #ifdef CONFIG_NETWORK
     { "rtp_tcp", OPT_EXPERT, {(void*)&opt_rtp_tcp}, "force RTP/TCP protocol usage", "" },
 #endif
-    { "sync", HAS_ARG | OPT_EXPERT, {(void*)&opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" },
+    { "sync", HAS_ARG | OPT_EXPERT, {(void*)opt_sync}, "set audio-video sync. type (type=audio/video/ext)", "type" },
+    { "threads", HAS_ARG | OPT_EXPERT, {(void*)opt_thread_count}, "thread count", "count" },
     { NULL, },
 };
 
diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index 5067a248f5..c6b10a79c7 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -652,7 +652,7 @@ void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int en
  
     s->error_status_table[start_xy] |= VP_START;
 
-    if(start_xy > 0){
+    if(start_xy > 0 && s->avctx->thread_count <= 1){
         int prev_status= s->error_status_table[ s->mb_index2xy[start_i - 1] ];
         
         prev_status &= ~ VP_START;
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 748c0fb9bb..be2209ce13 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -1707,6 +1707,7 @@ typedef struct Mpeg1Context {
     int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
     int repeat_field; /* true if we must repeat the field */
     AVPanScan pan_scan; /** some temporary storage for the panscan */
+    int slice_count;
 } Mpeg1Context;
 
 static int mpeg_decode_init(AVCodecContext *avctx)
@@ -2015,43 +2016,14 @@ short * tmp;
     s->pblocks[5] = tmp;
 }
 
-#define DECODE_SLICE_FATAL_ERROR -2
-#define DECODE_SLICE_ERROR -1
-#define DECODE_SLICE_OK 0
+static int mpeg_field_start(MpegEncContext *s){
+    AVCodecContext *avctx= s->avctx;
+    Mpeg1Context *s1 = (Mpeg1Context*)s;
 
-/**
- * decodes a slice.
- * @return DECODE_SLICE_FATAL_ERROR if a non recoverable error occured<br>
- *         DECODE_SLICE_ERROR if the slice is damaged<br>
- *         DECODE_SLICE_OK if this slice is ok<br>
- */
-static int mpeg_decode_slice(AVCodecContext *avctx, 
-                              AVFrame *pict,
-                              int start_code,
-                              uint8_t **buf, int buf_size)
-{
-    Mpeg1Context *s1 = avctx->priv_data;
-    MpegEncContext *s = &s1->mpeg_enc_ctx;
-    int ret;
-    const int field_pic= s->picture_structure != PICT_FRAME;
-
-    s->resync_mb_x= s->mb_x = 
-    s->resync_mb_y= s->mb_y = -1;
-    
-    start_code = (start_code - 1) & 0xff;
-    if (start_code >= s->mb_height){
-        av_log(s->avctx, AV_LOG_ERROR, "slice below image (%d >= %d)\n", start_code, s->mb_height);
-        return -1;
-    }
-    
-    ff_mpeg1_clean_buffers(s);
-    s->interlaced_dct = 0;
-        
     /* start frame decoding */
-    if (s->first_slice) {
-      if(s->first_field || s->picture_structure==PICT_FRAME){
+    if(s->first_field || s->picture_structure==PICT_FRAME){
         if(MPV_frame_start(s, avctx) < 0)
-            return DECODE_SLICE_FATAL_ERROR;
+            return -1;
 
         ff_er_frame_start(s);
 
@@ -2069,7 +2041,7 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
         }         
 
         *s->current_picture_ptr->pan_scan= s1->pan_scan;
-      }else{ //second field
+    }else{ //second field
             int i;
             
             if(!s->current_picture_ptr){
@@ -2083,30 +2055,48 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
                     s->current_picture.data[i] += s->current_picture_ptr->linesize[i];
                 } 
             }
-      }
+    }
 #ifdef HAVE_XVMC
 // MPV_frame_start will call this function too,
 // but we need to call it on every field
-      if(s->avctx->xvmc_acceleration)
+    if(s->avctx->xvmc_acceleration)
          XVMC_field_start(s,avctx);
 #endif
-    }//fi(s->first_slice)
 
+    return 0;
+}
+
+#define DECODE_SLICE_ERROR -1
+#define DECODE_SLICE_OK 0
+
+/**
+ * decodes a slice. mb_y is the MB row from the startcode, it is stored in MpegEncContext.mb_y by this function
+ * @return DECODE_SLICE_ERROR if the slice is damaged<br>
+ *         DECODE_SLICE_OK if this slice is ok<br>
+ */
+static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
+                              uint8_t **buf, int buf_size)
+{
+    MpegEncContext *s = &s1->mpeg_enc_ctx;
+    AVCodecContext *avctx= s->avctx;
+    int ret;
+    const int field_pic= s->picture_structure != PICT_FRAME;
+
+    s->resync_mb_x=
+    s->resync_mb_y= -1; //no resync point yet, the callers skip the error ff_er_add_slice() while these are negative
+
+    if (mb_y >= s->mb_height){
+        av_log(s->avctx, AV_LOG_ERROR, "slice below image (%d >= %d)\n", mb_y, s->mb_height); //print the row that was tested, s->mb_y is not assigned yet
+        return -1;
+    }
+    
     init_get_bits(&s->gb, *buf, buf_size*8);
 
-    s->qscale = get_qscale(s);
-    if (s->first_slice && (s->first_field || s->picture_structure==PICT_FRAME)) {
-        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", 
-                 s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1],
-                 s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), 
-                 s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", 
-                 s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors,
-                 s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? "420" :"");
-        }
-    }
+    ff_mpeg1_clean_buffers(s);
+    s->interlaced_dct = 0;
+
+    s->qscale = get_qscale(s);
 
-    s->first_slice = 0;
     if(s->qscale == 0){
         av_log(s->avctx, AV_LOG_ERROR, "qscale == 0\n");
         return -1;
@@ -2135,12 +2125,23 @@ static int mpeg_decode_slice(AVCodecContext *avctx,
             break;
         }
     }
-    
+
     s->resync_mb_x= s->mb_x;
-    s->resync_mb_y= s->mb_y = start_code;
+    s->resync_mb_y= s->mb_y= mb_y;
     s->mb_skip_run= 0;
     ff_init_block_index(s);
 
+    if (s->mb_y==0 && s->mb_x==0 && (s->first_field || s->picture_structure==PICT_FRAME)) {
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", 
+                 s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1],
+                 s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), 
+                 s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", 
+                 s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors,
+                 s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? "420" :"");
+        }
+    }    
+    
     for(;;) {
 #ifdef HAVE_XVMC
         //one 1 we memcpy blocks in xvmcvideo
@@ -2268,6 +2269,39 @@ eos: // end of slice
     return 0;
 }
 
+/**
+ * Worker handed to avctx->execute(): decodes the slices of one thread context,
+ * beginning at its start_mb_y, and reports every slice to the error resilience code.
+ * @param c   codec context, not used by the worker itself
+ * @param arg the thread's MpegEncContext; its gb must already point at the first slice
+ * @return 0 once the context's mb_y equals end_mb_y, -1 if the next startcode is no slice row inside [0, end_mb_y)
+ */
+static int slice_decode_thread(AVCodecContext *c, void *arg){
+    MpegEncContext *ctx= arg;
+    uint8_t *pos= ctx->gb.buffer;
+    int row= ctx->start_mb_y;
+
+    //initial value: 3 per macroblock in the rows start_mb_y .. end_mb_y-1
+    ctx->error_count= 3*(ctx->end_mb_y - ctx->start_mb_y)*ctx->mb_width;
+
+    do{
+        int res= mpeg_decode_slice((Mpeg1Context*)ctx, row, &pos, ctx->gb.buffer_end - pos);
+        emms_c();
+
+        if(res >= 0){
+            ff_er_add_slice(ctx, ctx->resync_mb_x, ctx->resync_mb_y, ctx->mb_x-1, ctx->mb_y, AC_END|DC_END|MV_END);
+        }else if(ctx->resync_mb_x>=0 && ctx->resync_mb_y>=0){
+            ff_er_add_slice(ctx, ctx->resync_mb_x, ctx->resync_mb_y, ctx->mb_x, ctx->mb_y, AC_ERROR|DC_ERROR|MV_ERROR);
+        }
+
+        if(ctx->mb_y == ctx->end_mb_y)
+            return 0;
+        row= find_start_code(&pos, ctx->gb.buffer_end) - SLICE_MIN_START_CODE;
+    }while(row >= 0 && row < ctx->end_mb_y);
+
+    return -1;
+}
+
 /**
  * handles slice ends.
  * @return 1 if it seems to be the last slice of 
@@ -2621,12 +2655,21 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
 
     if(s->mpeg_enc_ctx_allocated==0 && avctx->codec_tag == ff_get_fourcc("VCR2"))
         vcr2_init_sequence(avctx);
-
+    
+    s->slice_count= 0;
+        
     for(;;) {
         /* find start next code */
         start_code = find_start_code(&buf_ptr, buf_end);
         if (start_code < 0){
             if(s2->pict_type != B_TYPE || avctx->hurry_up==0){
+                if(avctx->thread_count > 1){
+                    int i;
+
+                    avctx->execute(avctx, slice_decode_thread,  (void**)&(s2->thread_context[0]), NULL, s->slice_count);
+                    for(i=0; i<s->slice_count; i++)
+                        s2->error_count += s2->thread_context[i]->error_count;
+                }
                 if (slice_end(avctx, picture)) {
                     if(s2->last_picture_ptr || s2->low_delay) //FIXME merge with the stuff in mpeg_decode_slice
                         *data_size = sizeof(AVPicture);
@@ -2667,6 +2710,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
                 default:
                     if (start_code >= SLICE_MIN_START_CODE &&
                         start_code <= SLICE_MAX_START_CODE) {
+                        int mb_y= start_code - SLICE_MIN_START_CODE;
                         
                         /* skip b frames if we dont have reference frames */
                         if(s2->last_picture_ptr==NULL && s2->pict_type==B_TYPE) break;
@@ -2676,17 +2720,38 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
                         if(avctx->hurry_up>=5) break;
                         
                         if (!s->mpeg_enc_ctx_allocated) break;
-
-                        ret = mpeg_decode_slice(avctx, picture,
-                                                start_code, &buf_ptr, input_size);
-                        emms_c();
-
-                        if(ret < 0){
-                            if(s2->resync_mb_x>=0 && s2->resync_mb_y>=0)
-                                ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x, s2->mb_y, AC_ERROR|DC_ERROR|MV_ERROR);
-                            if(ret==DECODE_SLICE_FATAL_ERROR) return -1;
+                        
+                        if(s2->first_slice){
+                            s2->first_slice=0;
+                            if(mpeg_field_start(s2) < 0)
+                                return -1;
+                        }
+                        
+                        if(avctx->thread_count > 1){
+                            int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count; //first MB row that opens the next thread context
+                            if(threshold <= mb_y && mb_y < s2->mb_height){ //rows below the image must not open a context: with slice_count==thread_count the threshold is mb_height and thread_context[thread_count] would be used
+                                MpegEncContext *thread_context= s2->thread_context[s->slice_count];
+                                
+                                thread_context->start_mb_y= mb_y;
+                                thread_context->end_mb_y  = s2->mb_height;
+                                if(s->slice_count){
+                                    s2->thread_context[s->slice_count-1]->end_mb_y= mb_y; //the previous context stops where this one starts
+                                    ff_update_duplicate_context(thread_context, s2);
+                                }
+                                init_get_bits(&thread_context->gb, buf_ptr, input_size*8);
+                                s->slice_count++;
+                            }
+                            buf_ptr += 2; //FIXME add minimum num of bytes per slice
                         }else{
-                            ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x-1, s2->mb_y, AC_END|DC_END|MV_END);
+                            ret = mpeg_decode_slice(s, mb_y, &buf_ptr, input_size);
+                            emms_c();
+
+                            if(ret < 0){
+                                if(s2->resync_mb_x>=0 && s2->resync_mb_y>=0)
+                                    ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x, s2->mb_y, AC_ERROR|DC_ERROR|MV_ERROR);
+                            }else{
+                                ff_er_add_slice(s2, s2->resync_mb_x, s2->resync_mb_y, s2->mb_x-1, s2->mb_y, AC_END|DC_END|MV_END);
+                            }
                         }
                     }
                     break;
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index e92022b0cc..3b36f97244 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -468,13 +468,17 @@ static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
 #undef COPY
 }
 
-static void update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
+void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){ //overwrites dst with src, then puts back whatever backup_duplicate_context() saved from dst
     MpegEncContext bak;
+    int i; //index for the pblocks loop below
     //FIXME copy only needed parts
 //START_TIMER
     backup_duplicate_context(&bak, dst);
     memcpy(dst, src, sizeof(MpegEncContext));
     backup_duplicate_context(dst, &bak);
+    for(i=0;i<12;i++){ //pblocks may still hold the pointer values memcpy'd from src, aim them at dst's own block[] entries
+        dst->pblocks[i] = (short *)(&dst->block[i]);
+    }
 //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
 }
 
@@ -4632,7 +4636,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     
     s->mb_intra=0; //for the rate distoration & bit compare functions
     for(i=1; i<s->avctx->thread_count; i++){
-        update_duplicate_context(s->thread_context[i], s);
+        ff_update_duplicate_context(s->thread_context[i], s);
     }
     
     /* Estimate motion for every MB */
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 6eb3a0fcff..e6e242323e 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -736,6 +736,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict);
 void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix);
 int ff_find_unused_picture(MpegEncContext *s, int shared);
 void ff_denoise_dct(MpegEncContext *s, DCTELEM *block);
+void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src);
 
 void ff_er_frame_start(MpegEncContext *s);
 void ff_er_frame_end(MpegEncContext *s);