diff --git a/configure b/configure index 4691c280a1..632ba44c7e 100755 --- a/configure +++ b/configure @@ -1829,6 +1829,7 @@ CONFIG_EXTRA=" mpegvideo mpegvideoenc nettle + pixblockdsp qpeldsp rangecoder riffdec @@ -1997,7 +1998,7 @@ threads_if_any="$THREADS_LIST" # subsystems dct_select="rdft" -dsputil_select="fdctdsp idctdsp" +dsputil_select="fdctdsp idctdsp pixblockdsp" error_resilience_select="dsputil" frame_thread_encoder_deps="encoders threads" intrax8_select="error_resilience" @@ -2007,7 +2008,7 @@ mpeg_er_select="error_resilience" mpegaudio_select="mpegaudiodsp" mpegaudiodsp_select="dct" mpegvideo_select="blockdsp dsputil h264chroma hpeldsp idctdsp videodsp" -mpegvideoenc_select="dsputil mpegvideo qpeldsp" +mpegvideoenc_select="dsputil mpegvideo pixblockdsp qpeldsp" # decoders / encoders aac_decoder_select="mdct sinewin" @@ -2026,9 +2027,9 @@ amv_decoder_select="sp5x_decoder exif" amv_encoder_select="aandcttables mpegvideoenc" ape_decoder_select="bswapdsp llauddsp" asv1_decoder_select="blockdsp bswapdsp idctdsp" -asv1_encoder_select="bswapdsp dsputil fdctdsp" +asv1_encoder_select="bswapdsp fdctdsp pixblockdsp" asv2_decoder_select="blockdsp bswapdsp idctdsp" -asv2_encoder_select="bswapdsp dsputil fdctdsp" +asv2_encoder_select="bswapdsp fdctdsp pixblockdsp" atrac1_decoder_select="mdct sinewin" atrac3_decoder_select="mdct" atrac3p_decoder_select="mdct sinewin" @@ -2045,9 +2046,9 @@ cscd_decoder_suggest="zlib" dca_decoder_select="mdct" dirac_decoder_select="dsputil dwt golomb videodsp" dnxhd_decoder_select="blockdsp idctdsp" -dnxhd_encoder_select="aandcttables blockdsp dsputil fdctdsp idctdsp mpegvideoenc" +dnxhd_encoder_select="aandcttables blockdsp fdctdsp idctdsp mpegvideoenc pixblockdsp" dvvideo_decoder_select="dvprofile idctdsp" -dvvideo_encoder_select="dsputil dvprofile fdctdsp" +dvvideo_encoder_select="dsputil dvprofile fdctdsp pixblockdsp" dxa_decoder_select="zlib" eac3_decoder_select="ac3_decoder" eac3_encoder_select="ac3_encoder" diff --git 
a/libavcodec/Makefile b/libavcodec/Makefile index 3a2f5baa6b..0bbfa27e48 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -82,6 +82,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \ OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ motion_est.o ratecontrol.o \ mpegvideoencdsp.o +OBJS-$(CONFIG_PIXBLOCKDSP) += pixblockdsp.o OBJS-$(CONFIG_QPELDSP) += qpeldsp.o OBJS-$(CONFIG_RANGECODER) += rangecoder.o RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index fbbd0696b7..6b80de8a2b 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -24,6 +24,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_init_arm.o OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o +OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_init_arm.o OBJS-$(CONFIG_VIDEODSP) += arm/videodsp_init_arm.o OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o @@ -63,6 +64,7 @@ ARMV6-OBJS-$(CONFIG_IDCTDSP) += arm/idctdsp_init_armv6.o \ arm/simple_idct_armv6.o ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o ARMV6-OBJS-$(CONFIG_MPEGVIDEOENC) += arm/mpegvideoencdsp_armv6.o +ARMV6-OBJS-$(CONFIG_PIXBLOCKDSP) += arm/pixblockdsp_armv6.o ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o ARMV6-OBJS-$(CONFIG_VC1_DECODER) += arm/startcode_armv6.o diff --git a/libavcodec/arm/dsputil_armv6.S b/libavcodec/arm/dsputil_armv6.S index 60232243e5..fa5a82301e 100644 --- a/libavcodec/arm/dsputil_armv6.S +++ b/libavcodec/arm/dsputil_armv6.S @@ -20,61 +20,6 @@ #include "libavutil/arm/asm.S" -function ff_get_pixels_armv6, export=1 - pld [r1, r2] - push {r4-r8, lr} - mov lr, #8 -1: - ldrd_post r4, r5, r1, r2 - subs lr, lr, #1 - uxtb16 r6, r4 - uxtb16 r4, r4, ror #8 - uxtb16 r12, r5 - uxtb16 r8, r5, ror #8 - pld [r1, r2] - pkhbt r5, r6, r4, lsl #16 - pkhtb r6, r4, r6, asr #16 - pkhbt r7, r12, r8, 
lsl #16 - pkhtb r12, r8, r12, asr #16 - stm r0!, {r5,r6,r7,r12} - bgt 1b - - pop {r4-r8, pc} -endfunc - -function ff_diff_pixels_armv6, export=1 - pld [r1, r3] - pld [r2, r3] - push {r4-r9, lr} - mov lr, #8 -1: - ldrd_post r4, r5, r1, r3 - ldrd_post r6, r7, r2, r3 - uxtb16 r8, r4 - uxtb16 r4, r4, ror #8 - uxtb16 r9, r6 - uxtb16 r6, r6, ror #8 - pld [r1, r3] - ssub16 r9, r8, r9 - ssub16 r6, r4, r6 - uxtb16 r8, r5 - uxtb16 r5, r5, ror #8 - pld [r2, r3] - pkhbt r4, r9, r6, lsl #16 - pkhtb r6, r6, r9, asr #16 - uxtb16 r9, r7 - uxtb16 r7, r7, ror #8 - ssub16 r9, r8, r9 - ssub16 r5, r5, r7 - subs lr, lr, #1 - pkhbt r8, r9, r5, lsl #16 - pkhtb r9, r5, r9, asr #16 - stm r0!, {r4,r6,r8,r9} - bgt 1b - - pop {r4-r9, pc} -endfunc - function ff_pix_abs16_armv6, export=1 ldr r0, [sp] push {r4-r9, lr} diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c index 1cfad42183..86d84f5744 100644 --- a/libavcodec/arm/dsputil_init_armv6.c +++ b/libavcodec/arm/dsputil_init_armv6.c @@ -26,10 +26,6 @@ #include "libavcodec/mpegvideo.h" #include "dsputil_arm.h" -void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); -void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, - const uint8_t *s2, int stride); - int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, int line_size, int h); int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, @@ -46,10 +42,6 @@ int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2, av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth) { - if (!high_bit_depth) - c->get_pixels = ff_get_pixels_armv6; - c->diff_pixels = ff_diff_pixels_armv6; - c->pix_abs[0][0] = ff_pix_abs16_armv6; c->pix_abs[0][1] = ff_pix_abs16_x2_armv6; c->pix_abs[0][2] = ff_pix_abs16_y2_armv6; diff --git a/libavcodec/arm/pixblockdsp_armv6.S b/libavcodec/arm/pixblockdsp_armv6.S new file mode 100644 index 0000000000..b10ea78e88 --- /dev/null +++ 
b/libavcodec/arm/pixblockdsp_armv6.S @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2009 Mans Rullgard + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +function ff_get_pixels_armv6, export=1 + pld [r1, r2] + push {r4-r8, lr} + mov lr, #8 +1: + ldrd_post r4, r5, r1, r2 + subs lr, lr, #1 + uxtb16 r6, r4 + uxtb16 r4, r4, ror #8 + uxtb16 r12, r5 + uxtb16 r8, r5, ror #8 + pld [r1, r2] + pkhbt r5, r6, r4, lsl #16 + pkhtb r6, r4, r6, asr #16 + pkhbt r7, r12, r8, lsl #16 + pkhtb r12, r8, r12, asr #16 + stm r0!, {r5,r6,r7,r12} + bgt 1b + + pop {r4-r8, pc} +endfunc + +function ff_diff_pixels_armv6, export=1 + pld [r1, r3] + pld [r2, r3] + push {r4-r9, lr} + mov lr, #8 +1: + ldrd_post r4, r5, r1, r3 + ldrd_post r6, r7, r2, r3 + uxtb16 r8, r4 + uxtb16 r4, r4, ror #8 + uxtb16 r9, r6 + uxtb16 r6, r6, ror #8 + pld [r1, r3] + ssub16 r9, r8, r9 + ssub16 r6, r4, r6 + uxtb16 r8, r5 + uxtb16 r5, r5, ror #8 + pld [r2, r3] + pkhbt r4, r9, r6, lsl #16 + pkhtb r6, r6, r9, asr #16 + uxtb16 r9, r7 + uxtb16 r7, r7, ror #8 + ssub16 r9, r8, r9 + ssub16 r5, r5, r7 + subs lr, lr, #1 + pkhbt r8, r9, r5, lsl #16 + pkhtb r9, r5, r9, asr #16 + stm r0!, {r4,r6,r8,r9} + bgt 1b + + pop {r4-r9, pc} +endfunc diff --git a/libavcodec/arm/pixblockdsp_init_arm.c 
b/libavcodec/arm/pixblockdsp_init_arm.c new file mode 100644 index 0000000000..b77c523a6e --- /dev/null +++ b/libavcodec/arm/pixblockdsp_init_arm.c @@ -0,0 +1,42 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/arm/cpu.h" +#include "libavcodec/avcodec.h" +#include "libavcodec/pixblockdsp.h" + +void ff_get_pixels_armv6(int16_t *block, const uint8_t *pixels, int stride); +void ff_diff_pixels_armv6(int16_t *block, const uint8_t *s1, + const uint8_t *s2, int stride); + +av_cold void ff_pixblockdsp_init_arm(PixblockDSPContext *c, + AVCodecContext *avctx, + unsigned high_bit_depth) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_armv6(cpu_flags)) { + if (!high_bit_depth) + c->get_pixels = ff_get_pixels_armv6; + c->diff_pixels = ff_diff_pixels_armv6; + } +} diff --git a/libavcodec/asv.h b/libavcodec/asv.h index e1f90e51ec..a0e8fef703 100644 --- a/libavcodec/asv.h +++ b/libavcodec/asv.h @@ -33,19 +33,19 @@ #include "avcodec.h" #include "blockdsp.h" #include "bswapdsp.h" -#include "dsputil.h" #include "fdctdsp.h" #include "idctdsp.h" #include "get_bits.h" +#include "pixblockdsp.h" #include "put_bits.h" typedef struct ASV1Context{ AVCodecContext *avctx; 
BlockDSPContext bdsp; BswapDSPContext bbdsp; - DSPContext dsp; FDCTDSPContext fdsp; IDCTDSPContext idsp; + PixblockDSPContext pdsp; PutBitContext pb; GetBitContext gb; ScanTable scantable; diff --git a/libavcodec/asvenc.c b/libavcodec/asvenc.c index ae81953f30..02cf2db991 100644 --- a/libavcodec/asvenc.c +++ b/libavcodec/asvenc.c @@ -160,16 +160,16 @@ static inline void dct_get(ASV1Context *a, const AVFrame *frame, uint8_t *ptr_cb = frame->data[1] + (mb_y * 8 * frame->linesize[1]) + mb_x * 8; uint8_t *ptr_cr = frame->data[2] + (mb_y * 8 * frame->linesize[2]) + mb_x * 8; - a->dsp.get_pixels(block[0], ptr_y , linesize); - a->dsp.get_pixels(block[1], ptr_y + 8, linesize); - a->dsp.get_pixels(block[2], ptr_y + 8*linesize , linesize); - a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize); + a->pdsp.get_pixels(block[0], ptr_y, linesize); + a->pdsp.get_pixels(block[1], ptr_y + 8, linesize); + a->pdsp.get_pixels(block[2], ptr_y + 8 * linesize, linesize); + a->pdsp.get_pixels(block[3], ptr_y + 8 * linesize + 8, linesize); for(i=0; i<4; i++) a->fdsp.fdct(block[i]); if(!(a->avctx->flags&CODEC_FLAG_GRAY)){ - a->dsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); - a->dsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); + a->pdsp.get_pixels(block[4], ptr_cb, frame->linesize[1]); + a->pdsp.get_pixels(block[5], ptr_cr, frame->linesize[2]); for(i=4; i<6; i++) a->fdsp.fdct(block[i]); } @@ -282,8 +282,8 @@ static av_cold int encode_init(AVCodecContext *avctx){ const int scale= avctx->codec_id == AV_CODEC_ID_ASV1 ? 
1 : 2; ff_asv_common_init(avctx); - ff_dsputil_init(&a->dsp, avctx); ff_fdctdsp_init(&a->fdsp, avctx); + ff_pixblockdsp_init(&a->pdsp, avctx); if(avctx->global_quality <= 0) avctx->global_quality= 4*FF_QUALITY_SCALE; diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c index f6f9af833a..3ad625352a 100644 --- a/libavcodec/dnxhdenc.c +++ b/libavcodec/dnxhdenc.c @@ -30,10 +30,10 @@ #include "avcodec.h" #include "blockdsp.h" -#include "dsputil.h" #include "fdctdsp.h" #include "internal.h" #include "mpegvideo.h" +#include "pixblockdsp.h" #include "dnxhdenc.h" @@ -326,6 +326,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx) ff_fdctdsp_init(&ctx->m.fdsp, avctx); ff_idctdsp_init(&ctx->m.idsp, avctx); ff_mpegvideoencdsp_init(&ctx->m.mpvencdsp, avctx); + ff_pixblockdsp_init(&ctx->m.pdsp, avctx); ff_dct_common_init(&ctx->m); ff_dct_encode_init(&ctx->m); @@ -561,12 +562,12 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); - DSPContext *dsp = &ctx->m.dsp; + PixblockDSPContext *pdsp = &ctx->m.pdsp; - dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); - dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize); - dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); - dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); + pdsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); + pdsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize); + pdsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); + pdsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); if (mb_y + 1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) { if (ctx->interlaced) { @@ -589,14 +590,14 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) ctx->bdsp.clear_block(ctx->blocks[7]); } } else { - dsp->get_pixels(ctx->blocks[4], - ptr_y + ctx->dct_y_offset, ctx->m.linesize); - 
dsp->get_pixels(ctx->blocks[5], - ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); - dsp->get_pixels(ctx->blocks[6], - ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); - dsp->get_pixels(ctx->blocks[7], - ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); + pdsp->get_pixels(ctx->blocks[4], + ptr_y + ctx->dct_y_offset, ctx->m.linesize); + pdsp->get_pixels(ctx->blocks[5], + ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); + pdsp->get_pixels(ctx->blocks[6], + ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); + pdsp->get_pixels(ctx->blocks[7], + ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); } } diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index c68a70a79e..1cd9658ba6 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -36,13 +36,6 @@ uint32_t ff_square_tab[512] = { 0, }; -#define BIT_DEPTH 16 -#include "dsputilenc_template.c" -#undef BIT_DEPTH - -#define BIT_DEPTH 8 -#include "dsputilenc_template.c" - static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { @@ -111,27 +104,6 @@ static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, return s; } -static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1, - const uint8_t *s2, int stride) -{ - int i; - - /* read the pixels */ - for (i = 0; i < 8; i++) { - block[0] = s1[0] - s2[0]; - block[1] = s1[1] - s2[1]; - block[2] = s1[2] - s2[2]; - block[3] = s1[3] - s2[3]; - block[4] = s1[4] - s2[4]; - block[5] = s1[5] - s2[5]; - block[6] = s1[6] - s2[6]; - block[7] = s1[7] - s2[7]; - s1 += stride; - s2 += stride; - block += 8; - } -} - static int sum_abs_dctelem_c(int16_t *block) { int sum = 0, i; @@ -586,7 +558,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, av_assert2(h == 8); - s->dsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels(temp, src1, src2, stride); s->fdsp.fdct(temp); return s->dsp.sum_abs_dctelem(temp); } @@ -626,7 +598,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1, int16_t dct[8][8]; int i, 
sum = 0; - s->dsp.diff_pixels(dct[0], src1, src2, stride); + s->pdsp.diff_pixels(dct[0], src1, src2, stride); #define SRC(x) dct[i][x] #define DST(x, v) dct[i][x] = v @@ -653,7 +625,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, av_assert2(h == 8); - s->dsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels(temp, src1, src2, stride); s->fdsp.fdct(temp); for (i = 0; i < 64; i++) @@ -672,7 +644,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, av_assert2(h == 8); s->mb_intra = 0; - s->dsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels(temp, src1, src2, stride); memcpy(bak, temp, 64 * sizeof(int16_t)); @@ -703,7 +675,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, copy_block8(lsrc1, src1, 8, stride, 8); copy_block8(lsrc2, src2, 8, stride, 8); - s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8); + s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8); s->block_last_index[0 /* FIXME */] = last = @@ -775,7 +747,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, av_assert2(h == 8); - s->dsp.diff_pixels(temp, src1, src2, stride); + s->pdsp.diff_pixels(temp, src1, src2, stride); s->block_last_index[0 /* FIXME */] = last = @@ -971,8 +943,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) ff_check_alignment(); - c->diff_pixels = diff_pixels_c; - c->sum_abs_dctelem = sum_abs_dctelem_c; /* TODO [0] 16 [1] 8 */ @@ -1019,21 +989,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) ff_dsputil_init_dwt(c); #endif - switch (avctx->bits_per_raw_sample) { - case 9: - case 10: - case 12: - case 14: - c->get_pixels = get_pixels_16_c; - break; - default: - if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) { - c->get_pixels = get_pixels_8_c; - } - break; - } - - if (ARCH_ALPHA) ff_dsputil_init_alpha(c, avctx); if (ARCH_ARM) diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index ea5d13c72d..5f4ba349e7 100644 --- 
a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -62,14 +62,6 @@ typedef int (*me_cmp_func)(struct MpegEncContext *c, * DSPContext. */ typedef struct DSPContext { - /* pixel ops : interface with DCT */ - void (*get_pixels)(int16_t *block /* align 16 */, - const uint8_t *pixels /* align 8 */, - int line_size); - void (*diff_pixels)(int16_t *block /* align 16 */, - const uint8_t *s1 /* align 8 */, - const uint8_t *s2 /* align 8 */, - int stride); int (*sum_abs_dctelem)(int16_t *block /* align 16 */); me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */ diff --git a/libavcodec/dvenc.c b/libavcodec/dvenc.c index a60b834dfe..aeb4a33259 100644 --- a/libavcodec/dvenc.c +++ b/libavcodec/dvenc.c @@ -31,6 +31,7 @@ #include "dsputil.h" #include "fdctdsp.h" #include "internal.h" +#include "pixblockdsp.h" #include "put_bits.h" #include "dv.h" #include "dv_tablegen.h" @@ -41,6 +42,7 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) DVVideoContext *s = avctx->priv_data; DSPContext dsp; FDCTDSPContext fdsp; + PixblockDSPContext pdsp; int ret; s->sys = avpriv_dv_codec_profile(avctx); @@ -70,9 +72,10 @@ static av_cold int dvvideo_encode_init(AVCodecContext *avctx) memset(&dsp,0, sizeof(dsp)); ff_dsputil_init(&dsp, avctx); ff_fdctdsp_init(&fdsp, avctx); + ff_pixblockdsp_init(&pdsp, avctx); ff_set_cmp(&dsp, dsp.ildct_cmp, avctx->ildct_cmp); - s->get_pixels = dsp.get_pixels; + s->get_pixels = pdsp.get_pixels; s->ildct_cmp = dsp.ildct_cmp[5]; s->fdct[0] = fdsp.fdct; diff --git a/libavcodec/libavcodec.v b/libavcodec/libavcodec.v index 5909dce46b..5a8c005b97 100644 --- a/libavcodec/libavcodec.v +++ b/libavcodec/libavcodec.v @@ -29,5 +29,6 @@ LIBAVCODEC_$MAJOR { ff_dnxhd_cid_table; ff_idctdsp_init; ff_fdctdsp_init; + ff_pixblockdsp_init; local: *; }; diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index f2d884bad5..94937e5cdc 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -40,6 +40,7 @@ #include "idctdsp.h" #include 
"mpegvideodsp.h" #include "mpegvideoencdsp.h" +#include "pixblockdsp.h" #include "put_bits.h" #include "ratecontrol.h" #include "parser.h" @@ -371,6 +372,7 @@ typedef struct MpegEncContext { IDCTDSPContext idsp; MpegVideoDSPContext mdsp; MpegvideoEncDSPContext mpvencdsp; + PixblockDSPContext pdsp; QpelDSPContext qdsp; VideoDSPContext vdsp; H263DSPContext h263dsp; diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c index 826f061eea..56867ccb85 100644 --- a/libavcodec/mpegvideo_enc.c +++ b/libavcodec/mpegvideo_enc.c @@ -37,7 +37,6 @@ #include "libavutil/timer.h" #include "avcodec.h" #include "dct.h" -#include "dsputil.h" #include "idctdsp.h" #include "mpeg12.h" #include "mpegvideo.h" @@ -48,6 +47,7 @@ #include "mpegutils.h" #include "mjpegenc.h" #include "msmpeg4.h" +#include "pixblockdsp.h" #include "qpeldsp.h" #include "faandct.h" #include "thread.h" @@ -820,6 +820,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx) ff_fdctdsp_init(&s->fdsp, avctx); ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); + ff_pixblockdsp_init(&s->pdsp, avctx); ff_qpeldsp_init(&s->qdsp); s->avctx->coded_frame = s->current_picture.f; @@ -2093,27 +2094,27 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, } } - s->dsp.get_pixels(s->block[0], ptr_y , wrap_y); - s->dsp.get_pixels(s->block[1], ptr_y + 8 , wrap_y); - s->dsp.get_pixels(s->block[2], ptr_y + dct_offset , wrap_y); - s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8 , wrap_y); + s->pdsp.get_pixels(s->block[0], ptr_y, wrap_y); + s->pdsp.get_pixels(s->block[1], ptr_y + 8, wrap_y); + s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset, wrap_y); + s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y); if (s->flags & CODEC_FLAG_GRAY) { skip_dct[4] = 1; skip_dct[5] = 1; } else { - s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); - s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c); + s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c); + s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c); if 
(!s->chroma_y_shift && s->chroma_x_shift) { /* 422 */ - s->dsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c); - s->dsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c); + s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c); + s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c); } else if (!s->chroma_y_shift && !s->chroma_x_shift) { /* 444 */ - s->dsp.get_pixels(s->block[6], ptr_cb + 8, wrap_c); - s->dsp.get_pixels(s->block[7], ptr_cr + 8, wrap_c); - s->dsp.get_pixels(s->block[8], ptr_cb + uv_dct_offset, wrap_c); - s->dsp.get_pixels(s->block[9], ptr_cr + uv_dct_offset, wrap_c); - s->dsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c); - s->dsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c); + s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c); + s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c); + s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c); + s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c); + s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c); + s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c); } } } else { @@ -2180,24 +2181,24 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s, } } - s->dsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y); - s->dsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); - s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset, - dest_y + dct_offset, wrap_y); - s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, - dest_y + dct_offset + 8, wrap_y); + s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y); + s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y); + s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset, + dest_y + dct_offset, wrap_y); + s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, + dest_y + dct_offset + 8, wrap_y); if (s->flags & CODEC_FLAG_GRAY) { skip_dct[4] = 1; skip_dct[5] = 1; } else { - s->dsp.diff_pixels(s->block[4], ptr_cb, 
dest_cb, wrap_c); - s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); + s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); + s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); if (!s->chroma_y_shift) { /* 422 */ - s->dsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset, - dest_cb + uv_dct_offset, wrap_c); - s->dsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset, - dest_cr + uv_dct_offset, wrap_c); + s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset, + dest_cb + uv_dct_offset, wrap_c); + s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset, + dest_cr + uv_dct_offset, wrap_c); } } /* pre quantization */ diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c new file mode 100644 index 0000000000..a69948e43e --- /dev/null +++ b/libavcodec/pixblockdsp.c @@ -0,0 +1,80 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "config.h" +#include "libavutil/attributes.h" +#include "avcodec.h" +#include "pixblockdsp.h" + +#define BIT_DEPTH 16 +#include "pixblockdsp_template.c" +#undef BIT_DEPTH + +#define BIT_DEPTH 8 +#include "pixblockdsp_template.c" + +static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1, + const uint8_t *s2, int stride) +{ + int i; + + /* read the pixels */ + for (i = 0; i < 8; i++) { + block[0] = s1[0] - s2[0]; + block[1] = s1[1] - s2[1]; + block[2] = s1[2] - s2[2]; + block[3] = s1[3] - s2[3]; + block[4] = s1[4] - s2[4]; + block[5] = s1[5] - s2[5]; + block[6] = s1[6] - s2[6]; + block[7] = s1[7] - s2[7]; + s1 += stride; + s2 += stride; + block += 8; + } +} + +av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) +{ + const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; + + c->diff_pixels = diff_pixels_c; + + switch (avctx->bits_per_raw_sample) { + case 9: + case 10: + case 12: + case 14: + c->get_pixels = get_pixels_16_c; + break; + default: + if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) { + c->get_pixels = get_pixels_8_c; + } + break; + } + + if (ARCH_ARM) + ff_pixblockdsp_init_arm(c, avctx, high_bit_depth); + if (ARCH_PPC) + ff_pixblockdsp_init_ppc(c, avctx, high_bit_depth); + if (ARCH_X86) + ff_pixblockdsp_init_x86(c, avctx, high_bit_depth); +} diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h new file mode 100644 index 0000000000..a724ffbef0 --- /dev/null +++ b/libavcodec/pixblockdsp.h @@ -0,0 +1,44 @@ +/* + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_PIXBLOCKDSP_H +#define AVCODEC_PIXBLOCKDSP_H + +#include + +#include "avcodec.h" + +typedef struct PixblockDSPContext { + void (*get_pixels)(int16_t *block /* align 16 */, + const uint8_t *pixels /* align 8 */, + int line_size); + void (*diff_pixels)(int16_t *block /* align 16 */, + const uint8_t *s1 /* align 8 */, + const uint8_t *s2 /* align 8 */, + int stride); +} PixblockDSPContext; + +void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); +void ff_pixblockdsp_init_arm(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_pixblockdsp_init_ppc(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_pixblockdsp_init_x86(PixblockDSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); + +#endif /* AVCODEC_PIXBLOCKDSP_H */ diff --git a/libavcodec/dsputilenc_template.c b/libavcodec/pixblockdsp_template.c similarity index 85% rename from libavcodec/dsputilenc_template.c rename to libavcodec/pixblockdsp_template.c index 711c404a97..3aeddf526c 100644 --- a/libavcodec/dsputilenc_template.c +++ b/libavcodec/pixblockdsp_template.c @@ -1,10 +1,4 @@ /* - * DSP utils - * Copyright (c) 2000, 2001 Fabrice Bellard - * Copyright (c) 2002-2004 Michael 
Niedermayer - * - * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer - * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -22,11 +16,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -/** - * @file - * DSP utils - */ - #include "bit_depth_template.c" static void FUNCC(get_pixels)(int16_t *av_restrict block, const uint8_t *_pixels, diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index 8f0f0ff308..bfa4d05578 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \ ppc/mpegvideodsp.o OBJS-$(CONFIG_MPEGVIDEOENC) += ppc/mpegvideoencdsp.o +OBJS-$(CONFIG_PIXBLOCKDSP) += ppc/pixblockdsp.o OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index 5ab1b51e2b..4cce30ac58 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -402,105 +402,6 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, return s; } -static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, - int line_size) -{ - int i; - vector unsigned char perm = vec_lvsl(0, pixels); - const vector unsigned char zero = - (const vector unsigned char) vec_splat_u8(0); - - for (i = 0; i < 8; i++) { - /* Read potentially unaligned pixels. - * We're reading 16 pixels, and actually only want 8, - * but we simply ignore the extras. */ - vector unsigned char pixl = vec_ld(0, pixels); - vector unsigned char pixr = vec_ld(7, pixels); - vector unsigned char bytes = vec_perm(pixl, pixr, perm); - - // Convert the bytes into shorts. - vector signed short shorts = (vector signed short) vec_mergeh(zero, - bytes); - - // Save the data to the block, we assume the block is 16-byte aligned. 
- vec_st(shorts, i * 16, (vector signed short *) block); - - pixels += line_size; - } -} - -static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, - const uint8_t *s2, int stride) -{ - int i; - vector unsigned char perm1 = vec_lvsl(0, s1); - vector unsigned char perm2 = vec_lvsl(0, s2); - const vector unsigned char zero = - (const vector unsigned char) vec_splat_u8(0); - vector signed short shorts1, shorts2; - - for (i = 0; i < 4; i++) { - /* Read potentially unaligned pixels. - * We're reading 16 pixels, and actually only want 8, - * but we simply ignore the extras. */ - vector unsigned char pixl = vec_ld(0, s1); - vector unsigned char pixr = vec_ld(15, s1); - vector unsigned char bytes = vec_perm(pixl, pixr, perm1); - - // Convert the bytes into shorts. - shorts1 = (vector signed short) vec_mergeh(zero, bytes); - - // Do the same for the second block of pixels. - pixl = vec_ld(0, s2); - pixr = vec_ld(15, s2); - bytes = vec_perm(pixl, pixr, perm2); - - // Convert the bytes into shorts. - shorts2 = (vector signed short) vec_mergeh(zero, bytes); - - // Do the subtraction. - shorts1 = vec_sub(shorts1, shorts2); - - // Save the data to the block, we assume the block is 16-byte aligned. - vec_st(shorts1, 0, (vector signed short *) block); - - s1 += stride; - s2 += stride; - block += 8; - - /* The code below is a copy of the code above... - * This is a manual unroll. */ - - /* Read potentially unaligned pixels. - * We're reading 16 pixels, and actually only want 8, - * but we simply ignore the extras. */ - pixl = vec_ld(0, s1); - pixr = vec_ld(15, s1); - bytes = vec_perm(pixl, pixr, perm1); - - // Convert the bytes into shorts. - shorts1 = (vector signed short) vec_mergeh(zero, bytes); - - // Do the same for the second block of pixels. - pixl = vec_ld(0, s2); - pixr = vec_ld(15, s2); - bytes = vec_perm(pixl, pixr, perm2); - - // Convert the bytes into shorts. - shorts2 = (vector signed short) vec_mergeh(zero, bytes); - - // Do the subtraction. 
- shorts1 = vec_sub(shorts1, shorts2); - - // Save the data to the block, we assume the block is 16-byte aligned. - vec_st(shorts1, 0, (vector signed short *) block); - - s1 += stride; - s2 += stride; - block += 8; - } -} - static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, uint8_t *src, int stride, int h) { @@ -854,12 +755,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, c->sse[0] = sse16_altivec; c->sse[1] = sse8_altivec; - c->diff_pixels = diff_pixels_altivec; - - if (!high_bit_depth) { - c->get_pixels = get_pixels_altivec; - } - c->hadamard8_diff[0] = hadamard8_diff16_altivec; c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; }
diff --git a/libavcodec/ppc/pixblockdsp.c b/libavcodec/ppc/pixblockdsp.c
new file mode 100644
index 0000000000..42c5be842e
--- /dev/null
+++ b/libavcodec/ppc/pixblockdsp.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2002 Brian Foley
+ * Copyright (c) 2002 Dieter Shirley
+ * Copyright (c) 2003-2004 Romain Dolbeau
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#if HAVE_ALTIVEC_H
+#include <altivec.h>
+#endif
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/ppc/cpu.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/pixblockdsp.h"
+
+#if HAVE_ALTIVEC
+
+/**
+ * Copy an 8x8 block of 8-bit pixels into block, zero-extending every pixel
+ * to 16 bits. Source rows are line_size bytes apart and may be unaligned;
+ * block is assumed to be 16-byte aligned.
+ */
+static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
+                               int line_size)
+{
+    int i;
+    const vector unsigned char zero =
+        (const vector unsigned char) vec_splat_u8(0);
+
+    for (i = 0; i < 8; i++) {
+        /* The alignment of pixels changes from row to row unless line_size
+         * is a multiple of 16, so derive the permute vector for every row. */
+        vector unsigned char perm = vec_lvsl(0, pixels);
+        /* Read potentially unaligned pixels.
+         * We're reading 16 pixels, and actually only want 8,
+         * but we simply ignore the extras. */
+        vector unsigned char pixl = vec_ld(0, pixels);
+        vector unsigned char pixr = vec_ld(7, pixels);
+        vector unsigned char bytes = vec_perm(pixl, pixr, perm);
+
+        // Convert the bytes into shorts.
+        vector signed short shorts = (vector signed short) vec_mergeh(zero,
+                                                                      bytes);
+
+        // Save the data to the block, we assume the block is 16-byte aligned.
+        vec_st(shorts, i * 16, (vector signed short *) block);
+
+        pixels += line_size;
+    }
+}
+
+/**
+ * Store the per-pixel difference s1 - s2 of two 8x8 blocks of 8-bit pixels
+ * into block as 16-bit values. Rows of both sources are stride bytes apart
+ * and may be unaligned; block is assumed to be 16-byte aligned.
+ */
+static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
+                                const uint8_t *s2, int stride)
+{
+    int i;
+    const vector unsigned char zero =
+        (const vector unsigned char) vec_splat_u8(0);
+    vector signed short shorts1, shorts2;
+
+    for (i = 0; i < 4; i++) {
+        /* The alignment of s1 and s2 changes from row to row unless stride
+         * is a multiple of 16, so derive the permute vectors for every row. */
+        vector unsigned char perm1 = vec_lvsl(0, s1);
+        vector unsigned char perm2 = vec_lvsl(0, s2);
+        /* Read potentially unaligned pixels.
+         * We're reading 16 pixels, and actually only want 8,
+         * but we simply ignore the extras. */
+        vector unsigned char pixl = vec_ld(0, s1);
+        vector unsigned char pixr = vec_ld(15, s1);
+        vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
+
+        // Convert the bytes into shorts.
+        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
+
+        // Do the same for the second block of pixels.
+        pixl = vec_ld(0, s2);
+        pixr = vec_ld(15, s2);
+        bytes = vec_perm(pixl, pixr, perm2);
+
+        // Convert the bytes into shorts.
+        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
+
+        // Do the subtraction.
+        shorts1 = vec_sub(shorts1, shorts2);
+
+        // Save the data to the block, we assume the block is 16-byte aligned.
+        vec_st(shorts1, 0, (vector signed short *) block);
+
+        s1 += stride;
+        s2 += stride;
+        block += 8;
+
+        /* The code below is a copy of the code above...
+         * This is a manual unroll. */
+
+        // Both sources moved on by one row: refresh the permute vectors.
+        perm1 = vec_lvsl(0, s1);
+        perm2 = vec_lvsl(0, s2);
+
+        /* Read potentially unaligned pixels.
+         * We're reading 16 pixels, and actually only want 8,
+         * but we simply ignore the extras. */
+        pixl = vec_ld(0, s1);
+        pixr = vec_ld(15, s1);
+        bytes = vec_perm(pixl, pixr, perm1);
+
+        // Convert the bytes into shorts.
+        shorts1 = (vector signed short) vec_mergeh(zero, bytes);
+
+        // Do the same for the second block of pixels.
+        pixl = vec_ld(0, s2);
+        pixr = vec_ld(15, s2);
+        bytes = vec_perm(pixl, pixr, perm2);
+
+        // Convert the bytes into shorts.
+        shorts2 = (vector signed short) vec_mergeh(zero, bytes);
+
+        // Do the subtraction.
+        shorts1 = vec_sub(shorts1, shorts2);
+
+        // Save the data to the block, we assume the block is 16-byte aligned.
+        vec_st(shorts1, 0, (vector signed short *) block);
+
+        s1 += stride;
+        s2 += stride;
+        block += 8;
+    }
+}
+
+#endif /* HAVE_ALTIVEC */
+
+/**
+ * Install the AltiVec routines above into c.
+ * Nothing is changed unless the build has AltiVec enabled and the running
+ * CPU reports it; get_pixels is only replaced when high_bit_depth is zero.
+ */
+av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
+                                     AVCodecContext *avctx,
+                                     unsigned high_bit_depth)
+{
+#if HAVE_ALTIVEC
+    if (!PPC_ALTIVEC(av_get_cpu_flags()))
+        return;
+
+    c->diff_pixels = diff_pixels_altivec;
+
+    if (!high_bit_depth) {
+        c->get_pixels = get_pixels_altivec;
+    }
+#endif /* HAVE_ALTIVEC */
+}
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 0843dcc774..44ccb2040f 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -31,6 +31,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ x86/mpegvideodsp.o OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \ x86/mpegvideoencdsp_init.o +OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp_init.o OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o @@ -110,6 +111,7 @@ YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o +YASM-OBJS-$(CONFIG_PIXBLOCKDSP) += x86/pixblockdsp.o YASM-OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp.o \ x86/fpel.o \ x86/qpel.o diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm index 13682ba5d4..023f512edd 100644 --- a/libavcodec/x86/dsputilenc.asm +++ b/libavcodec/x86/dsputilenc.asm @@ -352,115 +352,6 @@ SUM_SQUARED_ERRORS 16 INIT_XMM sse2 SUM_SQUARED_ERRORS 16 -INIT_MMX mmx -; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size) -cglobal get_pixels, 3,4 - movsxdifnidn r2, r2d - add r0, 128 - mov r3, -128 - pxor m7, m7 -.loop: - mova m0, [r1] - mova m2, [r1+r2] - mova m1, m0 - mova m3, m2 - punpcklbw m0, m7 - punpckhbw m1, m7 - punpcklbw m2, m7 - punpckhbw m3, m7 - mova [r0+r3+ 0], m0 - mova [r0+r3+ 8], m1 - mova [r0+r3+16], m2
- mova [r0+r3+24], m3 - lea r1, [r1+r2*2] - add r3, 32 - js .loop - REP_RET - -INIT_XMM sse2 -cglobal get_pixels, 3, 4, 5 - movsxdifnidn r2, r2d - lea r3, [r2*3] - pxor m4, m4 - movh m0, [r1] - movh m1, [r1+r2] - movh m2, [r1+r2*2] - movh m3, [r1+r3] - lea r1, [r1+r2*4] - punpcklbw m0, m4 - punpcklbw m1, m4 - punpcklbw m2, m4 - punpcklbw m3, m4 - mova [r0], m0 - mova [r0+0x10], m1 - mova [r0+0x20], m2 - mova [r0+0x30], m3 - movh m0, [r1] - movh m1, [r1+r2*1] - movh m2, [r1+r2*2] - movh m3, [r1+r3] - punpcklbw m0, m4 - punpcklbw m1, m4 - punpcklbw m2, m4 - punpcklbw m3, m4 - mova [r0+0x40], m0 - mova [r0+0x50], m1 - mova [r0+0x60], m2 - mova [r0+0x70], m3 - RET - -INIT_MMX mmx -; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, -; int stride); -cglobal diff_pixels, 4,5 - movsxdifnidn r3, r3d - pxor m7, m7 - add r0, 128 - mov r4, -128 -.loop: - mova m0, [r1] - mova m2, [r2] - mova m1, m0 - mova m3, m2 - punpcklbw m0, m7 - punpckhbw m1, m7 - punpcklbw m2, m7 - punpckhbw m3, m7 - psubw m0, m2 - psubw m1, m3 - mova [r0+r4+0], m0 - mova [r0+r4+8], m1 - add r1, r3 - add r2, r3 - add r4, 16 - jne .loop - REP_RET - -INIT_XMM sse2 -cglobal diff_pixels, 4, 5, 5 - movsxdifnidn r3, r3d - pxor m4, m4 - add r0, 128 - mov r4, -128 -.loop: - movh m0, [r1] - movh m2, [r2] - movh m1, [r1+r3] - movh m3, [r2+r3] - punpcklbw m0, m4 - punpcklbw m1, m4 - punpcklbw m2, m4 - punpcklbw m3, m4 - psubw m0, m2 - psubw m1, m3 - mova [r0+r4+0 ], m0 - mova [r0+r4+16], m1 - lea r1, [r1+r3*2] - lea r2, [r2+r3*2] - add r4, 32 - jne .loop - RET - ;----------------------------------------------- ;int ff_sum_abs_dctelem(int16_t *block) ;----------------------------------------------- diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index d0936595d0..5d48a78daa 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -30,12 +30,6 @@ #include "libavcodec/mpegvideo.h" #include "dsputil_x86.h" -void 
ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size); -void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size); -void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride); -void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2, - int stride); int ff_sum_abs_dctelem_mmx(int16_t *block); int ff_sum_abs_dctelem_mmxext(int16_t *block); int ff_sum_abs_dctelem_sse2(int16_t *block); @@ -353,16 +347,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, { int cpu_flags = av_get_cpu_flags(); - if (EXTERNAL_MMX(cpu_flags)) { - if (!high_bit_depth) - c->get_pixels = ff_get_pixels_mmx; - c->diff_pixels = ff_diff_pixels_mmx; - } - - if (EXTERNAL_SSE2(cpu_flags)) - if (!high_bit_depth) - c->get_pixels = ff_get_pixels_sse2; - #if HAVE_INLINE_ASM if (INLINE_MMX(cpu_flags)) { c->vsad[4] = vsad_intra16_mmx; @@ -410,7 +394,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, if (EXTERNAL_SSE2(cpu_flags)) { c->sse[0] = ff_sse16_sse2; c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2; - c->diff_pixels = ff_diff_pixels_sse2; #if HAVE_ALIGNED_STACK c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2; diff --git a/libavcodec/x86/pixblockdsp.asm b/libavcodec/x86/pixblockdsp.asm new file mode 100644 index 0000000000..00ee9b4ac2 --- /dev/null +++ b/libavcodec/x86/pixblockdsp.asm @@ -0,0 +1,135 @@ +;***************************************************************************** +;* SIMD-optimized pixel operations +;***************************************************************************** +;* Copyright (c) 2000, 2001 Fabrice Bellard +;* Copyright (c) 2002-2004 Michael Niedermayer +;* +;* This file is part of FFmpeg. 
+;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;***************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION .text + +INIT_MMX mmx +; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size) +cglobal get_pixels, 3,4 + movsxdifnidn r2, r2d + add r0, 128 + mov r3, -128 + pxor m7, m7 +.loop: + mova m0, [r1] + mova m2, [r1+r2] + mova m1, m0 + mova m3, m2 + punpcklbw m0, m7 + punpckhbw m1, m7 + punpcklbw m2, m7 + punpckhbw m3, m7 + mova [r0+r3+ 0], m0 + mova [r0+r3+ 8], m1 + mova [r0+r3+16], m2 + mova [r0+r3+24], m3 + lea r1, [r1+r2*2] + add r3, 32 + js .loop + REP_RET + +INIT_XMM sse2 +cglobal get_pixels, 3, 4, 5 + movsxdifnidn r2, r2d + lea r3, [r2*3] + pxor m4, m4 + movh m0, [r1] + movh m1, [r1+r2] + movh m2, [r1+r2*2] + movh m3, [r1+r3] + lea r1, [r1+r2*4] + punpcklbw m0, m4 + punpcklbw m1, m4 + punpcklbw m2, m4 + punpcklbw m3, m4 + mova [r0], m0 + mova [r0+0x10], m1 + mova [r0+0x20], m2 + mova [r0+0x30], m3 + movh m0, [r1] + movh m1, [r1+r2*1] + movh m2, [r1+r2*2] + movh m3, [r1+r3] + punpcklbw m0, m4 + punpcklbw m1, m4 + punpcklbw m2, m4 + punpcklbw m3, m4 + mova [r0+0x40], m0 + mova [r0+0x50], m1 + mova [r0+0x60], m2 + mova [r0+0x70], m3 + RET + +INIT_MMX mmx +; void ff_diff_pixels_mmx(int16_t 
*block, const uint8_t *s1, const uint8_t *s2, +; int stride); +cglobal diff_pixels, 4,5 + movsxdifnidn r3, r3d + pxor m7, m7 + add r0, 128 + mov r4, -128 +.loop: + mova m0, [r1] + mova m2, [r2] + mova m1, m0 + mova m3, m2 + punpcklbw m0, m7 + punpckhbw m1, m7 + punpcklbw m2, m7 + punpckhbw m3, m7 + psubw m0, m2 + psubw m1, m3 + mova [r0+r4+0], m0 + mova [r0+r4+8], m1 + add r1, r3 + add r2, r3 + add r4, 16 + jne .loop + REP_RET + +INIT_XMM sse2 +cglobal diff_pixels, 4, 5, 5 + movsxdifnidn r3, r3d + pxor m4, m4 + add r0, 128 + mov r4, -128 +.loop: + movh m0, [r1] + movh m2, [r2] + movh m1, [r1+r3] + movh m3, [r2+r3] + punpcklbw m0, m4 + punpcklbw m1, m4 + punpcklbw m2, m4 + punpcklbw m3, m4 + psubw m0, m2 + psubw m1, m3 + mova [r0+r4+0 ], m0 + mova [r0+r4+16], m1 + lea r1, [r1+r3*2] + lea r2, [r2+r3*2] + add r4, 32 + jne .loop + RET diff --git a/libavcodec/x86/pixblockdsp_init.c b/libavcodec/x86/pixblockdsp_init.c new file mode 100644 index 0000000000..4c31b802ff --- /dev/null +++ b/libavcodec/x86/pixblockdsp_init.c @@ -0,0 +1,50 @@ +/* + * SIMD-optimized pixel operations + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/pixblockdsp.h" + +void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size); +void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size); +void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2, + int stride); +void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2, + int stride); + +av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, + AVCodecContext *avctx, + unsigned high_bit_depth) +{ + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) { + if (!high_bit_depth) + c->get_pixels = ff_get_pixels_mmx; + c->diff_pixels = ff_diff_pixels_mmx; + } + + if (EXTERNAL_SSE2(cpu_flags)) { + if (!high_bit_depth) + c->get_pixels = ff_get_pixels_sse2; + c->diff_pixels = ff_diff_pixels_sse2; + } +} diff --git a/libavfilter/vf_mpdecimate.c b/libavfilter/vf_mpdecimate.c index 099622a029..c667a9f4cc 100644 --- a/libavfilter/vf_mpdecimate.c +++ b/libavfilter/vf_mpdecimate.c @@ -28,6 +28,7 @@ #include "libavutil/pixdesc.h" #include "libavutil/timestamp.h" #include "libavcodec/dsputil.h" +#include "libavcodec/pixblockdsp.h" #include "avfilter.h" #include "internal.h" #include "formats.h" @@ -49,6 +50,7 @@ typedef struct { int hsub, vsub; ///< chroma subsampling values AVFrame *ref; ///< reference picture DSPContext dspctx; ///< context providing optimized diff routines + PixblockDSPContext pdsp; AVCodecContext *avctx; ///< codec context required for the DSPContext } DecimateContext; @@ -75,6 +77,7 @@ static int diff_planes(AVFilterContext *ctx, { DecimateContext *decimate = ctx->priv; DSPContext *dspctx = &decimate->dspctx; + 
PixblockDSPContext *pdsp = &decimate->pdsp; int x, y; int d, c = 0; @@ -84,7 +87,7 @@ static int diff_planes(AVFilterContext *ctx, /* compute difference for blocks of 8x8 bytes */ for (y = 0; y < h-7; y += 4) { for (x = 8; x < w-7; x += 4) { - dspctx->diff_pixels(block, + pdsp->diff_pixels(block, cur+x+y*linesize, ref+x+y*linesize, linesize); d = dspctx->sum_abs_dctelem(block); @@ -141,6 +144,7 @@ static av_cold int init(AVFilterContext *ctx) if (!decimate->avctx) return AVERROR(ENOMEM); avpriv_dsputil_init(&decimate->dspctx, decimate->avctx); + ff_pixblockdsp_init(&decimate->pdsp, decimate->avctx); return 0; } diff --git a/libavfilter/vf_spp.c b/libavfilter/vf_spp.c index 9df87ff3f2..4e4a5795f4 100644 --- a/libavfilter/vf_spp.c +++ b/libavfilter/vf_spp.c @@ -232,7 +232,7 @@ static void filter(SPPContext *p, uint8_t *dst, uint8_t *src, const int x1 = x + offset[i + count - 1][0]; const int y1 = y + offset[i + count - 1][1]; const int index = x1 + y1*linesize; - p->dsp.get_pixels(block, p->src + index, linesize); + p->pdsp.get_pixels(block, p->src + index, linesize); p->fdsp.fdct(block); p->requantize(block2, block, qp, p->idsp.idct_permutation); p->idsp.idct(block2); @@ -380,9 +380,9 @@ static av_cold int init(AVFilterContext *ctx) spp->avctx = avcodec_alloc_context3(NULL); if (!spp->avctx) return AVERROR(ENOMEM); - avpriv_dsputil_init(&spp->dsp, spp->avctx); ff_idctdsp_init(&spp->idsp, spp->avctx); ff_fdctdsp_init(&spp->fdsp, spp->avctx); + ff_pixblockdsp_init(&spp->pdsp, spp->avctx); spp->store_slice = store_slice_c; switch (spp->mode) { case MODE_HARD: spp->requantize = hardthresh_c; break; diff --git a/libavfilter/vf_spp.h b/libavfilter/vf_spp.h index 909d4de812..c8eac3caf2 100644 --- a/libavfilter/vf_spp.h +++ b/libavfilter/vf_spp.h @@ -23,7 +23,7 @@ #define AVFILTER_SPP_H #include "libavcodec/avcodec.h" -#include "libavcodec/dsputil.h" +#include "libavcodec/pixblockdsp.h" #include "libavcodec/idctdsp.h" #include "libavcodec/fdctdsp.h" #include "avfilter.h" @@ 
-41,9 +41,9 @@ typedef struct { uint8_t *src; int16_t *temp; AVCodecContext *avctx; - DSPContext dsp; IDCTDSPContext idsp; FDCTDSPContext fdsp; + PixblockDSPContext pdsp; int8_t *non_b_qp_table; int non_b_qp_alloc_size; int use_bframe_qp;