sws: Support error diffusion dither for mono output

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2013-01-18 23:01:08 +01:00
parent 24bbc416a4
commit 646ade7679
7 changed files with 80 additions and 4 deletions

View File

@ -4,6 +4,7 @@ releases are sorted from youngest to oldest.
version <next>:
- VDPAU hardware acceleration through normal hwaccel
- SRTP support
- Error diffusion dither in Swscale
version 1.1:

View File

@ -333,6 +333,7 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
const uint8_t * const d128=dither_8x8_220[y&7];
int i;
unsigned acc = 0;
int err = 0;
for (i = 0; i < dstW; i += 2) {
int j;
@ -349,12 +350,25 @@ yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
Y1 = av_clip_uint8(Y1);
Y2 = av_clip_uint8(Y2);
}
accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
/* Choose the dithering method for this pair of pixels (Y1 = pixel i,
 * Y2 = pixel i+1). */
if (c->flags & SWS_ERROR_DIFFUSION) {
/* Error diffusion with the Floyd-Steinberg coefficients 7,1,5,3 (/16):
 * 7 * error of the pixel just processed on this row, plus 1/5/3 * the
 * entries at i, i+1, i+2 that an earlier call left in dither_error[0]
 * (presumably the previous output row -- confirm against the caller). */
Y1 += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4;
/* Entry i has now been consumed; overwrite it with the error carried in
 * from the preceding pixel so a later call can pick it up. */
c->dither_error[0][i] = err;
/* Shift in one output bit: set when the corrected value reaches 128. */
acc = 2*acc + (Y1 >= 128);
/* Y1 becomes the residual error: 220 is subtracted when the bit was set.
 * NOTE(review): 220 presumably matches the range of dither_8x8_220 -- confirm. */
Y1 -= 220*(acc&1);
/* Second pixel of the pair: same weights, with Y1 now playing the role of
 * the left-neighbour error and the stored entries shifted by one. */
err = Y2 + ((7*Y1 + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3])>>4);
c->dither_error[0][i+1] = Y1;
acc = 2*acc + (err >= 128);
/* err leaves the iteration holding the residual of pixel i+1. */
err -= 220*(acc&1);
} else {
/* Ordered dither: add the per-column value from the d128 row and let
 * accumulate_bit derive the output bit. */
accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
}
if ((i & 7) == 6) {
output_pixel(*dest++, acc);
}
}
c->dither_error[0][i] = err;
if (i & 6) {
output_pixel(*dest, acc);
@ -373,6 +387,29 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
int yalpha1 = 4096 - yalpha;
int i;
if (c->flags & SWS_ERROR_DIFFUSION) {
/* Error-diffusion path: two pixels per iteration, one output byte every
 * eight pixels.  The weights 7,1,5,3 (/16) are the Floyd-Steinberg
 * coefficients: 7 * error of the pixel just processed on this row plus
 * 1/5/3 * the entries an earlier call left in c->dither_error[0]. */
int err = 0;
/* Unsigned on purpose: acc is never reset and doubles on every pixel, so a
 * signed accumulator would overflow (undefined behaviour) on wide rows.
 * Only the low bits are consumed by output_pixel and by (acc & 1). */
unsigned acc = 0;
for (i = 0; i < dstW; i +=2) {
    int Y;

    /* Blend the two input rows with weights yalpha1 / yalpha. */
    Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
    Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4;
    /* Entry i has been consumed; store the error carried in from the
     * preceding pixel for a later call. */
    c->dither_error[0][i] = err;
    /* Shift in one output bit, then turn Y into the residual error. */
    acc = 2*acc + (Y >= 128);
    Y -= 220*(acc&1);

    /* Second pixel of the pair; Y is now the left-neighbour error. */
    err = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
    err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3])>>4;
    c->dither_error[0][i+1] = Y;
    acc = 2*acc + (err >= 128);
    err -= 220*(acc&1);

    /* Eight bits gathered: emit one byte.
     * NOTE(review): pixels after the last multiple of 8 are not flushed
     * on this path -- confirm dstW handling with the caller. */
    if ((i & 7) == 6)
        output_pixel(*dest++, acc);
}
/* Save the residual of the last pixel so it is not lost. */
c->dither_error[0][i] = err;
} else {
for (i = 0; i < dstW; i += 8) {
int Y, acc = 0;
@ -395,6 +432,7 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2],
output_pixel(*dest++, acc);
}
}
}
static av_always_inline void
@ -406,9 +444,31 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
const uint8_t * const d128 = dither_8x8_220[y & 7];
int i;
if (c->flags & SWS_ERROR_DIFFUSION) {
/* Error-diffusion path: two pixels per iteration, one output byte every
 * eight pixels.  The weights 7,1,5,3 (/16) are the Floyd-Steinberg
 * coefficients: 7 * error of the pixel just processed on this row plus
 * 1/5/3 * the entries an earlier call left in c->dither_error[0]. */
int err = 0;
/* Unsigned on purpose: acc is never reset and doubles on every pixel, so a
 * signed accumulator would overflow (undefined behaviour) on wide rows.
 * Only the low bits are consumed by output_pixel and by (acc & 1). */
unsigned acc = 0;
for (i = 0; i < dstW; i +=2) {
    int Y;

    /* Round the intermediate sample and drop 7 fractional bits. */
    Y = ((buf0[i + 0] + 64) >> 7);
    Y += (7*err + 1*c->dither_error[0][i] + 5*c->dither_error[0][i+1] + 3*c->dither_error[0][i+2])>>4;
    /* Entry i has been consumed; store the error carried in from the
     * preceding pixel for a later call. */
    c->dither_error[0][i] = err;
    /* Shift in one output bit, then turn Y into the residual error. */
    acc = 2*acc + (Y >= 128);
    Y -= 220*(acc&1);

    /* Second pixel of the pair; Y is now the left-neighbour error. */
    err = ((buf0[i + 1] + 64) >> 7);
    err += (7*Y + 1*c->dither_error[0][i+1] + 5*c->dither_error[0][i+2] + 3*c->dither_error[0][i+3])>>4;
    c->dither_error[0][i+1] = Y;
    acc = 2*acc + (err >= 128);
    err -= 220*(acc&1);

    /* Eight bits gathered: emit one byte.
     * NOTE(review): pixels after the last multiple of 8 are not flushed
     * on this path -- confirm dstW handling with the caller. */
    if ((i & 7) == 6)
        output_pixel(*dest++, acc);
}
/* Save the residual of the last pixel so it is not lost. */
c->dither_error[0][i] = err;
} else {
for (i = 0; i < dstW; i += 8) {
int acc = 0;
accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]);
accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]);
accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]);
@ -420,6 +480,7 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0,
output_pixel(*dest++, acc);
}
}
}
#undef output_pixel

View File

@ -910,6 +910,11 @@ int attribute_align_arg sws_scale(struct SwsContext *c,
src2[0] = base;
}
/* When the slice starts at row 0 (presumably the first slice of a picture)
 * and error diffusion is enabled, zero the first dstW+2 entries of all four
 * dither_error buffers so no error carries over from earlier output. */
if (!srcSliceY && (c->flags & SWS_ERROR_DIFFUSION))
for (i = 0; i < 4; i++)
memset(c->dither_error[i], 0, sizeof(c->dither_error[0][0]) * (c->dstW+2));
// copy strides, so they can safely be modified
if (c->sliceDir == 1) {
// slices go from top to bottom

View File

@ -80,6 +80,7 @@ const char *swscale_license(void);
//input subsampling info
#define SWS_FULL_CHR_H_INP 0x4000
#define SWS_DIRECT_BGR 0x8000
#define SWS_ERROR_DIFFUSION 0x20000
#define SWS_ACCURATE_RND 0x40000
#define SWS_BITEXACT 0x80000

View File

@ -327,6 +327,8 @@ typedef struct SwsContext {
int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM];
uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM];
/* Error-diffusion state: four heap buffers of ints, allocated in
 * sws_init_context() and released in sws_freeContext().  The mono output
 * writers use entry [0] to carry quantisation error between calls. */
int *dither_error[4];
//Colorspace stuff
int contrast, brightness, saturation; // for sws_getColorspaceDetails
int srcColorspaceTable[4];

View File

@ -936,7 +936,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
/* yuv2bgr */
if ((srcFormat == AV_PIX_FMT_YUV420P || srcFormat == AV_PIX_FMT_YUV422P ||
srcFormat == AV_PIX_FMT_YUVA420P) && isAnyRGB(dstFormat) &&
!(flags & SWS_ACCURATE_RND) && !(dstH & 1)) {
!(flags & (SWS_ACCURATE_RND|SWS_ERROR_DIFFUSION)) && !(dstH & 1)) {
c->swScale = ff_yuv2rgb_get_func_ptr(c);
}

View File

@ -1247,6 +1247,9 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->vChrFilterPos[chrI];
}
/* Zero-initialised error-diffusion buffers.  The mono writers step i by 2
 * while i < dstW and read dither_error[0][i+3]; for odd dstW the last i is
 * dstW-1, so index dstW+2 is read.  Allocate dstW+3 entries to keep that
 * read inside the buffer (the extra entry is never written and stays 0). */
for (i = 0; i < 4; i++)
    FF_ALLOCZ_OR_GOTO(c, c->dither_error[i], (c->dstW+3) * sizeof(int), fail);
/* Allocate pixbufs (we use dynamic allocation because otherwise we would
* need to allocate several megabytes to handle all possible cases) */
FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize * 3 * sizeof(int16_t *), fail);
@ -1740,6 +1743,9 @@ void sws_freeContext(SwsContext *c)
av_freep(&c->alpPixBuf);
}
/* Release all four error-diffusion buffers allocated in sws_init_context(). */
for (i = 0; i < 4; i++)
av_freep(&c->dither_error[i]);
av_freep(&c->vLumFilter);
av_freep(&c->vChrFilter);
av_freep(&c->hLumFilter);