FFmpeg/libavfilter/bwdif.h
John Cox 697533e76d avfilter/vf_bwdif: Add a filter_line3 method for optimisation
Add an optional filter_line3 to the available optimisations.

filter_line3 is equivalent to filter_line, memcpy, filter_line

filter_line shares quite a number of loads and some calculations in
common with its next iteration and testing shows that using aarch64
neon filter_line3s performance is 30% better than two filter_lines
and a memcpy.

Adds a test for vf_bwdif filter_line3 to checkasm

Rounds job start lines down to a multiple of 4. This means that if
filter_line3 exists then filter_line will not sometimes be called
once at the end of a slice depending on thread count. The final slice
may do up to 3 extra lines but filter_edge is faster than filter_line
so it is unlikely to create any noticable thread load variation.

Signed-off-by: John Cox <jc@kynesim.co.uk>
Signed-off-by: Martin Storsjö <martin@martin.st>
2023-07-06 00:21:05 +03:00

64 lines
2.8 KiB
C

/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFILTER_BWDIF_H
#define AVFILTER_BWDIF_H
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "yadif.h"
typedef struct BWDIFContext {
YADIFContext yadif;
void (*filter_intra)(void *dst1, void *cur1, int w, int prefs, int mrefs,
int prefs3, int mrefs3, int parity, int clip_max);
void (*filter_line)(void *dst, void *prev, void *cur, void *next,
int w, int prefs, int mrefs, int prefs2, int mrefs2,
int prefs3, int mrefs3, int prefs4, int mrefs4,
int parity, int clip_max);
void (*filter_edge)(void *dst, void *prev, void *cur, void *next,
int w, int prefs, int mrefs, int prefs2, int mrefs2,
int parity, int clip_max, int spat);
void (*filter_line3)(void *dst, int dstride,
const void *prev, const void *cur, const void *next, int prefs,
int w, int parity, int clip_max);
} BWDIFContext;
void ff_bwdif_init_filter_line(BWDIFContext *bwdif, int bit_depth);
void ff_bwdif_init_x86(BWDIFContext *bwdif, int bit_depth);
void ff_bwdif_init_aarch64(BWDIFContext *bwdif, int bit_depth);
void ff_bwdif_filter_edge_c(void *dst1, void *prev1, void *cur1, void *next1,
int w, int prefs, int mrefs, int prefs2, int mrefs2,
int parity, int clip_max, int spat);
void ff_bwdif_filter_intra_c(void *dst1, void *cur1, int w, int prefs, int mrefs,
int prefs3, int mrefs3, int parity, int clip_max);
void ff_bwdif_filter_line_c(void *dst1, void *prev1, void *cur1, void *next1,
int w, int prefs, int mrefs, int prefs2, int mrefs2,
int prefs3, int mrefs3, int prefs4, int mrefs4,
int parity, int clip_max);
void ff_bwdif_filter_line3_c(void * dst1, int d_stride,
const void * prev1, const void * cur1, const void * next1, int s_stride,
int w, int parity, int clip_max);
#endif /* AVFILTER_BWDIF_H */