From 7f685d0f493bbfa44cf1b3b65f9347291e23872b Mon Sep 17 00:00:00 2001 From: Kyle Swanson Date: Mon, 25 Sep 2023 13:14:13 +0100 Subject: [PATCH] avfilter: add libvmaf_cuda Signed-off-by: Kyle Swanson --- configure | 2 + doc/filters.texi | 26 +++++ libavfilter/Makefile | 1 + libavfilter/allfilters.c | 1 + libavfilter/vf_libvmaf.c | 210 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 240 insertions(+) diff --git a/configure b/configure index 1ee8409617..6f9b223481 100755 --- a/configure +++ b/configure @@ -3833,6 +3833,7 @@ vflip_vulkan_filter_deps="vulkan spirv_compiler" vidstabdetect_filter_deps="libvidstab" vidstabtransform_filter_deps="libvidstab" libvmaf_filter_deps="libvmaf" +libvmaf_cuda_filter_deps="libvmaf libvmaf_cuda ffnvcodec" zmq_filter_deps="libzmq" zoompan_filter_deps="swscale" zscale_filter_deps="libzimg const_nan" @@ -6811,6 +6812,7 @@ enabled libuavs3d && require_pkg_config libuavs3d "uavs3d >= 1.1.41" uav enabled libv4l2 && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl enabled libvidstab && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit enabled libvmaf && require_pkg_config libvmaf "libvmaf >= 2.0.0" libvmaf.h vmaf_init +enabled libvmaf && check_pkg_config libvmaf_cuda "libvmaf >= 2.0.0" libvmaf_cuda.h vmaf_cuda_state_init enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc enabled libvorbis && require_pkg_config libvorbis vorbis vorbis/codec.h vorbis_info_init && require_pkg_config libvorbisenc vorbisenc vorbis/vorbisenc.h vorbis_encode_init diff --git a/doc/filters.texi b/doc/filters.texi index 14a6be49ac..c25450cf6c 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -16928,6 +16928,32 @@ ffmpeg -i distorted.mpg -i reference.mkv -lavfi "[0:v]settb=AVTB,setpts=PTS-STAR @end example @end itemize +@section libvmaf_cuda + +This is the CUDA variant of the @ref{libvmaf} filter. It only accepts CUDA frames. 
+ +It requires Netflix's vmaf library (libvmaf) as a pre-requisite. +After installing the library it can be enabled using: +@code{./configure --enable-nonfree --enable-ffnvcodec --enable-libvmaf}. + +@subsection Examples +@itemize + +@item +Basic usage showing CUVID hardware decoding and CUDA scaling with @ref{scale_cuda}: +@example +ffmpeg \ + -hwaccel cuda -hwaccel_output_format cuda -codec:v av1_cuvid -i dis.obu \ + -hwaccel cuda -hwaccel_output_format cuda -codec:v av1_cuvid -i ref.obu \ + -filter_complex " + [0:v]scale_cuda=format=yuv420p[dis]; \ + [1:v]scale_cuda=format=yuv420p[ref]; \ + [dis][ref]libvmaf_cuda=log_fmt=json:log_path=output.json + " \ + -f null - +@end example +@end itemize + @section limitdiff Apply limited difference filter using second and optionally third video stream. diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 2fe0033b21..57f5809acb 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -363,6 +363,7 @@ OBJS-$(CONFIG_LENSCORRECTION_FILTER) += vf_lenscorrection.o OBJS-$(CONFIG_LENSFUN_FILTER) += vf_lensfun.o OBJS-$(CONFIG_LIBPLACEBO_FILTER) += vf_libplacebo.o vulkan.o vulkan_filter.o OBJS-$(CONFIG_LIBVMAF_FILTER) += vf_libvmaf.o framesync.o +OBJS-$(CONFIG_LIBVMAF_CUDA_FILTER) += vf_libvmaf.o framesync.o OBJS-$(CONFIG_LIMITDIFF_FILTER) += vf_limitdiff.o framesync.o OBJS-$(CONFIG_LIMITER_FILTER) += vf_limiter.o OBJS-$(CONFIG_LOOP_FILTER) += f_loop.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index d4184d6e80..aa49703c6e 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -339,6 +339,7 @@ extern const AVFilter ff_vf_lenscorrection; extern const AVFilter ff_vf_lensfun; extern const AVFilter ff_vf_libplacebo; extern const AVFilter ff_vf_libvmaf; +extern const AVFilter ff_vf_libvmaf_cuda; extern const AVFilter ff_vf_limitdiff; extern const AVFilter ff_vf_limiter; extern const AVFilter ff_vf_loop; diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c index 
67f0d6a22f..2726b061ac 100644
--- a/libavfilter/vf_libvmaf.c
+++ b/libavfilter/vf_libvmaf.c
@@ -24,6 +24,8 @@
  * Calculate the VMAF between two input videos.
  */
 
+#include "config_components.h"
+
 #include <libvmaf.h>
 
 #include "libavutil/avstring.h"
@@ -36,6 +38,13 @@
 #include "internal.h"
 #include "video.h"
 
+#if CONFIG_LIBVMAF_CUDA_FILTER
+#include <libvmaf_cuda.h>
+
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_cuda_internal.h"
+#endif
+
 typedef struct LIBVMAFContext {
     const AVClass *class;
     FFFrameSync fs;
@@ -58,6 +67,9 @@ typedef struct LIBVMAFContext {
     unsigned model_cnt;
     unsigned frame_cnt;
     unsigned bpc;
+#if CONFIG_LIBVMAF_CUDA_FILTER
+    VmafCudaState *cu_state;
+#endif
 } LIBVMAFContext;
 
 #define OFFSET(x) offsetof(LIBVMAFContext, x)
@@ -717,3 +729,245 @@ const AVFilter ff_vf_libvmaf = {
     FILTER_OUTPUTS(libvmaf_outputs),
     FILTER_PIXFMTS_ARRAY(pix_fmts),
 };
+
+#if CONFIG_LIBVMAF_CUDA_FILTER
+/* Software pixel formats accepted as sw_format of the CUDA input frames. */
+static const enum AVPixelFormat supported_formats[] = {
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV444P16,
+};
+
+/* Return 1 if fmt is listed in supported_formats, 0 otherwise. */
+static int format_is_supported(enum AVPixelFormat fmt)
+{
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+        if (supported_formats[i] == fmt)
+            return 1;
+    return 0;
+}
+
+/**
+ * Output link configuration callback of the CUDA filter.
+ *
+ * Initializes the VMAF context, attaches a VMAF CUDA state created from the
+ * CUDA context of the first input's hardware frames, preallocates
+ * pictures, parses the options and finally defers to config_output().
+ *
+ * @return the result of config_output() on success, a negative error code
+ *         on failure
+ */
+static int config_props_cuda(AVFilterLink *outlink)
+{
+    int err;
+    AVFilterContext *ctx = outlink->src;
+    LIBVMAFContext *s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVHWFramesContext *frames_ctx;
+    AVCUDADeviceContext *device_hwctx;
+    const AVPixFmtDescriptor *desc;
+
+    VmafConfiguration cfg = {
+        .log_level = log_level_map(av_log_get_level()),
+        .n_subsample = s->n_subsample,
+        .n_threads = s->n_threads,
+    };
+
+    VmafCudaPictureConfiguration cuda_pic_cfg = {
+        .pic_params = {
+            .w = inlink->w,
+            .h = inlink->h,
+        },
+        .pic_prealloc_method = VMAF_CUDA_PICTURE_PREALLOCATION_METHOD_DEVICE,
+    };
+
+    VmafCudaConfiguration cuda_cfg = { 0 };
+
+    /* Everything below dereferences the input's hardware frames context. */
+    if (!inlink->hw_frames_ctx) {
+        av_log(s, AV_LOG_ERROR, "No hw context provided on input\n");
+        return AVERROR(EINVAL);
+    }
+
+    frames_ctx = (AVHWFramesContext*) inlink->hw_frames_ctx->data;
+    device_hwctx = frames_ctx->device_ctx->hwctx;
+    desc = av_pix_fmt_desc_get(frames_ctx->sw_format);
+
+    /* Reject the format before the descriptor's bit depth is read. */
+    if (!desc || !format_is_supported(frames_ctx->sw_format)) {
+        av_log(s, AV_LOG_ERROR,
+               "Unsupported input format: %s\n", desc ? desc->name : "unknown");
+        return AVERROR(EINVAL);
+    }
+
+    cuda_pic_cfg.pic_params.bpc = desc->comp[0].depth;
+    cuda_pic_cfg.pic_params.pix_fmt = pix_fmt_map(frames_ctx->sw_format);
+    cuda_cfg.cu_ctx = device_hwctx->cuda_ctx;
+
+    err = vmaf_init(&s->vmaf, cfg);
+    if (err)
+        return AVERROR(EINVAL);
+
+    err = vmaf_cuda_state_init(&s->cu_state, cuda_cfg);
+    if (err)
+        return AVERROR(EINVAL);
+
+    err = vmaf_cuda_import_state(s->vmaf, s->cu_state);
+    if (err)
+        return AVERROR(EINVAL);
+
+    err = vmaf_cuda_preallocate_pictures(s->vmaf, cuda_pic_cfg);
+    if (err < 0)
+        return err;
+
+    err = parse_deprecated_options(ctx);
+    if (err)
+        return err;
+
+    err = parse_models(ctx);
+    if (err)
+        return err;
+
+    err = parse_features(ctx);
+    if (err)
+        return err;
+
+    return config_output(outlink);
+}
+
+/**
+ * Fetch a preallocated VMAF picture and fill it from a CUDA frame.
+ *
+ * The copy is device to device. The plane loop exits after its first
+ * iteration, so only plane 0 of src is transferred.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if no picture could be fetched,
+ *         AVERROR_EXTERNAL if a CUDA call failed
+ */
+static int copy_picture_data_cuda(VmafContext* vmaf,
+                                  AVCUDADeviceContext* device_hwctx,
+                                  AVFrame* src, VmafPicture* dst,
+                                  enum AVPixelFormat pix_fmt)
+{
+    const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(pix_fmt);
+    CudaFunctions *cu = device_hwctx->internal->cuda_dl;
+    int err, pop_err;
+
+    CUDA_MEMCPY2D m = {
+        .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+        .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+    };
+
+    err = vmaf_cuda_fetch_preallocated_picture(vmaf, dst);
+    if (err)
+        return AVERROR(ENOMEM);
+
+    err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err)
+        return AVERROR_EXTERNAL;
+
+    for (unsigned i = 0; i < pix_desc->nb_components; i++) {
+        m.srcDevice = (CUdeviceptr) src->data[i];
+        m.srcPitch = src->linesize[i];
+        m.dstDevice = (CUdeviceptr) dst->data[i];
+        m.dstPitch = dst->stride[i];
+        m.WidthInBytes = dst->w[i] * ((dst->bpc + 7) / 8);
+        m.Height = dst->h[i];
+
+        err = cu->cuMemcpy2D(&m);
+        break;
+    }
+
+    /* Pop unconditionally: returning straight from a failed copy would
+     * leave the context pushed on the calling thread. */
+    pop_err = cu->cuCtxPopCurrent(NULL);
+    if (err || pop_err)
+        return AVERROR_EXTERNAL;
+
+    return 0;
+}
+
+/**
+ * Frame sync event callback: feed one pair of frames to libvmaf.
+ *
+ * Both frames are copied into VMAF pictures and handed to
+ * vmaf_read_pictures(); the dist frame is then sent to the output.
+ *
+ * @return the result of ff_filter_frame() on success, a negative error code
+ *         on failure
+ */
+static int do_vmaf_cuda(FFFrameSync* fs)
+{
+    AVFilterContext* ctx = fs->parent;
+    LIBVMAFContext* s = ctx->priv;
+    AVFilterLink *inlink = ctx->inputs[0];
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*) inlink->hw_frames_ctx->data;
+    AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+    VmafPicture pic_ref, pic_dist;
+    AVFrame *ref, *dist;
+
+    int err = 0;
+
+    err = ff_framesync_dualinput_get(fs, &dist, &ref);
+    if (err < 0)
+        return err;
+    if (ctx->is_disabled || !ref)
+        return ff_filter_frame(ctx->outputs[0], dist);
+
+    err = copy_picture_data_cuda(s->vmaf, device_hwctx, ref, &pic_ref,
+                                 frames_ctx->sw_format);
+    if (err) {
+        av_log(s, AV_LOG_ERROR, "problem during copy_picture_data_cuda.\n");
+        return err;
+    }
+
+    /* NOTE(review): pic_ref is not released if this second copy fails;
+     * confirm whether preallocated pictures need vmaf_picture_unref(). */
+    err = copy_picture_data_cuda(s->vmaf, device_hwctx, dist, &pic_dist,
+                                 frames_ctx->sw_format);
+    if (err) {
+        av_log(s, AV_LOG_ERROR, "problem during copy_picture_data_cuda.\n");
+        return err;
+    }
+
+    err = vmaf_read_pictures(s->vmaf, &pic_ref, &pic_dist, s->frame_cnt++);
+    if (err) {
+        av_log(s, AV_LOG_ERROR, "problem during vmaf_read_pictures.\n");
+        return AVERROR(EINVAL);
+    }
+
+    return ff_filter_frame(ctx->outputs[0], dist);
+}
+
+/* Route frame sync events to the CUDA processing callback. */
+static av_cold int init_cuda(AVFilterContext *ctx)
+{
+    LIBVMAFContext *s = ctx->priv;
+    s->fs.on_event = do_vmaf_cuda;
+    return 0;
+}
+
+static const AVFilterPad libvmaf_outputs_cuda[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_props_cuda,
+    },
+};
+
+const AVFilter ff_vf_libvmaf_cuda = {
+    .name           = "libvmaf_cuda",
+    .description    = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
+    .preinit        = libvmaf_framesync_preinit,
+    .init           = init_cuda,
+    .uninit         = uninit,
+    .activate       = activate,
+    .priv_size      = sizeof(LIBVMAFContext),
+    .priv_class     = &libvmaf_class,
+    FILTER_INPUTS(libvmaf_inputs),
+    FILTER_OUTPUTS(libvmaf_outputs_cuda),
+    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_CUDA),
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+#endif