From 4364e1f150971c3023db7981c01193b36c8f5b17 Mon Sep 17 00:00:00 2001 From: Georg Martius Date: Fri, 19 Apr 2013 01:49:27 +0200 Subject: [PATCH] lavfi: add video stabilization plugins using vid.stab library vidstabdetect and vidstabtransform common functions for interfacing vid.stab are in libavfilter/vidstabutils.c Signed-off-by: Georg Martius --- Changelog | 2 + LICENSE | 1 + configure | 6 + doc/filters.texi | 222 ++++++++++++++++++++++ libavfilter/Makefile | 2 + libavfilter/allfilters.c | 2 + libavfilter/vf_vidstabdetect.c | 239 ++++++++++++++++++++++++ libavfilter/vf_vidstabtransform.c | 294 ++++++++++++++++++++++++++++++ libavfilter/vidstabutils.c | 84 +++++++++ libavfilter/vidstabutils.h | 36 ++++ 10 files changed, 888 insertions(+) create mode 100644 libavfilter/vf_vidstabdetect.c create mode 100644 libavfilter/vf_vidstabtransform.c create mode 100644 libavfilter/vidstabutils.c create mode 100644 libavfilter/vidstabutils.h diff --git a/Changelog b/Changelog index 3e8293f0ae..f71e0d8ce0 100644 --- a/Changelog +++ b/Changelog @@ -31,6 +31,8 @@ version : - asetrate filter - interleave filter - timeline editing with filters +- vidstabdetect and vidstabtransform filters for video stabilization using + the vid.stab library version 1.2: diff --git a/LICENSE b/LICENSE index 2d91bc08db..87607530c6 100644 --- a/LICENSE +++ b/LICENSE @@ -72,6 +72,7 @@ compatible libraries The following libraries are under GPL: - libcdio - libutvideo + - libvidstab - libx264 - libxavs - libxvid diff --git a/configure b/configure index e5f47b0c07..648db2ff85 100755 --- a/configure +++ b/configure @@ -226,6 +226,7 @@ External library support: --enable-libtwolame enable MP2 encoding via libtwolame [no] --enable-libutvideo enable Ut Video encoding and decoding via libutvideo [no] --enable-libv4l2 enable libv4l2/v4l-utils [no] + --enable-libvidstab enable video stabilization using vid.stab [no] --enable-libvo-aacenc enable AAC encoding via libvo-aacenc [no] --enable-libvo-amrwbenc enable 
AMR-WB encoding via libvo-amrwbenc [no] --enable-libvorbis enable Vorbis en/decoding via libvorbis, @@ -1181,6 +1182,7 @@ EXTERNAL_LIBRARY_LIST=" libtwolame libutvideo libv4l2 + libvidstab libvo_aacenc libvo_amrwbenc libvorbis @@ -2152,6 +2154,8 @@ stereo3d_filter_deps="gpl" subtitles_filter_deps="avformat avcodec libass" super2xsai_filter_deps="gpl" tinterlace_filter_deps="gpl" +vidstabdetect_filter_deps="libvidstab" +vidstabtransform_filter_deps="libvidstab" yadif_filter_deps="gpl" pixfmts_super2xsai_test_deps="super2xsai_filter" tinterlace_merge_test_deps="tinterlace_filter" @@ -3573,6 +3577,7 @@ die_license_disabled_gpl() { die_license_disabled gpl libcdio die_license_disabled gpl libutvideo +die_license_disabled gpl libvidstab die_license_disabled gpl libx264 die_license_disabled gpl libxavs die_license_disabled gpl libxvid @@ -4015,6 +4020,7 @@ enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame && die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; } enabled libutvideo && require_cpp utvideo "stdint.h stdlib.h utvideo/utvideo.h utvideo/Codec.h" 'CCodec*' -lutvideo -lstdc++ enabled libv4l2 && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl +enabled libvidstab && require_pkg_config vidstab vid.stab/libvidstab.h vsMotionDetectInit enabled libvo_aacenc && require libvo_aacenc vo-aacenc/voAAC.h voGetAACEncAPI -lvo-aacenc enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc enabled libvorbis && require libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg diff --git a/doc/filters.texi b/doc/filters.texi index 81cdb05fec..ab9e227063 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -5308,6 +5308,141 @@ much, but it will increase the amount of blurring needed to cover over the image and will destroy more information than necessary, and extra pixels will slow things down on a large logo. 
+@anchor{vidstabtransform} +@section vidstabtransform + +Video stabilization/deshaking: pass 2 of 2, +see @ref{vidstabdetect} for pass 1. + +Read a file with transform information for each frame and +apply/compensate them. Together with the @ref{vidstabdetect} +filter this can be used to deshake videos. See also +@url{http://public.hronopik.de/vid.stab}. It is important to also use +the unsharp filter, see below. + +To enable compilation of this filter you need to configure FFmpeg with +@code{--enable-libvidstab}. + +This filter accepts the following named options, expressed as a +sequence of @var{key}=@var{value} pairs, separated by ":". + +@table @option + +@item input +path to the file used to read the transforms (default: @file{transforms.trf}) + +@item smoothing +number of frames (value*2 + 1) used for lowpass filtering the camera movements +(default: 10). For example a number of 10 means that 21 frames are used +(10 in the past and 10 in the future) to smoothen the motion in the +video. A larger value leads to a smoother video, but limits the +acceleration of the camera (pan/tilt movements). + +@item maxshift +maximal number of pixels to translate frames (default: -1 no limit) + +@item maxangle +maximal angle in radians (degree*PI/180) to rotate frames (default: -1 +no limit) + +@item crop +How to deal with borders that may be visible due to movement +compensation. Available values are: + +@table @samp +@item keep +keep image information from previous frame (default) +@item black +fill the border black +@end table + +@item invert +@table @samp +@item 0 + keep transforms normal (default) +@item 1 + invert transforms +@end table + + +@item relative +consider transforms as +@table @samp +@item 0 + absolute +@item 1 + relative to previous frame (default) +@end table + + +@item zoom +percentage to zoom (default: 0) +@table @samp +@item >0 + zoom in +@item <0 + zoom out +@end table + +@item optzoom +if 1 then optimal zoom value is determined (default).
+Optimal zoom means no (or only little) border should be visible. +Note that the value given at zoom is added to the one calculated +here. + +@item interpol +type of interpolation + +Available values are: +@table @samp +@item no +no interpolation +@item linear +linear only horizontal +@item bilinear +linear in both directions (default) +@item bicubic +cubic in both directions (slow) +@end table + +@item tripod +virtual tripod mode means that the video is stabilized such that the +camera stays stationary. Use also @code{tripod} option of +@ref{vidstabdetect}. +@table @samp +@item 0 +off (default) +@item 1 +virtual tripod mode: equivalent to @code{relative=0:smoothing=0} +@end table + +@end table + +@subsection Examples + +@itemize +@item +typical call with default values: + (note the unsharp filter which is always recommended) +@example +ffmpeg -i inp.mpeg -vf vidstabtransform,unsharp=5:5:0.8:3:3:0.4 inp_stabilized.mpeg +@end example + +@item +zoom in a bit more and load transform data from a given file +@example +vidstabtransform=zoom=5:input="mytransforms.trf" +@end example + +@item +smoothen the video even more +@example +vidstabtransform=smoothing=30 +@end example + +@end itemize + + @section scale Scale (resize) the input video, using the libswscale library. @@ -5706,6 +5841,93 @@ in [-30,0] will filter edges. Default value is 0. If a chroma option is not explicitly set, the corresponding luma value is set. +@anchor{vidstabdetect} +@section vidstabdetect + +Video stabilization/deshaking: pass 1 of 2, see @ref{vidstabtransform} +for pass 2. + +Generates a file with relative transform information (translation, +rotation) about subsequent frames. + +To enable compilation of this filter you need to configure FFmpeg with +@code{--enable-libvidstab}. + +This filter accepts the following named options, expressed as a +sequence of @var{key}=@var{value} pairs, separated by ":".
+ +@table @option +@item result +path to the file used to write the transforms (default: @file{transforms.trf}) + +@item shakiness +how shaky is the video and how quick is the camera? (default: 5) +@table @samp +@item 1 + little (fast) +@item ... +@item 10 + very strong/quick (slow) +@end table + +@item accuracy +accuracy of detection process (>=shakiness) (default: 9) +@table @samp +@item 1 + low (fast) +@item 15 + high (slow) +@end table + +@item stepsize +stepsize of search process, region around minimum is scanned with 1 pixel +resolution (default: 6) + +@item mincontrast +below this contrast a local measurement field is discarded (0-1) (default: 0.25) + +@item tripod +virtual tripod mode: @code{tripod=framenum} if framenum>0 otherwise disabled. +The motion of the frames is compared to a reference frame (framenum). +The idea is to compensate all movements in a more-or-less static scene + and keep the camera view absolutely still. +(default: 0 (disabled)) + +@item show +0: draw nothing (default); 1,2: show fields and transforms in the resulting frames + +@end table + +@subsection Examples + +@itemize +@item +use default values: +@example +vidstabdetect +@end example + +@item +strongly shaky movie and put the results in @code{mytransforms.trf} +@example +vidstabdetect=shakiness=10:accuracy=15:result="mytransforms.trf" +@end example + +@item +visualize some internals in the resulting video +@example +vidstabdetect=show=1 +@end example + + +@item +Typical call with visualization +@example +ffmpeg -i input -vf vidstabdetect=shakiness=5:show=1 dummy.avi +@end example +@end itemize + + @section stereo3d Convert between different stereoscopic image formats.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 400eae7347..133e036653 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -179,6 +179,8 @@ OBJS-$(CONFIG_TINTERLACE_FILTER)             += vf_tinterlace.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
 OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
 OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
+OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
+OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
 OBJS-$(CONFIG_YADIF_FILTER)                  += vf_yadif.o
 
 OBJS-$(CONFIG_CELLAUTO_FILTER)               += vsrc_cellauto.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 3e2d62cdfb..2fc8b48484 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -176,6 +176,8 @@ void avfilter_register_all(void)
     REGISTER_FILTER(TRANSPOSE,      transpose,      vf);
     REGISTER_FILTER(UNSHARP,        unsharp,        vf);
     REGISTER_FILTER(VFLIP,          vflip,          vf);
+    REGISTER_FILTER(VIDSTABDETECT,  vidstabdetect,  vf);
+    REGISTER_FILTER(VIDSTABTRANSFORM, vidstabtransform, vf);
     REGISTER_FILTER(YADIF,          yadif,          vf);
 
     REGISTER_FILTER(CELLAUTO,       cellauto,       vsrc);
diff --git a/libavfilter/vf_vidstabdetect.c b/libavfilter/vf_vidstabdetect.c
new file mode 100644
index 0000000000..145d7f9b6d
--- /dev/null
+++ b/libavfilter/vf_vidstabdetect.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2013 Georg Martius
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DEFAULT_RESULT_NAME     "transforms.trf"
+
+#include <vid.stab/libvidstab.h>
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+#include "libavutil/imgutils.h"
+#include "avfilter.h"
+#include "internal.h"
+
+#include "vidstabutils.h"
+
+/** Private context of the vidstabdetect filter (stabilization pass 1 of 2). */
+typedef struct {
+    const AVClass *class;
+
+    VSMotionDetect md;          ///< vid.stab motion detection state
+    VSMotionDetectConfig conf;  ///< detection settings, filled in by the options
+
+    char *result;               ///< path of the transform file to write
+    FILE *f;                    ///< handle on the transform file, NULL while closed
+} StabData;
+
+#define OFFSET(x) offsetof(StabData, x)
+#define OFFSETC(x) (offsetof(StabData, conf)+offsetof(VSMotionDetectConfig, x))
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
+
+static const AVOption vidstabdetect_options[] = {
+    {"result", "path to the file used to write the transforms (def:transforms.trf)", OFFSET(result), AV_OPT_TYPE_STRING, {.str = DEFAULT_RESULT_NAME}, .flags = FLAGS},
+    {"shakiness", "how shaky is the video and how quick is the camera?"
+     " 1: little (fast) 10: very strong/quick (slow) (def: 5)", OFFSETC(shakiness), AV_OPT_TYPE_INT, {.i64 = 5}, 1, 10, FLAGS},
+    {"accuracy", "(>=shakiness) 1: low 15: high (slow) (def: 9)", OFFSETC(accuracy), AV_OPT_TYPE_INT, {.i64 = 9}, 1, 15, FLAGS},
+    {"stepsize", "region around minimum is scanned with 1 pixel resolution (def: 6)", OFFSETC(stepSize), AV_OPT_TYPE_INT, {.i64 = 6}, 1, 32, FLAGS},
+    {"mincontrast", "below this contrast a field is discarded (0-1) (def: 0.25)", OFFSETC(contrastThreshold), AV_OPT_TYPE_DOUBLE, {.dbl = 0.25}, 0.0, 1.0, FLAGS},
+    {"show", "0: draw nothing (def); 1,2: show fields and transforms", OFFSETC(show), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 2, FLAGS},
+    {"tripod", "virtual tripod mode (if >0): motion is compared to a"
+     " reference frame (frame # is the value) (def: 0)", OFFSETC(virtualTripod), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS},
+    {NULL},
+};
+
+AVFILTER_DEFINE_CLASS(vidstabdetect);
+
+/** Route vid.stab's allocator and logger through libavutil and log the library version. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    StabData *sd = ctx->priv;
+
+    vs_set_mem_and_log_functions();
+    sd->class = &vidstabdetect_class;
+    av_log(ctx, AV_LOG_VERBOSE, "vidstabdetect filter: init %s\n", LIBVIDSTAB_VERSION);
+    return 0;
+}
+
+/** Close the transform file, if open, and release the motion detection state. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    StabData *sd = ctx->priv;
+    VSMotionDetect *md = &(sd->md);
+
+    if (sd->f) {
+        // fclose() flushes the buffered transforms, so a failure means lost data
+        if (fclose(sd->f) != 0)
+            av_log(ctx, AV_LOG_WARNING, "error while closing the transform file\n");
+        sd->f = NULL;
+    }
+
+    vsMotionDetectionCleanup(md);
+}
+
+/** Restrict the filter to the pixel formats that av_2_vs_pixel_format() can map. */
+static int query_formats(AVFilterContext *ctx)
+{
+    // If you add something here also add it in vidstabutils.c
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_YUV440P,  AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, AV_PIX_FMT_RGBA,
+        AV_PIX_FMT_NONE
+    };
+
+    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
+    return 0;
+}
+
+/**
+ * Set up vid.stab for the negotiated frame size and pixel format, then open
+ * the transform file and write its header.
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    StabData *sd = ctx->priv;
+
+    VSMotionDetect *md = &(sd->md);
+    VSFrameInfo fi;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    int ret;
+
+    if (!vsFrameInfoInit(&fi, inlink->w, inlink->h,
+                         av_2_vs_pixel_format(ctx, inlink->format))) {
+        av_log(ctx, AV_LOG_ERROR, "unknown pixel format: %i (%s)\n",
+               inlink->format, desc->name);
+        return AVERROR(EINVAL);
+    }
+
+    // cross-check vid.stab's view of the pixel format against libavutil's
+    if (fi.bytesPerPixel != av_get_bits_per_pixel(desc)/8) {
+        av_log(ctx, AV_LOG_ERROR, "pixel-format error: wrong bits/per/pixel, please report a BUG\n");
+        return AVERROR(EINVAL);
+    }
+    if (fi.log2ChromaW != desc->log2_chroma_w) {
+        av_log(ctx, AV_LOG_ERROR, "pixel-format error: log2_chroma_w, please report a BUG\n");
+        return AVERROR(EINVAL);
+    }
+    if (fi.log2ChromaH != desc->log2_chroma_h) {
+        av_log(ctx, AV_LOG_ERROR, "pixel-format error: log2_chroma_h, please report a BUG\n");
+        return AVERROR(EINVAL);
+    }
+
+    // set values that are not initialized by the options
+    sd->conf.algo    = 1;
+    sd->conf.modName = "vidstabdetect";
+    if (vsMotionDetectInit(md, &sd->conf, &fi) != VS_OK) {
+        av_log(ctx, AV_LOG_ERROR, "initialization of Motion Detection failed, please report a BUG\n");
+        return AVERROR(EINVAL);
+    }
+
+    vsMotionDetectGetConfig(&sd->conf, md);
+    av_log(ctx, AV_LOG_INFO, "Video stabilization settings (pass 1/2):\n");
+    av_log(ctx, AV_LOG_INFO, "     shakiness = %d\n", sd->conf.shakiness);
+    av_log(ctx, AV_LOG_INFO, "      accuracy = %d\n", sd->conf.accuracy);
+    av_log(ctx, AV_LOG_INFO, "      stepsize = %d\n", sd->conf.stepSize);
+    av_log(ctx, AV_LOG_INFO, "   mincontrast = %f\n", sd->conf.contrastThreshold);
+    av_log(ctx, AV_LOG_INFO, "          show = %d\n", sd->conf.show);
+    av_log(ctx, AV_LOG_INFO, "        result = %s\n", sd->result);
+
+    sd->f = fopen(sd->result, "w");
+    if (sd->f == NULL) {
+        ret = AVERROR(errno); // read errno before av_log() can overwrite it
+        av_log(ctx, AV_LOG_ERROR, "cannot open transform file %s\n", sd->result);
+        return ret;
+    }
+    // on failure the handle stays in sd->f and is closed by uninit()
+    if (vsPrepareFile(md, sd->f) != VS_OK) {
+        av_log(ctx, AV_LOG_ERROR, "cannot write to transform file %s\n", sd->result);
+        return AVERROR(EINVAL);
+    }
+    return 0;
+}
+
+/**
+ * Run motion detection on one frame, append the detected local motions to
+ * the transform file and pass the frame on. The frame is only modified when
+ * the "show" option makes vid.stab draw its visualization into it.
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    StabData *sd = ctx->priv;
+    VSMotionDetect *md = &(sd->md);
+    LocalMotions localmotions;
+
+    AVFilterLink *outlink = inlink->dst->outputs[0];
+    VSFrame frame;
+    int plane;
+    int ret;
+
+    // with show > 0 the frame data gets drawn on, so a shared input buffer
+    // has to be replaced by a private copy first
+    if (sd->conf.show > 0 && !av_frame_is_writable(in)) {
+        AVFrame *copy = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!copy) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(copy, in);
+        av_image_copy(copy->data, copy->linesize,
+                      (void*)in->data, in->linesize,
+                      in->format, in->width, in->height);
+        av_frame_free(&in);
+        in = copy;
+    }
+
+    for (plane = 0; plane < md->fi.planes; plane++) {
+        frame.data[plane]     = in->data[plane];
+        frame.linesize[plane] = in->linesize[plane];
+    }
+    if (vsMotionDetection(md, &localmotions, &frame) != VS_OK) {
+        av_log(ctx, AV_LOG_ERROR, "motion detection failed\n");
+        av_frame_free(&in);
+        // AVERROR_EXTERNAL is already an error code, do not wrap it in AVERROR()
+        return AVERROR_EXTERNAL;
+    }
+
+    // take the error code right away, the cleanup below may overwrite errno
+    ret = vsWriteToFile(md, sd->f, &localmotions) == VS_OK ?
+          0 : AVERROR(errno ? errno : EIO);
+    vs_vector_del(&localmotions);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "cannot write to transform file\n");
+        av_frame_free(&in);
+        return ret;
+    }
+
+    return ff_filter_frame(outlink, in);
+}
+
+static const AVFilterPad avfilter_vf_vidstabdetect_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_vf_vidstabdetect_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter avfilter_vf_vidstabdetect = {
+    .name          = "vidstabdetect",
+    .description   = NULL_IF_CONFIG_SMALL("pass 1 of 2 for stabilization, "
+                                          "extracts relative transformations "
+                                          "(pass 2 see vidstabtransform)"),
+    .priv_size     = sizeof(StabData),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+
+    .inputs        = avfilter_vf_vidstabdetect_inputs,
+    .outputs       = avfilter_vf_vidstabdetect_outputs,
+    .priv_class    = &vidstabdetect_class,
+};
diff --git
a/libavfilter/vf_vidstabtransform.c b/libavfilter/vf_vidstabtransform.c
new file mode 100644
index 0000000000..f40093a548
--- /dev/null
+++ b/libavfilter/vf_vidstabtransform.c
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2013 Georg Martius
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DEFAULT_INPUT_NAME     "transforms.trf"
+
+#include <vid.stab/libvidstab.h>
+
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+#include "libavutil/imgutils.h"
+#include "avfilter.h"
+#include "internal.h"
+
+#include "vidstabutils.h"
+
+/** Private context of the vidstabtransform filter (stabilization pass 2 of 2). */
+typedef struct {
+    const AVClass *class;
+
+    VSTransformData td;         ///< vid.stab transformation state
+    VSTransformConfig conf;     ///< transformation settings, filled in by the options
+
+    VSTransformations trans;    ///< transformations
+    char *input;                ///< name of transform file
+    int tripod;                 ///< virtual tripod mode, forces relative=0:smoothing=0
+} TransformContext;
+
+#define OFFSET(x) offsetof(TransformContext, x)
+#define OFFSETC(x) (offsetof(TransformContext, conf)+offsetof(VSTransformConfig, x))
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
+
+static const AVOption vidstabtransform_options[] = {
+    {"input",     "path to the file storing the transforms (def:transforms.trf)",   OFFSET(input),
+                   AV_OPT_TYPE_STRING, {.str = DEFAULT_INPUT_NAME}, .flags = FLAGS},
+    {"smoothing", "number of frames*2 + 1 used for lowpass filtering (def: 10)",    OFFSETC(smoothing),
+                   AV_OPT_TYPE_INT,    {.i64 = 10},       0,    1000, FLAGS},
+    {"maxshift",  "maximal number of pixels to translate image (def: -1 no limit)", OFFSETC(maxShift),
+                   AV_OPT_TYPE_INT,    {.i64 = -1},      -1,    500,  FLAGS},
+    {"maxangle",  "maximal angle in rad to rotate image (def: -1 no limit)",        OFFSETC(maxAngle),
+                   AV_OPT_TYPE_DOUBLE, {.dbl = -1.0},  -1.0,    3.14, FLAGS},
+    {"crop",      "keep: (def), black",                                             OFFSETC(crop),
+                   AV_OPT_TYPE_INT,    {.i64 = 0},        0,    1,    FLAGS, "crop"},
+    {  "keep",    "keep border",                                                    0,
+                   AV_OPT_TYPE_CONST,  {.i64 = VSKeepBorder }, 0, 0,  FLAGS, "crop"},
+    {  "black",   "black border",                                                   0,
+                   AV_OPT_TYPE_CONST,  {.i64 = VSCropBorder }, 0, 0,  FLAGS, "crop"},
+    {"invert",    "1: invert transforms (def: 0)",                                  OFFSETC(invert),
+                   AV_OPT_TYPE_INT,    {.i64 = 0},        0,    1,    FLAGS},
+    {"relative",  "consider transforms as 0: absolute, 1: relative (def)",          OFFSETC(relative),
+                   AV_OPT_TYPE_INT,    {.i64 = 1},        0,    1,    FLAGS},
+    {"zoom",      "percentage to zoom >0: zoom in, <0 zoom out (def: 0)",           OFFSETC(zoom),
+                   AV_OPT_TYPE_DOUBLE, {.dbl = 0},     -100,    100,  FLAGS},
+    {"optzoom",   "0: nothing, 1: determine optimal zoom (def) (added to 'zoom')",  OFFSETC(optZoom),
+                   AV_OPT_TYPE_INT,    {.i64 = 1},        0,    1,    FLAGS},
+    {"interpol",  "type of interpolation, no, linear, bilinear (def) , bicubic",    OFFSETC(interpolType),
+                   AV_OPT_TYPE_INT,    {.i64 = 2},        0,    3,    FLAGS, "interpol"},
+    {  "no",      "no interpolation",                                               0,
+                   AV_OPT_TYPE_CONST,  {.i64 = VS_Zero  },  0,  0,    FLAGS, "interpol"},
+    {  "linear",  "linear (horizontal)",                                            0,
+                   AV_OPT_TYPE_CONST,  {.i64 = VS_Linear }, 0,  0,    FLAGS, "interpol"},
+    {  "bilinear","bi-linear",                                                      0,
+                   AV_OPT_TYPE_CONST,  {.i64 = VS_BiLinear}, 0, 0,    FLAGS, "interpol"},
+    {  "bicubic", "bi-cubic",                                                       0,
+                   AV_OPT_TYPE_CONST,  {.i64 = VS_BiCubic }, 0, 0,    FLAGS, "interpol"},
+    {"tripod",    "if 1: virtual tripod mode (equiv. to relative=0:smoothing=0)",   OFFSET(tripod),
+                   AV_OPT_TYPE_INT,    {.i64 = 0},        0,    1,    FLAGS},
+    {NULL},
+};
+
+AVFILTER_DEFINE_CLASS(vidstabtransform);
+
+/** Route vid.stab's allocator and logger through libavutil and log the library version. */
+static av_cold int init(AVFilterContext *ctx)
+{
+    TransformContext *tc = ctx->priv;
+
+    vs_set_mem_and_log_functions();
+    tc->class = &vidstabtransform_class;
+    av_log(ctx, AV_LOG_VERBOSE, "vidstabtransform filter: init %s\n", LIBVIDSTAB_VERSION);
+    return 0;
+}
+
+/** Release the transformation state and the list of loaded transforms. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    TransformContext *tc = ctx->priv;
+
+    vsTransformDataCleanup(&tc->td);
+    vsTransformationsCleanup(&tc->trans);
+}
+
+/** Restrict the filter to the pixel formats that av_2_vs_pixel_format() can map. */
+static int query_formats(AVFilterContext *ctx)
+{
+    // If you add something here also add it in vidstabutils.c
+    static const enum AVPixelFormat pix_fmts[] = {
+        AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV420P,
+        AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV410P,  AV_PIX_FMT_YUVA420P,
+        AV_PIX_FMT_YUV440P,  AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24, AV_PIX_FMT_RGBA,
+        AV_PIX_FMT_NONE
+    };
+
+    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
+    return 0;
+}
+
+/**
+ * Set up vid.stab for the negotiated frame size and pixel format, then load
+ * the transform file written by pass 1 and preprocess the transforms.
+ */
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    TransformContext *tc = ctx->priv;
+    FILE *f;
+    VSManyLocalMotions mlms;
+
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+
+    VSTransformData *td = &(tc->td);
+
+    VSFrameInfo fi_src;
+    VSFrameInfo fi_dest;
+
+    if (!vsFrameInfoInit(&fi_src, inlink->w, inlink->h,
+                         av_2_vs_pixel_format(ctx, inlink->format)) ||
+        !vsFrameInfoInit(&fi_dest, inlink->w, inlink->h,
+                         av_2_vs_pixel_format(ctx, inlink->format))) {
+        av_log(ctx, AV_LOG_ERROR, "unknown pixel format: %i (%s)\n",
+               inlink->format, desc->name);
+        return AVERROR(EINVAL);
+    }
+
+    if (fi_src.bytesPerPixel != av_get_bits_per_pixel(desc)/8 ||
+        fi_src.log2ChromaW != desc->log2_chroma_w ||
+        fi_src.log2ChromaH != desc->log2_chroma_h) {
+        av_log(ctx, AV_LOG_ERROR, "pixel-format error: bpp %i<>%i  ",
+               fi_src.bytesPerPixel, av_get_bits_per_pixel(desc)/8);
+        av_log(ctx, AV_LOG_ERROR, "chroma_subsampl: w: %i<>%i  h: %i<>%i\n",
+               fi_src.log2ChromaW, desc->log2_chroma_w,
+               fi_src.log2ChromaH, desc->log2_chroma_h);
+        return AVERROR(EINVAL);
+    }
+
+    // set values that are not initialized by the options
+    tc->conf.modName = "vidstabtransform";
+    tc->conf.verbose = 1;
+    if (tc->tripod) {
+        av_log(ctx, AV_LOG_INFO, "Virtual tripod mode: relative=0, smoothing=0\n");
+        tc->conf.relative  = 0;
+        tc->conf.smoothing = 0;
+    }
+
+    if (vsTransformDataInit(td, &tc->conf, &fi_src, &fi_dest) != VS_OK) {
+        av_log(ctx, AV_LOG_ERROR, "initialization of vid.stab transform failed, please report a BUG\n");
+        return AVERROR(EINVAL);
+    }
+
+    vsTransformGetConfig(&tc->conf, td);
+    av_log(ctx, AV_LOG_INFO, "Video transformation/stabilization settings (pass 2/2):\n");
+    av_log(ctx, AV_LOG_INFO, "    input     = %s\n", tc->input);
+    av_log(ctx, AV_LOG_INFO, "    smoothing = %d\n", tc->conf.smoothing);
+    av_log(ctx, AV_LOG_INFO, "    maxshift  = %d\n", tc->conf.maxShift);
+    av_log(ctx, AV_LOG_INFO, "    maxangle  = %f\n", tc->conf.maxAngle);
+    av_log(ctx, AV_LOG_INFO, "    crop      = %s\n", tc->conf.crop ? "Black" : "Keep");
+    av_log(ctx, AV_LOG_INFO, "    relative  = %s\n", tc->conf.relative ? "True": "False");
+    av_log(ctx, AV_LOG_INFO, "    invert    = %s\n", tc->conf.invert ? "True" : "False");
+    av_log(ctx, AV_LOG_INFO, "    zoom      = %f\n", tc->conf.zoom);
+    av_log(ctx, AV_LOG_INFO, "    optzoom   = %s\n", tc->conf.optZoom ? "On" : "Off");
+    av_log(ctx, AV_LOG_INFO, "    interpol  = %s\n", getInterpolationTypeName(tc->conf.interpolType));
+
+    f = fopen(tc->input, "r");
+    if (f == NULL) {
+        int ret = AVERROR(errno); // read errno before av_log() can overwrite it
+        av_log(ctx, AV_LOG_ERROR, "cannot open input file %s\n", tc->input);
+        return ret;
+    }
+
+    // every exit below this point has to close f
+    if (vsReadLocalMotionsFile(f, &mlms) == VS_OK) {
+        // calculate the actual transforms from the localmotions
+        if (vsLocalmotions2TransformsSimple(td, &mlms, &tc->trans) != VS_OK) {
+            av_log(ctx, AV_LOG_ERROR, "calculating transformations failed\n");
+            fclose(f);
+            return AVERROR(EINVAL);
+        }
+    } else { // try to read old format
+        if (!vsReadOldTransforms(td, f, &tc->trans)) { /* read input file */
+            av_log(ctx, AV_LOG_ERROR, "error parsing input file %s\n", tc->input);
+            fclose(f);
+            return AVERROR(EINVAL);
+        }
+    }
+    fclose(f);
+
+    if (vsPreprocessTransforms(td, &tc->trans) != VS_OK) {
+        av_log(ctx, AV_LOG_ERROR, "error while preprocessing transforms\n");
+        return AVERROR(EINVAL);
+    }
+
+    // TODO: add sharpening, so far the user needs to call the unsharp filter manually
+    return 0;
+}
+
+/**
+ * Apply the next stored transform to one frame. A writable input is
+ * transformed in place, otherwise the result goes into a new output buffer.
+ */
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    TransformContext *tc = ctx->priv;
+    VSTransformData *td = &(tc->td);
+
+    AVFilterLink *outlink = inlink->dst->outputs[0];
+    int direct = 0;
+    AVFrame *out;
+    VSFrame inframe;
+    int plane;
+
+    if (av_frame_is_writable(in)) {
+        direct = 1;
+        out = in;
+    } else {
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        av_frame_copy_props(out, in);
+    }
+
+    for (plane = 0; plane < vsTransformGetSrcFrameInfo(td)->planes; plane++) {
+        inframe.data[plane]     = in->data[plane];
+        inframe.linesize[plane] = in->linesize[plane];
+    }
+    if (direct) { // inplace
+        vsTransformPrepare(td, &inframe, &inframe);
+    } else { // separate frames
+        VSFrame outframe;
+        for (plane = 0; plane < vsTransformGetDestFrameInfo(td)->planes; plane++) {
+            outframe.data[plane]     = out->data[plane];
+            outframe.linesize[plane] = out->linesize[plane];
+        }
+        vsTransformPrepare(td, &inframe, &outframe);
+    }
+
+    vsDoTransform(td, vsGetNextTransform(td, &tc->trans));
+
+    vsTransformFinish(td);
+
+    if (!direct)
+        av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+}
+
+static const AVFilterPad avfilter_vf_vidstabtransform_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = filter_frame,
+        .config_props = config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_vf_vidstabtransform_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_VIDEO,
+    },
+    { NULL }
+};
+
+AVFilter avfilter_vf_vidstabtransform = {
+    .name          = "vidstabtransform",
+    .description   = NULL_IF_CONFIG_SMALL("pass 2 of stabilization, "
+                                          "transforms the frames "
+                                          "(see vidstabdetect for pass 1)"),
+    .priv_size     = sizeof(TransformContext),
+    .init          = init,
+    .uninit        = uninit,
+    .query_formats = query_formats,
+
+    .inputs        = avfilter_vf_vidstabtransform_inputs,
+    .outputs       = avfilter_vf_vidstabtransform_outputs,
+    .priv_class    = &vidstabtransform_class,
+};
diff --git a/libavfilter/vidstabutils.c b/libavfilter/vidstabutils.c
new file mode 100644
index 0000000000..60c8eed586
--- /dev/null
+++ b/libavfilter/vidstabutils.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2013 Georg Martius
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "vidstabutils.h"
+
+/** convert AV's pixelformat to vid.stab pixelformat; logs an error and yields PF_NONE if unsupported */
+VSPixelFormat av_2_vs_pixel_format(AVFilterContext *ctx, enum AVPixelFormat pf)
+{
+    switch (pf) {
+    case AV_PIX_FMT_YUV420P:  return PF_YUV420P;
+    case AV_PIX_FMT_YUV422P:  return PF_YUV422P;
+    case AV_PIX_FMT_YUV444P:  return PF_YUV444P;
+    case AV_PIX_FMT_YUV410P:  return PF_YUV410P;
+    case AV_PIX_FMT_YUV411P:  return PF_YUV411P;
+    case AV_PIX_FMT_YUV440P:  return PF_YUV440P;
+    case AV_PIX_FMT_YUVA420P: return PF_YUVA420P;
+    case AV_PIX_FMT_GRAY8:    return PF_GRAY8;
+    case AV_PIX_FMT_RGB24:    return PF_RGB24;
+    case AV_PIX_FMT_BGR24:    return PF_BGR24;
+    case AV_PIX_FMT_RGBA:     return PF_RGBA;
+    default:
+        av_log(ctx, AV_LOG_ERROR, "cannot deal with pixel format %i\n", pf);
+        return PF_NONE;
+    }
+}
+
+/** struct to hold a valid context for logging from within vid.stab lib */
+typedef struct {
+    const AVClass *class;
+} VS2AVLogCtx;
+
+/** wrapper to log vs_log into av_log; the vid.stab tag is used as the class name */
+static int vs_2_av_log_wrapper(int type, const char *tag, const char *format, ...)
+{
+    va_list ap;
+    VS2AVLogCtx ctx;
+    AVClass class = {
+        .class_name = tag,
+        .item_name  = av_default_item_name,
+        .option     = NULL,
+        .version    = LIBAVUTIL_VERSION_INT,
+        .category   = AV_CLASS_CATEGORY_FILTER,
+    };
+    ctx.class = &class;
+    va_start(ap, format);
+    av_vlog(&ctx, type, format, ap);
+    va_end(ap);
+    return VS_OK;
+}
+
+/** sets the memory allocation function and logging constants to av versions */
+void vs_set_mem_and_log_functions(void)
+{
+    vs_malloc  = av_malloc;
+    vs_zalloc  = av_mallocz;
+    vs_realloc = av_realloc;
+    vs_free    = av_free;
+
+    VS_ERROR_TYPE = AV_LOG_ERROR;
+    VS_WARN_TYPE  = AV_LOG_WARNING;
+    VS_INFO_TYPE  = AV_LOG_INFO;
+    VS_MSG_TYPE   = AV_LOG_VERBOSE;
+
+    vs_log   = vs_2_av_log_wrapper;
+    VS_ERROR = 0;
+    VS_OK    = 1;
+}
diff --git a/libavfilter/vidstabutils.h b/libavfilter/vidstabutils.h
new file mode 100644
index 0000000000..f1c20e6c65
--- /dev/null
+++ b/libavfilter/vidstabutils.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2013 Georg Martius
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VIDSTABUTILS_H
+#define AVFILTER_VIDSTABUTILS_H
+
+#include <vid.stab/libvidstab.h>
+
+#include "avfilter.h"
+
+/* ** some conversions from avlib to vid.stab constants and functions *** */
+
+/**
+ * Convert a libavutil pixel format into the corresponding vid.stab one.
+ *
+ * @param ctx context used for logging
+ * @param pf  pixel format to convert
+ * @return the vid.stab pixel format, or PF_NONE (after logging an error)
+ *         when there is no counterpart
+ */
+VSPixelFormat av_2_vs_pixel_format(AVFilterContext *ctx, enum AVPixelFormat pf);
+
+/**
+ * Make vid.stab allocate memory with the libavutil allocators and send its
+ * log output to av_log().
+ */
+void vs_set_mem_and_log_functions(void);
+
+#endif /* AVFILTER_VIDSTABUTILS_H */