2017-05-10 17:18:16 +00:00
/*
* Copyright ( c ) 2017 , NVIDIA CORPORATION . All rights reserved .
*
* Permission is hereby granted , free of charge , to any person obtaining a
* copy of this software and associated documentation files ( the " Software " ) ,
* to deal in the Software without restriction , including without limitation
* the rights to use , copy , modify , merge , publish , distribute , sublicense ,
* and / or sell copies of the Software , and to permit persons to whom the
* Software is furnished to do so , subject to the following conditions :
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software .
*
* THE SOFTWARE IS PROVIDED " AS IS " , WITHOUT WARRANTY OF ANY KIND , EXPRESS OR
* IMPLIED , INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY ,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT . IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM , DAMAGES OR OTHER
* LIABILITY , WHETHER IN AN ACTION OF CONTRACT , TORT OR OTHERWISE , ARISING
* FROM , OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE .
*/
2020-11-04 17:10:19 +00:00
# include <float.h>
2017-05-10 17:18:16 +00:00
# include <stdio.h>
# include "libavutil/common.h"
# include "libavutil/hwcontext.h"
2019-02-21 03:57:51 +00:00
# include "libavutil/hwcontext_cuda_internal.h"
2018-11-11 06:47:28 +00:00
# include "libavutil/cuda_check.h"
2017-05-10 17:18:16 +00:00
# include "libavutil/internal.h"
# include "libavutil/opt.h"
# include "libavutil/pixdesc.h"
# include "avfilter.h"
2024-08-05 16:00:30 +00:00
# include "filters.h"
2019-12-08 11:12:36 +00:00
# include "scale_eval.h"
2017-05-10 17:18:16 +00:00
# include "video.h"
2021-06-11 21:54:34 +00:00
# include "cuda/load_helper.h"
2020-11-04 17:10:19 +00:00
# include "vf_scale_cuda.h"
2017-05-10 17:18:16 +00:00
static const enum AVPixelFormat supported_formats [ ] = {
AV_PIX_FMT_YUV420P ,
AV_PIX_FMT_NV12 ,
AV_PIX_FMT_YUV444P ,
AV_PIX_FMT_P010 ,
2019-05-14 03:00:12 +00:00
AV_PIX_FMT_P016 ,
AV_PIX_FMT_YUV444P16 ,
2020-11-04 18:53:41 +00:00
AV_PIX_FMT_0RGB32 ,
AV_PIX_FMT_0BGR32 ,
2023-06-16 05:10:37 +00:00
AV_PIX_FMT_RGB32 ,
AV_PIX_FMT_BGR32 ,
2017-05-10 17:18:16 +00:00
} ;
/* Integer division of a by b rounded up (for non-negative a and positive b).
 * Note that b is evaluated twice. */
#define DIV_UP(a, b) (((a) + (b) - 1) / (b))

/* Thread-block width and height used for the kernel launches in this file. */
#define BLOCKX 32
#define BLOCKY 16
2019-02-21 03:57:51 +00:00
/* Routes a CUDA driver call through FF_CUDA_CHECK_DL. The expansion refers to
 * `ctx` and `s` by name, so both must be in scope wherever CHECK_CU is used.
 * Callers in this file treat a negative result as failure. */
# define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x)
2018-11-11 06:47:28 +00:00
2020-10-31 19:22:33 +00:00
/* Values of the "interp_algo" option. */
enum {
    INTERP_ALGO_DEFAULT,  /* handled like bicubic when the kernels are loaded; replaced by
                           * nearest when neither the size nor the format changes */
    INTERP_ALGO_NEAREST,
    INTERP_ALGO_BILINEAR,
    INTERP_ALGO_BICUBIC,
    INTERP_ALGO_LANCZOS,

    INTERP_ALGO_COUNT     /* number of algorithms; INTERP_ALGO_COUNT - 1 is the option maximum */
};
2017-05-10 17:18:16 +00:00
typedef struct CUDAScaleContext {
const AVClass * class ;
2019-02-21 03:57:51 +00:00
AVCUDADeviceContext * hwctx ;
2021-06-23 23:53:10 +00:00
enum AVPixelFormat in_fmt , out_fmt ;
const AVPixFmtDescriptor * in_desc , * out_desc ;
int in_planes , out_planes ;
int in_plane_depths [ 4 ] ;
int in_plane_channels [ 4 ] ;
2017-05-10 17:18:16 +00:00
AVBufferRef * frames_ctx ;
AVFrame * frame ;
AVFrame * tmp_frame ;
int passthrough ;
/**
* Output sw format . AV_PIX_FMT_NONE for no conversion .
*/
enum AVPixelFormat format ;
char * w_expr ; ///< width expression string
char * h_expr ; ///< height expression string
2019-12-02 15:41:21 +00:00
int force_original_aspect_ratio ;
int force_divisible_by ;
2017-05-10 17:18:16 +00:00
CUcontext cu_ctx ;
CUmodule cu_module ;
2021-06-23 23:53:10 +00:00
CUfunction cu_func ;
CUfunction cu_func_uv ;
2019-02-21 03:57:51 +00:00
CUstream cu_stream ;
2017-05-10 17:18:16 +00:00
2020-10-31 19:22:33 +00:00
int interp_algo ;
int interp_use_linear ;
2020-11-03 17:33:55 +00:00
int interp_as_integer ;
2020-11-04 17:10:19 +00:00
float param ;
2017-05-10 17:18:16 +00:00
} CUDAScaleContext ;
/**
 * Filter init callback: allocates the two AVFrame shells kept in the context.
 *
 * @return 0 on success, AVERROR(ENOMEM) if either allocation fails
 */
static av_cold int cudascale_init(AVFilterContext *ctx)
{
    CUDAScaleContext *priv = ctx->priv;

    if (!(priv->frame = av_frame_alloc()))
        return AVERROR(ENOMEM);

    priv->tmp_frame = av_frame_alloc();

    return priv->tmp_frame ? 0 : AVERROR(ENOMEM);
}
/**
 * Filter uninit callback: unloads the CUDA module (if one was loaded and a
 * device context is known) and releases the frames owned by the context.
 */
static av_cold void cudascale_uninit(AVFilterContext *ctx)
{
    CUDAScaleContext *s = ctx->priv;
    const int module_loaded = s->hwctx && s->cu_module;

    if (module_loaded) {
        CudaFunctions *cu = s->hwctx->internal->cuda_dl;
        CUcontext dummy;

        /* The module has to be unloaded with its CUDA context current. */
        CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx));
        CHECK_CU(cu->cuModuleUnload(s->cu_module));
        s->cu_module = NULL;
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    }

    av_frame_free(&s->frame);
    av_buffer_unref(&s->frames_ctx);
    av_frame_free(&s->tmp_frame);
}
2020-11-03 17:33:55 +00:00
/**
 * Create the output hw frames context and pre-fetch the first output frame.
 *
 * The pool is allocated with both dimensions aligned up to 32, while
 * s->frame is labelled with the requested width and height. On success the
 * new context replaces s->frames_ctx; on failure s->frames_ctx is untouched.
 *
 * @param s          filter context
 * @param device_ctx device reference the frames context is created on
 * @param width      output width
 * @param height     output height
 * @return 0 on success, a negative AVERROR code otherwise
 */
static av_cold int init_hwframe_ctx(CUDAScaleContext *s, AVBufferRef *device_ctx, int width, int height)
{
    AVHWFramesContext *frames;
    AVBufferRef *frames_ref = av_hwframe_ctx_alloc(device_ctx);
    int err;

    if (!frames_ref)
        return AVERROR(ENOMEM);

    frames            = (AVHWFramesContext *)frames_ref->data;
    frames->format    = AV_PIX_FMT_CUDA;
    frames->sw_format = s->out_fmt;
    frames->width     = FFALIGN(width,  32);
    frames->height    = FFALIGN(height, 32);

    err = av_hwframe_ctx_init(frames_ref);
    if (err >= 0) {
        av_frame_unref(s->frame);
        err = av_hwframe_get_buffer(frames_ref, s->frame, 0);
    }
    if (err < 0) {
        av_buffer_unref(&frames_ref);
        return err;
    }

    s->frame->width  = width;
    s->frame->height = height;

    /* Hand ownership of the new context over to the filter. */
    av_buffer_unref(&s->frames_ctx);
    s->frames_ctx = frames_ref;

    return 0;
}
static int format_is_supported ( enum AVPixelFormat fmt )
{
int i ;
for ( i = 0 ; i < FF_ARRAY_ELEMS ( supported_formats ) ; i + + )
if ( supported_formats [ i ] = = fmt )
return 1 ;
return 0 ;
}
2021-06-23 23:53:10 +00:00
/**
 * Record the input/output formats and derive the per-plane information the
 * texture setup needs.
 *
 * The per-plane tables are cleared first: the channel count is accumulated
 * with FFMAX, so values left over from an earlier call with a different
 * format would otherwise survive for planes the new format does not raise
 * (or does not touch at all).
 *
 * @param ctx        filter context
 * @param in_format  software pixel format of the input frames
 * @param out_format software pixel format of the output frames
 */
static av_cold void set_format_info(AVFilterContext *ctx, enum AVPixelFormat in_format, enum AVPixelFormat out_format)
{
    CUDAScaleContext *s = ctx->priv;
    int i, p, d;

    s->in_fmt = in_format;
    s->out_fmt = out_format;

    s->in_desc  = av_pix_fmt_desc_get(s->in_fmt);
    s->out_desc = av_pix_fmt_desc_get(s->out_fmt);
    s->in_planes  = av_pix_fmt_count_planes(s->in_fmt);
    s->out_planes = av_pix_fmt_count_planes(s->out_fmt);

    /* start from a clean slate so repeated calls cannot inherit stale values */
    for (i = 0; i < FF_ARRAY_ELEMS(s->in_plane_channels); i++) {
        s->in_plane_channels[i] = 0;
        s->in_plane_depths[i]   = 0;
    }

    // find maximum step of each component of each plane
    // For our subset of formats, this should accurately tell us how many channels CUDA needs
    // i.e. 1 for Y plane, 2 for UV plane of NV12, 4 for single plane of RGB0 formats
    for (i = 0; i < s->in_desc->nb_components; i++) {
        d = (s->in_desc->comp[i].depth + 7) / 8;   /* bytes per component */
        p = s->in_desc->comp[i].plane;
        s->in_plane_channels[p] = FFMAX(s->in_plane_channels[p], s->in_desc->comp[i].step / d);

        s->in_plane_depths[p] = s->in_desc->comp[i].depth;
    }
}
2017-05-10 17:18:16 +00:00
/**
 * Validate the formats and set up the frames context for the output link.
 *
 * When passthrough is enabled and neither size nor format changes, the input
 * frames context is reused. Otherwise passthrough is cleared and a new output
 * frames context is created through init_hwframe_ctx().
 *
 * @param ctx        filter context
 * @param in_width   input width
 * @param in_height  input height
 * @param out_width  output width
 * @param out_height output height
 * @return 0 on success; AVERROR(EINVAL) without an input hw frames context,
 *         AVERROR(ENOSYS) for an unsupported format, AVERROR(ENOMEM) on
 *         allocation failure, or the error from init_hwframe_ctx()
 */
static av_cold int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
                                         int out_width, int out_height)
{
    CUDAScaleContext *s = ctx->priv;
    FilterLink      *inl = ff_filter_link(ctx->inputs[0]);
    FilterLink     *outl = ff_filter_link(ctx->outputs[0]);

    AVHWFramesContext *in_frames_ctx;

    enum AVPixelFormat in_format;
    enum AVPixelFormat out_format;
    int ret;

    /* check that we have a hw context */
    if (!inl->hw_frames_ctx) {
        av_log(ctx, AV_LOG_ERROR, " No hw context provided on input \n ");
        return AVERROR(EINVAL);
    }
    in_frames_ctx = (AVHWFramesContext *)inl->hw_frames_ctx->data;
    in_format     = in_frames_ctx->sw_format;
    out_format    = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format;

    if (!format_is_supported(in_format)) {
        av_log(ctx, AV_LOG_ERROR, " Unsupported input format: %s \n ",
               av_get_pix_fmt_name(in_format));
        return AVERROR(ENOSYS);
    }
    if (!format_is_supported(out_format)) {
        av_log(ctx, AV_LOG_ERROR, " Unsupported output format: %s \n ",
               av_get_pix_fmt_name(out_format));
        return AVERROR(ENOSYS);
    }

    set_format_info(ctx, in_format, out_format);

    if (s->passthrough && in_width == out_width && in_height == out_height && in_format == out_format) {
        AVBufferRef *in_ref = av_buffer_ref(inl->hw_frames_ctx);
        if (!in_ref)
            return AVERROR(ENOMEM);

        /* Release whatever was held before, as init_hwframe_ctx() does, so a
         * repeated configuration does not leak the earlier reference. */
        av_buffer_unref(&s->frames_ctx);
        s->frames_ctx = in_ref;
    } else {
        s->passthrough = 0;

        ret = init_hwframe_ctx(s, in_frames_ctx->device_ref, out_width, out_height);
        if (ret < 0)
            return ret;

        /* Same size and format with no explicit algorithm: a plain copy is
         * enough, so use the nearest-neighbour kernel. */
        if (in_width == out_width && in_height == out_height &&
            in_format == out_format && s->interp_algo == INTERP_ALGO_DEFAULT)
            s->interp_algo = INTERP_ALGO_NEAREST;
    }

    outl->hw_frames_ctx = av_buffer_ref(s->frames_ctx);
    if (!outl->hw_frames_ctx)
        return AVERROR(ENOMEM);

    return 0;
}
2021-06-23 23:53:10 +00:00
/**
 * Load the CUDA module and look up the two scaling kernels that match the
 * selected interpolation algorithm and the input/output formats.
 *
 * Kernel symbols are named "Subsample_<Algo>_<infmt>_<outfmt>" with an
 * additional "_uv" suffix for the chroma variant. The symbol name handed to
 * cuModuleGetFunction must match the kernel name exactly, so the pieces are
 * assembled without any padding characters.
 *
 * Also sets s->interp_use_linear and s->interp_as_integer, which the texture
 * setup reads later.
 *
 * @param ctx filter context; s->hwctx, s->in_fmt and s->out_fmt must be set
 * @return 0 on success, AVERROR_BUG for an unknown algorithm,
 *         AVERROR(ENOSYS) if the primary kernel does not exist, or another
 *         negative AVERROR code from the CUDA calls
 */
static av_cold int cudascale_load_functions(AVFilterContext *ctx)
{
    CUDAScaleContext *s = ctx->priv;
    CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
    char buf[128];
    int ret;

    const char *in_fmt_name  = av_get_pix_fmt_name(s->in_fmt);
    const char *out_fmt_name = av_get_pix_fmt_name(s->out_fmt);

    const char *function_infix = "";

    extern const unsigned char ff_vf_scale_cuda_ptx_data[];
    extern const unsigned int ff_vf_scale_cuda_ptx_len;

    switch (s->interp_algo) {
    case INTERP_ALGO_NEAREST:
        function_infix = "Nearest";
        s->interp_use_linear = 0;
        s->interp_as_integer = 1;
        break;
    case INTERP_ALGO_BILINEAR:
        function_infix = "Bilinear";
        s->interp_use_linear = 1;
        s->interp_as_integer = 1;
        break;
    case INTERP_ALGO_DEFAULT:
    case INTERP_ALGO_BICUBIC:
        function_infix = "Bicubic";
        s->interp_use_linear = 0;
        s->interp_as_integer = 0;
        break;
    case INTERP_ALGO_LANCZOS:
        function_infix = "Lanczos";
        s->interp_use_linear = 0;
        s->interp_as_integer = 0;
        break;
    default:
        av_log(ctx, AV_LOG_ERROR, " Unknown interpolation algorithm \n ");
        return AVERROR_BUG;
    }

    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        return ret;

    /* A module left over from an earlier configuration would be overwritten
     * (and therefore leaked) by the load below; release it first. */
    if (s->cu_module) {
        CHECK_CU(cu->cuModuleUnload(s->cu_module));
        s->cu_module = NULL;
    }

    ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module,
                              ff_vf_scale_cuda_ptx_data, ff_vf_scale_cuda_ptx_len);
    if (ret < 0)
        goto fail;

    snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s", function_infix, in_fmt_name, out_fmt_name);
    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func, s->cu_module, buf));
    if (ret < 0) {
        av_log(ctx, AV_LOG_FATAL, " Unsupported conversion: %s -> %s \n ", in_fmt_name, out_fmt_name);
        ret = AVERROR(ENOSYS);
        goto fail;
    }

    snprintf(buf, sizeof(buf), "Subsample_%s_%s_%s_uv", function_infix, in_fmt_name, out_fmt_name);
    ret = CHECK_CU(cu->cuModuleGetFunction(&s->cu_func_uv, s->cu_module, buf));

fail:
    /* reached on success as well: the context is popped on every path */
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    return ret;
}
2017-05-10 17:18:16 +00:00
2021-06-23 23:53:10 +00:00
/**
 * Output link configuration: evaluates the output size, sets up the frames
 * context, derives the output sample aspect ratio and loads the kernels.
 *
 * The size check is a hard error: the aspect-ratio computation below
 * multiplies the very same int operands, so continuing after a failed check
 * would evaluate those products with signed overflow.
 *
 * @param outlink output link being configured
 * @return 0 on success, AVERROR(EINVAL) if the rescaled dimensions are too
 *         large, or the negative AVERROR code of the failing step
 */
static av_cold int cudascale_config_props(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    AVFilterLink *inlink = outlink->src->inputs[0];
    FilterLink      *inl = ff_filter_link(inlink);
    CUDAScaleContext *s  = ctx->priv;
    AVHWFramesContext     *frames_ctx;
    AVCUDADeviceContext *device_hwctx;
    int w, h;
    int ret;

    if ((ret = ff_scale_eval_dimensions(s,
                                        s->w_expr, s->h_expr,
                                        inlink, outlink,
                                        &w, &h)) < 0)
        return ret;

    ff_scale_adjust_dimensions(inlink, &w, &h,
                               s->force_original_aspect_ratio, s->force_divisible_by);

    if (((int64_t)h * inlink->w) > INT_MAX ||
        ((int64_t)w * inlink->h) > INT_MAX) {
        av_log(ctx, AV_LOG_ERROR, " Rescaled value for width or height is too big. \n ");
        return AVERROR(EINVAL);
    }

    outlink->w = w;
    outlink->h = h;

    ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
    if (ret < 0)
        return ret;

    /* init_processing_chain() has verified that the input carries a hw frames context */
    frames_ctx   = (AVHWFramesContext *)inl->hw_frames_ctx->data;
    device_hwctx = frames_ctx->device_ctx->hwctx;

    s->hwctx     = device_hwctx;
    s->cu_stream = s->hwctx->stream;

    if (inlink->sample_aspect_ratio.num) {
        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w,
                                                             outlink->w * inlink->h},
                                                inlink->sample_aspect_ratio);
    } else {
        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
    }

    av_log(ctx, AV_LOG_VERBOSE, " w:%d h:%d fmt:%s -> w:%d h:%d fmt:%s%s \n ",
           inlink->w, inlink->h, av_get_pix_fmt_name(s->in_fmt),
           outlink->w, outlink->h, av_get_pix_fmt_name(s->out_fmt),
           s->passthrough ? " (passthrough) " : " ");

    ret = cudascale_load_functions(ctx);
    if (ret < 0)
        return ret;

    return 0;
}
2021-06-23 23:53:10 +00:00
/**
 * Launch one scaling kernel on s->cu_stream.
 *
 * The grid is sized so that blocks of BLOCKX x BLOCKY threads cover
 * dst_width x dst_height. The kernel receives four source textures, four
 * destination plane pointers, the destination size and pitch, the source
 * size and the algorithm parameter, in that order.
 *
 * @param ctx        filter context
 * @param func       kernel to launch
 * @param src_tex    texture objects of the source planes
 * @param src_width  source width passed to the kernel
 * @param src_height source height passed to the kernel
 * @param out_frame  frame whose data[0..3] are the destination planes
 * @param dst_width  destination width
 * @param dst_height destination height
 * @param dst_pitch  destination pitch passed to the kernel
 * @return result of CHECK_CU on the launch
 */
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
                              CUtexObject src_tex[4], int src_width, int src_height,
                              AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
{
    CUDAScaleContext *s = ctx->priv;
    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
    CUdeviceptr dst_planes[4];
    int plane;

    for (plane = 0; plane < 4; plane++)
        dst_planes[plane] = (CUdeviceptr)out_frame->data[plane];

    {
        /* Kernel parameters, by address, in the order the kernel declares them. */
        void *args_uchar[] = {
            &src_tex[0],    &src_tex[1],    &src_tex[2],    &src_tex[3],
            &dst_planes[0], &dst_planes[1], &dst_planes[2], &dst_planes[3],
            &dst_width,     &dst_height,    &dst_pitch,
            &src_width,     &src_height,    &s->param
        };

        return CHECK_CU(cu->cuLaunchKernel(func,
                                           DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1,
                                           BLOCKX, BLOCKY, 1, 0, s->cu_stream, args_uchar, NULL));
    }
}
/**
 * Scale one frame: wrap every input plane in a texture object, run the
 * primary kernel and, for multi-plane output, the chroma kernel, then destroy
 * the textures again.
 *
 * @param ctx filter context
 * @param out destination frame
 * @param in  source frame
 * @return 0 or positive on success, a negative AVERROR code on failure
 */
static int scalecuda_resize(AVFilterContext *ctx,
                            AVFrame *out, AVFrame *in)
{
    CUDAScaleContext *s = ctx->priv;
    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
    CUcontext dummy, cuda_ctx = s->hwctx->cuda_ctx;
    CUtexObject tex[4] = { 0 };
    /* size of the subsampled input planes */
    const int in_chroma_w = AV_CEIL_RSHIFT(in->width,  s->in_desc->log2_chroma_w);
    const int in_chroma_h = AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h);
    int i, ret;

    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0)
        return ret;

    for (i = 0; i < s->in_planes; i++) {
        /* planes 1 and 2 use the chroma-subsampled dimensions */
        const int subsampled = (i == 1 || i == 2);

        CUDA_TEXTURE_DESC tex_desc = {
            .filterMode = s->interp_use_linear ?
                          CU_TR_FILTER_MODE_LINEAR :
                          CU_TR_FILTER_MODE_POINT,
            .flags = s->interp_as_integer ? CU_TRSF_READ_AS_INTEGER : 0,
        };

        CUDA_RESOURCE_DESC res_desc = {
            .resType = CU_RESOURCE_TYPE_PITCH2D,
            .res.pitch2D.format = s->in_plane_depths[i] <= 8 ?
                                  CU_AD_FORMAT_UNSIGNED_INT8 :
                                  CU_AD_FORMAT_UNSIGNED_INT16,
            .res.pitch2D.numChannels = s->in_plane_channels[i],
            .res.pitch2D.pitchInBytes = in->linesize[i],
            .res.pitch2D.devPtr = (CUdeviceptr)in->data[i],
            .res.pitch2D.width  = subsampled ? in_chroma_w : in->width,
            .res.pitch2D.height = subsampled ? in_chroma_h : in->height,
        };

        ret = CHECK_CU(cu->cuTexObjectCreate(&tex[i], &res_desc, &tex_desc, NULL));
        if (ret < 0)
            goto exit;
    }

    // scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
    ret = call_resize_kernel(ctx, s->cu_func,
                             tex, in->width, in->height,
                             out, out->width, out->height, out->linesize[0]);

    // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane.
    if (ret >= 0 && s->out_planes > 1)
        ret = call_resize_kernel(ctx, s->cu_func_uv, tex,
                                 in_chroma_w, in_chroma_h,
                                 out,
                                 AV_CEIL_RSHIFT(out->width,  s->out_desc->log2_chroma_w),
                                 AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h),
                                 out->linesize[1]);

exit:
    /* only textures that were actually created are non-zero */
    for (i = 0; i < s->in_planes; i++)
        if (tex[i])
            CHECK_CU(cu->cuTexObjectDestroy(tex[i]));

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    return ret;
}
/**
 * Scale `in` into the pre-fetched s->frame, hand that frame over to `out`
 * and fetch a fresh buffer into s->frame for the next call.
 *
 * @param ctx filter context
 * @param out empty frame that receives the scaled picture and the
 *            properties of `in`
 * @param in  source frame
 * @return 0 on success, a negative AVERROR code otherwise
 */
static int cudascale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
{
    CUDAScaleContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    int err;

    err = scalecuda_resize(ctx, s->frame, in);
    if (err < 0)
        return err;

    /* Get the next output buffer before giving the current one away. */
    err = av_hwframe_get_buffer(s->frame->hw_frames_ctx, s->tmp_frame, 0);
    if (err < 0)
        return err;

    av_frame_move_ref(out, s->frame);
    av_frame_move_ref(s->frame, s->tmp_frame);

    /* Label the fresh buffer with the output link dimensions. */
    s->frame->width  = outlink->w;
    s->frame->height = outlink->h;

    err = av_frame_copy_props(out, in);

    return err < 0 ? err : 0;
}
/**
 * Input pad callback: forwards the frame untouched in passthrough mode,
 * otherwise scales it and sends the result downstream.
 *
 * `in` is consumed on every path: it is either passed on (passthrough) or
 * freed here.
 *
 * @param link input link the frame arrived on
 * @param in   input frame
 * @return result of ff_filter_frame() on success, a negative AVERROR code
 *         otherwise
 */
static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in)
{
    AVFilterContext *ctx = link->dst;
    CUDAScaleContext *s = ctx->priv;
    AVFilterLink *outlink = ctx->outputs[0];
    CudaFunctions *cu = s->hwctx->internal->cuda_dl;
    AVFrame *out = NULL;
    CUcontext dummy;
    int ret = 0;

    if (s->passthrough)
        return ff_filter_frame(outlink, in);

    out = av_frame_alloc();
    ret = out ? CHECK_CU(cu->cuCtxPushCurrent(s->hwctx->cuda_ctx))
              : AVERROR(ENOMEM);

    if (ret >= 0) {
        ret = cudascale_scale(ctx, out, in);
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    }

    if (ret < 0) {
        av_frame_free(&in);
        av_frame_free(&out);
        return ret;
    }

    /* Adjust the sample aspect ratio for the change in frame dimensions. */
    av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
              (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
              (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
              INT_MAX);

    av_frame_free(&in);

    return ff_filter_frame(outlink, out);
}
2020-11-03 17:33:55 +00:00
/**
 * Input pad buffer callback: uses ff_null_get_video_buffer() in passthrough
 * mode and ff_default_get_video_buffer() otherwise.
 */
static AVFrame *cudascale_get_video_buffer(AVFilterLink *inlink, int w, int h)
{
    CUDAScaleContext *priv = inlink->dst->priv;

    if (priv->passthrough)
        return ff_null_get_video_buffer(inlink, w, h);

    return ff_default_get_video_buffer(inlink, w, h);
}
2017-05-10 17:18:16 +00:00
/* OFFSET: byte offset of a CUDAScaleContext member, used to bind an option to its field. */
# define OFFSET(x) offsetof(CUDAScaleContext, x)
/* FLAGS: flags shared by the options below (filtering + video parameter). */
# define FLAGS (AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM)
/*
 * Option table of the filter. The AV_OPT_TYPE_CONST entries are named values
 * for the option that carries the same .unit ("interp_algo" or "force_oar").
 * NOTE(review): entry order is assumed to be significant for unnamed
 * (positional) option arguments - confirm before reordering.
 */
static const AVOption options [ ] = {
2020-11-04 17:10:19 +00:00
{ " w " , " Output video width " , OFFSET ( w_expr ) , AV_OPT_TYPE_STRING , { . str = " iw " } , . flags = FLAGS } ,
{ " h " , " Output video height " , OFFSET ( h_expr ) , AV_OPT_TYPE_STRING , { . str = " ih " } , . flags = FLAGS } ,
2024-02-11 14:41:05 +00:00
{ " interp_algo " , " Interpolation algorithm used for resizing " , OFFSET ( interp_algo ) , AV_OPT_TYPE_INT , { . i64 = INTERP_ALGO_DEFAULT } , 0 , INTERP_ALGO_COUNT - 1 , FLAGS , . unit = " interp_algo " } ,
{ " nearest " , " nearest neighbour " , 0 , AV_OPT_TYPE_CONST , { . i64 = INTERP_ALGO_NEAREST } , 0 , 0 , FLAGS , . unit = " interp_algo " } ,
{ " bilinear " , " bilinear " , 0 , AV_OPT_TYPE_CONST , { . i64 = INTERP_ALGO_BILINEAR } , 0 , 0 , FLAGS , . unit = " interp_algo " } ,
{ " bicubic " , " bicubic " , 0 , AV_OPT_TYPE_CONST , { . i64 = INTERP_ALGO_BICUBIC } , 0 , 0 , FLAGS , . unit = " interp_algo " } ,
{ " lanczos " , " lanczos " , 0 , AV_OPT_TYPE_CONST , { . i64 = INTERP_ALGO_LANCZOS } , 0 , 0 , FLAGS , . unit = " interp_algo " } ,
2021-06-23 23:53:10 +00:00
{ " format " , " Output video pixel format " , OFFSET ( format ) , AV_OPT_TYPE_PIXEL_FMT , { . i64 = AV_PIX_FMT_NONE } , INT_MIN , INT_MAX , . flags = FLAGS } ,
2020-11-03 17:33:55 +00:00
{ " passthrough " , " Do not process frames at all if parameters match " , OFFSET ( passthrough ) , AV_OPT_TYPE_BOOL , { . i64 = 1 } , 0 , 1 , FLAGS } ,
2020-11-04 17:10:19 +00:00
{ " param " , " Algorithm-Specific parameter " , OFFSET ( param ) , AV_OPT_TYPE_FLOAT , { . dbl = SCALE_CUDA_PARAM_DEFAULT } , - FLT_MAX , FLT_MAX , FLAGS } ,
2024-02-11 14:41:05 +00:00
{ " force_original_aspect_ratio " , " decrease or increase w/h if necessary to keep the original AR " , OFFSET ( force_original_aspect_ratio ) , AV_OPT_TYPE_INT , { . i64 = 0 } , 0 , 2 , FLAGS , . unit = " force_oar " } ,
{ " disable " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 0 } , 0 , 0 , FLAGS , . unit = " force_oar " } ,
{ " decrease " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 1 } , 0 , 0 , FLAGS , . unit = " force_oar " } ,
{ " increase " , NULL , 0 , AV_OPT_TYPE_CONST , { . i64 = 2 } , 0 , 0 , FLAGS , . unit = " force_oar " } ,
2020-11-04 17:10:19 +00:00
{ " force_divisible_by " , " enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used " , OFFSET ( force_divisible_by ) , AV_OPT_TYPE_INT , { . i64 = 1 } , 1 , 256 , FLAGS } ,
2017-05-10 17:18:16 +00:00
/* terminating entry */
{ NULL } ,
} ;
/* AVClass of the filter's private context; ties the option table above to
 * CUDAScaleContext and is referenced as .priv_class by the filter definition. */
static const AVClass cudascale_class = {
. class_name = " cudascale " ,
2024-01-19 12:33:28 +00:00
. item_name = av_default_item_name ,
2017-05-10 17:18:16 +00:00
. option = options ,
. version = LIBAVUTIL_VERSION_INT ,
} ;
/* Single video input pad: frames are delivered to cudascale_filter_frame and
 * input buffers are requested through cudascale_get_video_buffer. */
static const AVFilterPad cudascale_inputs [ ] = {
{
. name = " default " ,
. type = AVMEDIA_TYPE_VIDEO ,
. filter_frame = cudascale_filter_frame ,
2021-08-17 03:48:27 +00:00
. get_buffer . video = cudascale_get_video_buffer ,
2017-05-10 17:18:16 +00:00
} ,
} ;
/* Single video output pad; the link is configured by cudascale_config_props. */
static const AVFilterPad cudascale_outputs [ ] = {
{
. name = " default " ,
. type = AVMEDIA_TYPE_VIDEO ,
. config_props = cudascale_config_props ,
} ,
} ;
2021-04-19 16:33:56 +00:00
/* Filter definition: wires the init/uninit callbacks, the private context
 * (size and class) and the pads declared above. The only pixel format listed
 * is AV_PIX_FMT_CUDA, and the filter is flagged FF_FILTER_FLAG_HWFRAME_AWARE. */
const AVFilter ff_vf_scale_cuda = {
2017-05-10 17:18:16 +00:00
. name = " scale_cuda " ,
. description = NULL_IF_CONFIG_SMALL ( " GPU accelerated video resizer " ) ,
. init = cudascale_init ,
. uninit = cudascale_uninit ,
. priv_size = sizeof ( CUDAScaleContext ) ,
. priv_class = & cudascale_class ,
2021-08-12 11:05:31 +00:00
FILTER_INPUTS ( cudascale_inputs ) ,
FILTER_OUTPUTS ( cudascale_outputs ) ,
2017-05-10 17:18:16 +00:00
2021-09-27 20:50:51 +00:00
FILTER_SINGLE_PIXFMT ( AV_PIX_FMT_CUDA ) ,
2021-09-27 10:07:35 +00:00
2017-05-10 17:18:16 +00:00
. flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE ,
} ;