FFmpeg/libavfilter/vulkan_filter.c
Lynne a797317ab1
vulkan_filter: don't require the storage flag for the base frames format
We check for whether subformats support storage immediately below.
Those are the ones we require storage for, rather than the base format
itself.

This permits better reuse of AVHWFrame contexts.

The patch also removes an always-false check in the subformat check.
2024-08-16 01:22:17 +02:00

474 lines
18 KiB
C

/*
* Copyright (c) Lynne
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "filters.h"
#include "vulkan_filter.h"
#include "libavutil/vulkan_loader.h"
/**
 * Select or create the Vulkan frames context a filter will use, and fill in
 * the device/frames related fields of the FFVulkanContext.
 *
 * When frames_ref is given and passes all checks below (same dimensions and
 * software format, non-linear tiling, required usage flags present, every
 * subformat supports storage images), a new reference to it is taken.
 * Otherwise a fresh frames context is allocated and initialized, on the
 * device of frames_ref if one was given, or on avctx->hw_device_ctx.
 *
 * @param avctx      filter context; provides the fallback device reference
 *                   and is used for logging
 * @param s          Vulkan context to populate
 * @param frames_ref candidate frames context to reuse, may be NULL; the
 *                   caller's reference is not consumed
 * @param width      required frame width
 * @param height     required frame height
 * @param sw_format  required software pixel format
 * @return a negative AVERROR code on failure, otherwise the return value of
 *         ff_vk_load_props()
 */
int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s,
                              AVBufferRef *frames_ref,
                              int width, int height, enum AVPixelFormat sw_format)
{
    int err;
    AVHWFramesContext *frames_ctx;
    AVHWDeviceContext *device_ctx;
    AVVulkanFramesContext *vk_frames;
    AVVulkanDeviceContext *vk_dev;
    AVBufferRef *device_ref = avctx->hw_device_ctx;

    /* Check if context is reusable as-is */
    if (frames_ref) {
        int no_storage = 0;
        FFVulkanFunctions *vk;
        VkImageUsageFlagBits usage_req;
        const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format);

        frames_ctx = (AVHWFramesContext *)frames_ref->data;
        device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
        vk_frames = frames_ctx->hwctx;
        vk_dev = device_ctx->hwctx;

        /* Width and height mismatch */
        if (width != frames_ctx->width ||
            height != frames_ctx->height)
            goto skip;

        /* Format mismatch */
        if (sw_format != frames_ctx->sw_format)
            goto skip;

        /* No subformat list to inspect: the context cannot be validated,
         * so do not reuse it (and do not index a NULL list below). */
        if (!sub)
            goto skip;

        /* Don't let linear through. */
        if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR)
            goto skip;

        s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
                                                 vk_dev->nb_enabled_dev_extensions);

        /* More advanced format checks */
        err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
        if (err < 0)
            return err;
        vk = &s->vkfn;

        /* Usage mismatch */
        usage_req = VK_IMAGE_USAGE_SAMPLED_BIT;

        /* If format supports hardware encoding, make sure
         * the context includes it. */
        if (vk_frames->format[1] == VK_FORMAT_UNDEFINED &&
            (s->extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
                              FF_VK_EXT_VIDEO_MAINTENANCE_1))) {
            VkFormatProperties3 fprops = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
            };
            VkFormatProperties2 prop = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
                .pNext = &fprops,
            };
            vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev,
                                                   vk_frames->format[0],
                                                   &prop);
            if (fprops.optimalTilingFeatures & VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR)
                usage_req |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
        }

        if ((vk_frames->usage & usage_req) != usage_req)
            goto skip;

        /* Check if the subformats can do storage */
        for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) {
            VkFormatProperties2 prop = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
            };
            vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i],
                                                   &prop);
            no_storage |= !(prop.formatProperties.optimalTilingFeatures &
                            VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
        }

        /* Check if it's usable */
        if (no_storage) {
skip:
            /* Not reusable: keep only its device and create a new context */
            device_ref = frames_ctx->device_ref;
            frames_ref = NULL;
        } else {
            frames_ref = av_buffer_ref(frames_ref);
            if (!frames_ref)
                return AVERROR(ENOMEM);
        }
    }

    if (!frames_ref) {
        if (!device_ref) {
            av_log(avctx, AV_LOG_ERROR,
                   "Vulkan filtering requires a device context!\n");
            return AVERROR(EINVAL);
        }

        frames_ref = av_hwframe_ctx_alloc(device_ref);
        if (!frames_ref)
            return AVERROR(ENOMEM);

        frames_ctx = (AVHWFramesContext *)frames_ref->data;
        frames_ctx->format = AV_PIX_FMT_VULKAN;
        frames_ctx->sw_format = sw_format;
        frames_ctx->width = width;
        frames_ctx->height = height;

        /* Point at the NEW context's Vulkan data: without this, s->hwfc
         * below would be uninitialized (no input context) or would refer
         * to the rejected input context (skip path). */
        vk_frames = frames_ctx->hwctx;

        err = av_hwframe_ctx_init(frames_ref);
        if (err < 0) {
            av_buffer_unref(&frames_ref);
            return err;
        }

        device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
        vk_dev = device_ctx->hwctx;
    }

    s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
                                             vk_dev->nb_enabled_dev_extensions);

    /**
     * libplacebo does not use descriptor buffers.
     */
    if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) &&
        strcmp(avctx->filter->name, "libplacebo")) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires that "
               "the %s extension is supported!\n",
               VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME);
        av_buffer_unref(&frames_ref);
        return AVERROR(EINVAL);
    }

    err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
    if (err < 0) {
        av_buffer_unref(&frames_ref);
        return err;
    }

    /* From here on the context owns the frames reference */
    s->frames_ref = frames_ref;
    s->frames = frames_ctx;
    s->hwfc = vk_frames;

    s->device = device_ctx;
    s->hwctx = device_ctx->hwctx;

    err = ff_vk_load_props(s);
    if (err < 0)
        av_buffer_unref(&s->frames_ref);

    return err;
}
/**
 * Input link configuration shared by Vulkan filters.
 *
 * Requires the link to carry a hardware frames context whose format is
 * AV_PIX_FMT_VULKAN. For the filter's first input only, it remembers that
 * frames context (borrowed, not referenced) and seeds the input/output
 * format and output dimensions from it.
 *
 * @return 0 on success, AVERROR(EINVAL) if the link has no hardware frames
 *         context or the context is not a Vulkan one
 */
int ff_vk_filter_config_input(AVFilterLink *inlink)
{
    FilterLink *link = ff_filter_link(inlink);
    AVFilterContext *avctx = inlink->dst;
    FFVulkanContext *s = avctx->priv;
    AVHWFramesContext *in_frames;

    if (!link->hw_frames_ctx) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
               "hardware frames context on the input.\n");
        return AVERROR(EINVAL);
    }

    in_frames = (AVHWFramesContext *)link->hw_frames_ctx->data;
    if (in_frames->format != AV_PIX_FMT_VULKAN)
        return AVERROR(EINVAL);

    /* Only the first input supplies the device and the default output
     * parameters; any other input is merely validated. */
    if (inlink == avctx->inputs[0]) {
        /* Borrowed pointer: no new reference is taken here */
        s->input_frames_ref = link->hw_frames_ctx;

        /* Defaults, taken from the input */
        s->input_format  = in_frames->sw_format;
        s->output_format = in_frames->sw_format;
        s->output_width  = inlink->w;
        s->output_height = inlink->h;
    }

    return 0;
}
/**
 * Output link configuration shared by Vulkan filters.
 *
 * Drops any frames context already attached to the link, initializes the
 * filter's Vulkan context for the configured output size and format
 * (offering the saved input frames context for reuse), then attaches a new
 * reference to the resulting frames context and sets the link dimensions.
 *
 * @return a negative AVERROR code on failure, otherwise the value returned
 *         by ff_vk_filter_init_context()
 */
int ff_vk_filter_config_output(AVFilterLink *outlink)
{
    FilterLink *link = ff_filter_link(outlink);
    AVFilterContext *avctx = outlink->src;
    FFVulkanContext *s = avctx->priv;
    int ret;

    av_buffer_unref(&link->hw_frames_ctx);

    ret = ff_vk_filter_init_context(avctx, s, s->input_frames_ref,
                                    s->output_width, s->output_height,
                                    s->output_format);
    if (ret < 0)
        return ret;

    /* The link gets its own reference to the frames context */
    link->hw_frames_ctx = av_buffer_ref(s->frames_ref);
    if (!link->hw_frames_ctx)
        return AVERROR(ENOMEM);

    outlink->w = s->output_width;
    outlink->h = s->output_height;

    return ret;
}
/**
 * Generic init for Vulkan filters: marks the output format as not yet
 * chosen.
 *
 * @return always 0
 */
int ff_vk_filter_init(AVFilterContext *avctx)
{
    FFVulkanContext *vkctx = avctx->priv;

    vkctx->output_format = AV_PIX_FMT_NONE;

    return 0;
}
/**
 * Record and submit a single compute dispatch that writes out_f, optionally
 * reading in_f.
 *
 * When in_f is given it is bound at descriptor index 0 (with the sampler)
 * and the output at index 1; without an input the output is bound at
 * index 0.
 *
 * @param vkctx     Vulkan context; supplies function pointers and the
 *                  output dimensions used to size the dispatch
 * @param e         execution pool to take an exec context from
 * @param pl        compute pipeline to bind
 * @param out_f     destination frame
 * @param in_f      source frame, may be NULL
 * @param sampler   sampler used for the input descriptor
 * @param push_src  push-constant data, may be NULL to skip the update
 * @param push_size size of the push-constant data in bytes
 * @return result of ff_vk_exec_submit(), or a negative error code if
 *         preparing a frame failed (dependencies are discarded then)
 */
int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
                                FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
                                VkSampler sampler, void *push_src, size_t push_size)
{
    int err = 0;
    int nb_barriers = 0;
    uint32_t groups_x, groups_y;
    FFVulkanFunctions *fn = &vkctx->vkfn;
    VkImageView src_views[AV_NUM_DATA_POINTERS];
    VkImageView dst_views[AV_NUM_DATA_POINTERS];
    VkImageMemoryBarrier2 barriers[37];
    VkDependencyInfo dep_info = {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
    };

    /* Update descriptors and init the exec context */
    FFVkExecContext *exec = ff_vk_exec_get(e);

    ff_vk_exec_start(vkctx, exec);
    ff_vk_exec_bind_pipeline(vkctx, exec, pl);

    if (push_src)
        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
                               0, push_size, push_src);

    /* Optional input: sampled, read-only */
    if (in_f) {
        RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f,
                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                     VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
        RET(ff_vk_create_imageviews(vkctx, exec, src_views, in_f));
        ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, src_views, 0, 0,
                                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                          sampler);
        ff_vk_frame_barrier(vkctx, exec, in_f, barriers, &nb_barriers,
                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                            VK_ACCESS_SHADER_READ_BIT,
                            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                            VK_QUEUE_FAMILY_IGNORED);
    }

    /* Output: written by the shader, general layout */
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_create_imageviews(vkctx, exec, dst_views, out_f));
    /* The output binding follows the input binding when there is one */
    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, dst_views, 0,
                                      in_f ? 1 : 0,
                                      VK_IMAGE_LAYOUT_GENERAL,
                                      VK_NULL_HANDLE);
    ff_vk_frame_barrier(vkctx, exec, out_f, barriers, &nb_barriers,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    /* Issue every barrier collected above in one call */
    dep_info.pImageMemoryBarriers    = barriers;
    dep_info.imageMemoryBarrierCount = nb_barriers;
    fn->CmdPipelineBarrier2(exec->buf, &dep_info);

    /* Workgroup counts: output size rounded up to whole workgroups */
    groups_x = FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0];
    groups_y = FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1];
    fn->CmdDispatch(exec->buf, groups_x, groups_y, pl->wg_size[2]);

    return ff_vk_exec_submit(vkctx, exec);

fail:
    ff_vk_exec_discard_deps(vkctx, exec);
    return err;
}
/**
 * Record and submit two chained compute dispatches: pls[0] reads `in` and
 * writes `tmp`, then pls[1] reads `tmp` and writes `out`.
 *
 * In each pass the source is bound at descriptor index 0 (with the sampler)
 * and the destination at index 1. The same push-constant data is given to
 * both pipelines.
 *
 * @param vkctx     Vulkan context; supplies function pointers and the
 *                  output dimensions used to size both dispatches
 * @param e         execution pool to take an exec context from
 * @param pls       the two pipelines, in execution order
 * @param out       final destination frame
 * @param tmp       intermediate frame (written by pass 0, read by pass 1)
 * @param in        source frame
 * @param sampler   sampler used for the source descriptor of each pass
 * @param push_src  push-constant data, may be NULL to skip the update
 * @param push_size size of the push-constant data in bytes
 * @return result of ff_vk_exec_submit(), or a negative error code if
 *         preparing a frame failed (dependencies are discarded then)
 */
int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
                               FFVulkanPipeline *pls[2],
                               AVFrame *out, AVFrame *tmp, AVFrame *in,
                               VkSampler sampler, void *push_src, size_t push_size)
{
    int err = 0;
    int nb_barriers = 0;
    FFVulkanFunctions *fn = &vkctx->vkfn;
    /* Frames in data-flow order: [0] = in, [1] = tmp, [2] = out */
    AVFrame *frames[3] = { in, tmp, out };
    VkImageView views[3][AV_NUM_DATA_POINTERS];
    VkImageMemoryBarrier2 barriers[37];
    VkDependencyInfo dep_info = {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
    };

    /* Update descriptors and init the exec context */
    FFVkExecContext *exec = ff_vk_exec_get(e);

    ff_vk_exec_start(vkctx, exec);

    for (int f = 0; f < 3; f++)
        RET(ff_vk_exec_add_dep_frame(vkctx, exec, frames[f],
                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                     VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));

    for (int f = 0; f < 3; f++)
        RET(ff_vk_create_imageviews(vkctx, exec, views[f], frames[f]));

    /* in: read only */
    ff_vk_frame_barrier(vkctx, exec, frames[0], barriers, &nb_barriers,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT,
                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);
    /* tmp: written by the first pass and read by the second */
    ff_vk_frame_barrier(vkctx, exec, frames[1], barriers, &nb_barriers,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);
    /* out: write only */
    ff_vk_frame_barrier(vkctx, exec, frames[2], barriers, &nb_barriers,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    dep_info.pImageMemoryBarriers    = barriers;
    dep_info.imageMemoryBarrierCount = nb_barriers;
    fn->CmdPipelineBarrier2(exec->buf, &dep_info);

    for (int pass = 0; pass < 2; pass++) {
        FFVulkanPipeline *pl = pls[pass];
        /* Pass N reads frames[N] and writes frames[N + 1] */
        const int src = pass;
        const int dst = pass + 1;
        VkImageLayout src_layout;

        /* `in` was put in read-only layout, `tmp` in general layout */
        if (pass == 0)
            src_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
        else
            src_layout = VK_IMAGE_LAYOUT_GENERAL;

        ff_vk_exec_bind_pipeline(vkctx, exec, pl);

        if (push_src)
            ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, push_size, push_src);

        ff_vk_update_descriptor_img_array(vkctx, pl, exec, frames[src], views[src], 0, 0,
                                          src_layout,
                                          sampler);
        ff_vk_update_descriptor_img_array(vkctx, pl, exec, frames[dst], views[dst], 0, 1,
                                          VK_IMAGE_LAYOUT_GENERAL,
                                          VK_NULL_HANDLE);

        /* Output size rounded up to whole workgroups */
        fn->CmdDispatch(exec->buf,
                        FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
                        FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
                        pl->wg_size[2]);
    }

    return ff_vk_exec_submit(vkctx, exec);

fail:
    ff_vk_exec_discard_deps(vkctx, exec);
    return err;
}
/**
 * Record and submit a single compute dispatch that reads nb_in input frames
 * and writes one output frame.
 *
 * Input i is bound at descriptor index i (with the sampler); the output is
 * bound at index nb_in.
 *
 * @param vkctx     Vulkan context; supplies function pointers and the
 *                  output dimensions used to size the dispatch
 * @param e         execution pool to take an exec context from
 * @param pl        compute pipeline to bind
 * @param out       destination frame
 * @param in        array of nb_in source frames
 * @param nb_in     number of source frames, 0..16 (the size of the local
 *                  image-view table)
 * @param sampler   sampler used for the input descriptors
 * @param push_src  push-constant data, may be NULL to skip the update
 * @param push_size size of the push-constant data in bytes
 * @return result of ff_vk_exec_submit(); AVERROR(EINVAL) if nb_in is out of
 *         range; or a negative error code if preparing a frame failed
 *         (dependencies are discarded then)
 */
int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e,
                             FFVulkanPipeline *pl,
                             AVFrame *out, AVFrame *in[], int nb_in,
                             VkSampler sampler, void *push_src, size_t push_size)
{
    enum { MAX_NIN_INPUTS = 16 };

    int err = 0;
    FFVulkanFunctions *vk = &vkctx->vkfn;
    VkImageView in_views[MAX_NIN_INPUTS][AV_NUM_DATA_POINTERS];
    VkImageView out_views[AV_NUM_DATA_POINTERS];
    VkImageMemoryBarrier2 img_bar[128];
    int nb_img_bar = 0;
    FFVkExecContext *exec;

    /* Reject counts that would index past in_views[] or give a negative
     * output binding. Checked before an exec context is taken so there is
     * nothing to discard. */
    if (nb_in < 0 || nb_in > MAX_NIN_INPUTS)
        return AVERROR(EINVAL);

    /* Update descriptors and init the exec context */
    exec = ff_vk_exec_get(e);
    ff_vk_exec_start(vkctx, exec);

    /* Inputs */
    for (int i = 0; i < nb_in; i++) {
        RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i],
                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                     VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
        RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i]));

        ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar,
                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                            VK_ACCESS_SHADER_READ_BIT,
                            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                            VK_QUEUE_FAMILY_IGNORED);
    }

    /* Output */
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    /* Issue every barrier collected above in one call */
    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

    ff_vk_exec_bind_pipeline(vkctx, exec, pl);

    if (push_src)
        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
                               0, push_size, push_src);

    for (int i = 0; i < nb_in; i++)
        ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i,
                                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                          sampler);

    /* The output takes the binding right after the last input */
    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in,
                                      VK_IMAGE_LAYOUT_GENERAL,
                                      VK_NULL_HANDLE);

    /* Output size rounded up to whole workgroups */
    vk->CmdDispatch(exec->buf,
                    FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
                    FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
                    pl->wg_size[2]);

    return ff_vk_exec_submit(vkctx, exec);

fail:
    ff_vk_exec_discard_deps(vkctx, exec);
    return err;
}