FFmpeg/libavfilter/vulkan_filter.c
Lynne 8a7af4aa49
vulkan: add support for regular descriptor pools
This permits:
 - The use of Vulkan filtering on many more devices
 - Better debugging due to lack of descriptor buffer support in layers

Much of the changes here are due to a requirement that updates to
descriptors must happen between the command buffer being waited on,
and the pipeline not being bound.

We routinely did it the other way around, by updating only after
we bind the pipeline.
2024-09-23 13:40:38 +02:00

473 lines
18 KiB
C

/*
* Copyright (c) Lynne
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "filters.h"
#include "vulkan_filter.h"
#include "libavutil/vulkan_loader.h"
int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s,
AVBufferRef *frames_ref,
int width, int height, enum AVPixelFormat sw_format)
{
int err;
AVHWFramesContext *frames_ctx;
AVHWDeviceContext *device_ctx;
AVVulkanFramesContext *vk_frames;
AVVulkanDeviceContext *vk_dev;
AVBufferRef *device_ref = avctx->hw_device_ctx;
/* Check if context is reusable as-is */
if (frames_ref) {
int no_storage = 0;
FFVulkanFunctions *vk;
VkImageUsageFlagBits usage_req;
const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format);
frames_ctx = (AVHWFramesContext *)frames_ref->data;
device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
vk_frames = frames_ctx->hwctx;
vk_dev = device_ctx->hwctx;
/* Width and height mismatch */
if (width != frames_ctx->width ||
height != frames_ctx->height)
goto skip;
/* Format mismatch */
if (sw_format != frames_ctx->sw_format)
goto skip;
/* Don't let linear through. */
if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR)
goto skip;
s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
vk_dev->nb_enabled_dev_extensions);
/* More advanced format checks */
err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
if (err < 0)
return err;
vk = &s->vkfn;
/* Usage mismatch */
usage_req = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT;
/* If format supports hardware encoding, make sure
* the context includes it. */
if (vk_frames->format[1] == VK_FORMAT_UNDEFINED &&
(s->extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
FF_VK_EXT_VIDEO_MAINTENANCE_1))) {
VkFormatProperties3 fprops = {
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
};
VkFormatProperties2 prop = {
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
.pNext = &fprops,
};
vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev,
vk_frames->format[0],
&prop);
if (fprops.optimalTilingFeatures & VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR)
usage_req |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
}
if ((vk_frames->usage & usage_req) != usage_req)
goto skip;
/* Check if the subformats can do storage */
for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) {
VkFormatProperties2 prop = {
.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
};
vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i],
&prop);
no_storage |= !(prop.formatProperties.optimalTilingFeatures &
VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
}
/* Check if it's usable */
if (no_storage) {
skip:
av_log(avctx, AV_LOG_VERBOSE, "Cannot reuse context, creating a new one\n");
device_ref = frames_ctx->device_ref;
frames_ref = NULL;
} else {
av_log(avctx, AV_LOG_VERBOSE, "Reusing existing frames context\n");
frames_ref = av_buffer_ref(frames_ref);
if (!frames_ref)
return AVERROR(ENOMEM);
}
}
if (!frames_ref) {
if (!device_ref) {
av_log(avctx, AV_LOG_ERROR,
"Vulkan filtering requires a device context!\n");
return AVERROR(EINVAL);
}
frames_ref = av_hwframe_ctx_alloc(device_ref);
frames_ctx = (AVHWFramesContext *)frames_ref->data;
frames_ctx->format = AV_PIX_FMT_VULKAN;
frames_ctx->sw_format = sw_format;
frames_ctx->width = width;
frames_ctx->height = height;
vk_frames = frames_ctx->hwctx;
vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_STORAGE_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
err = av_hwframe_ctx_init(frames_ref);
if (err < 0) {
av_buffer_unref(&frames_ref);
return err;
}
device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
vk_dev = device_ctx->hwctx;
}
s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
vk_dev->nb_enabled_dev_extensions);
err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
if (err < 0) {
av_buffer_unref(&frames_ref);
return err;
}
s->frames_ref = frames_ref;
s->frames = frames_ctx;
s->hwfc = vk_frames;
s->device = device_ctx;
s->hwctx = device_ctx->hwctx;
err = ff_vk_load_props(s);
if (err < 0)
av_buffer_unref(&s->frames_ref);
return err;
}
int ff_vk_filter_config_input(AVFilterLink *inlink)
{
FilterLink *l = ff_filter_link(inlink);
AVHWFramesContext *input_frames;
AVFilterContext *avctx = inlink->dst;
FFVulkanContext *s = inlink->dst->priv;
if (!l->hw_frames_ctx) {
av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a "
"hardware frames context on the input.\n");
return AVERROR(EINVAL);
}
input_frames = (AVHWFramesContext *)l->hw_frames_ctx->data;
if (input_frames->format != AV_PIX_FMT_VULKAN)
return AVERROR(EINVAL);
/* Extract the device and default output format from the first input. */
if (avctx->inputs[0] != inlink)
return 0;
/* Save the ref, without reffing it */
s->input_frames_ref = l->hw_frames_ctx;
/* Defaults */
s->input_format = input_frames->sw_format;
s->output_format = input_frames->sw_format;
s->output_width = inlink->w;
s->output_height = inlink->h;
return 0;
}
int ff_vk_filter_config_output(AVFilterLink *outlink)
{
int err;
FilterLink *l = ff_filter_link(outlink);
FFVulkanContext *s = outlink->src->priv;
av_buffer_unref(&l->hw_frames_ctx);
err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref,
s->output_width, s->output_height,
s->output_format);
if (err < 0)
return err;
l->hw_frames_ctx = av_buffer_ref(s->frames_ref);
if (!l->hw_frames_ctx)
return AVERROR(ENOMEM);
outlink->w = s->output_width;
outlink->h = s->output_height;
return err;
}
int ff_vk_filter_init(AVFilterContext *avctx)
{
FFVulkanContext *s = avctx->priv;
s->output_format = AV_PIX_FMT_NONE;
return 0;
}
int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
VkSampler sampler, void *push_src, size_t push_size)
{
int err = 0;
FFVulkanFunctions *vk = &vkctx->vkfn;
VkImageView in_views[AV_NUM_DATA_POINTERS];
VkImageView out_views[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier2 img_bar[37];
int nb_img_bar = 0;
/* Update descriptors and init the exec context */
FFVkExecContext *exec = ff_vk_exec_get(e);
ff_vk_exec_start(vkctx, exec);
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f));
ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, !!in_f,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
if (in_f) {
RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f));
ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
sampler);
}
/* Bind pipeline, update push data */
ff_vk_exec_bind_pipeline(vkctx, exec, pl);
if (push_src)
ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
0, push_size, push_src);
/* Add data sync barriers */
ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
if (in_f)
ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
vk->CmdDispatch(exec->buf,
FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0],
FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
pl->wg_size[2]);
return ff_vk_exec_submit(vkctx, exec);
fail:
ff_vk_exec_discard_deps(vkctx, exec);
return err;
}
int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
FFVulkanPipeline *pls[2],
AVFrame *out, AVFrame *tmp, AVFrame *in,
VkSampler sampler, void *push_src, size_t push_size)
{
int err = 0;
FFVulkanFunctions *vk = &vkctx->vkfn;
VkImageView in_views[AV_NUM_DATA_POINTERS];
VkImageView tmp_views[AV_NUM_DATA_POINTERS];
VkImageView out_views[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier2 img_bar[37];
int nb_img_bar = 0;
/* Update descriptors and init the exec context */
FFVkExecContext *exec = ff_vk_exec_get(e);
ff_vk_exec_start(vkctx, exec);
RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_create_imageviews(vkctx, exec, in_views, in));
RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp));
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_QUEUE_FAMILY_IGNORED);
ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
for (int i = 0; i < 2; i++) {
FFVulkanPipeline *pl = pls[i];
AVFrame *src_f = !i ? in : tmp;
AVFrame *dst_f = !i ? tmp : out;
VkImageView *src_views = !i ? in_views : tmp_views;
VkImageView *dst_views = !i ? tmp_views : out_views;
ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0,
!i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL :
VK_IMAGE_LAYOUT_GENERAL,
sampler);
ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
/* Bind pipeline, update push data */
ff_vk_exec_bind_pipeline(vkctx, exec, pl);
if (push_src)
ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
0, push_size, push_src);
vk->CmdDispatch(exec->buf,
FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0],
FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
pl->wg_size[2]);
}
return ff_vk_exec_submit(vkctx, exec);
fail:
ff_vk_exec_discard_deps(vkctx, exec);
return err;
}
int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e,
FFVulkanPipeline *pl,
AVFrame *out, AVFrame *in[], int nb_in,
VkSampler sampler, void *push_src, size_t push_size)
{
int err = 0;
FFVulkanFunctions *vk = &vkctx->vkfn;
VkImageView in_views[16][AV_NUM_DATA_POINTERS];
VkImageView out_views[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier2 img_bar[128];
int nb_img_bar = 0;
/* Update descriptors and init the exec context */
FFVkExecContext *exec = ff_vk_exec_get(e);
ff_vk_exec_start(vkctx, exec);
/* Add deps and create temporary imageviews */
RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));
for (int i = 0; i < nb_in; i++) {
RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i],
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i]));
}
/* Update descriptor sets */
ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
for (int i = 0; i < nb_in; i++)
ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
sampler);
/* Bind pipeline, update push data */
ff_vk_exec_bind_pipeline(vkctx, exec, pl);
if (push_src)
ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
0, push_size, push_src);
/* Add data sync barriers */
ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL,
VK_QUEUE_FAMILY_IGNORED);
for (int i = 0; i < nb_in; i++)
ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_QUEUE_FAMILY_IGNORED);
vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
.sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
.pImageMemoryBarriers = img_bar,
.imageMemoryBarrierCount = nb_img_bar,
});
vk->CmdDispatch(exec->buf,
FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0],
FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
pl->wg_size[2]);
return ff_vk_exec_submit(vkctx, exec);
fail:
ff_vk_exec_discard_deps(vkctx, exec);
return err;
}