FFmpeg/libavfilter/vulkan_filter.c
Lynne 18d964fc2c
vulkan: enable encoding of images if video_maintenance1 is enabled
Vulkan encoding was designed in a very... consolidated way.
You had to know the exact codec and profile that the image was going to
eventually be encoded as at... image creation time. Unfortunately, as good
as our code is, glimpsing into the exact future isn't what it's capable of.

video_maintenance1 removed that requirement, which only then made encoding
images practically possible.
2024-08-16 01:22:16 +02:00

481 lines
18 KiB
C

/*
* Copyright (c) Lynne
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "filters.h"
#include "vulkan_filter.h"
#include "libavutil/vulkan_loader.h"
/**
 * Prepare a Vulkan filtering context, reusing an existing frames context
 * when it is compatible and allocating a fresh one otherwise.
 *
 * A supplied frames context is reused only if its dimensions, software
 * format, tiling and image usage flags match what the filter needs and every
 * per-plane subformat supports storage images. Otherwise a new frames
 * context is created on the same device (or on avctx->hw_device_ctx when no
 * frames context was supplied).
 *
 * @param avctx      filter context; supplies the fallback device reference
 *                   and the filter name used for the descriptor buffer check
 * @param s          Vulkan context to fill in (extensions, function table,
 *                   frames/device pointers)
 * @param frames_ref candidate frames context to reuse, may be NULL; it is
 *                   not consumed, a new reference is taken when reused
 * @param width      required frame width
 * @param height     required frame height
 * @param sw_format  required software pixel format
 * @return the result of ff_vk_load_props() once everything else succeeded,
 *         or a negative AVERROR code on failure
 */
int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s,
                              AVBufferRef *frames_ref,
                              int width, int height, enum AVPixelFormat sw_format)
{
    int err;
    AVHWFramesContext *frames_ctx;
    AVHWDeviceContext *device_ctx;
    AVVulkanFramesContext *vk_frames;
    AVVulkanDeviceContext *vk_dev;
    AVBufferRef *device_ref = avctx->hw_device_ctx;

    /* Check if context is reusable as-is */
    if (frames_ref) {
        int no_storage = 0;
        FFVulkanFunctions *vk;
        VkImageUsageFlagBits usage_req;
        const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format);

        frames_ctx = (AVHWFramesContext *)frames_ref->data;
        device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
        vk_frames  = frames_ctx->hwctx;
        vk_dev     = device_ctx->hwctx;

        /* Width and height mismatch */
        if (width  != frames_ctx->width ||
            height != frames_ctx->height)
            goto skip;

        /* Format mismatch */
        if (sw_format != frames_ctx->sw_format)
            goto skip;

        /* No subformat list to walk in the storage check below */
        if (!sub)
            goto skip;

        /* Unusual tiling mismatch. Don't let linear through either. */
        if (vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL)
            goto skip;

        s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
                                                 vk_dev->nb_enabled_dev_extensions);

        /* More advanced format checks */
        err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
        if (err < 0)
            return err;
        vk = &s->vkfn;

        /* Usage mismatch */
        usage_req = VK_IMAGE_USAGE_SAMPLED_BIT |
                    VK_IMAGE_USAGE_STORAGE_BIT;

        /* If format supports hardware encoding, make sure
         * the context includes it.
         * NOTE(review): the mask test below is true when either extension
         * bit is set, not only when both are - confirm this is intended. */
        if (vk_frames->format[1] == VK_FORMAT_UNDEFINED &&
            (s->extensions & (FF_VK_EXT_VIDEO_ENCODE_QUEUE |
                              FF_VK_EXT_VIDEO_MAINTENANCE_1))) {
            VkFormatProperties3 fprops = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
            };
            VkFormatProperties2 prop = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
                .pNext = &fprops,
            };
            vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev,
                                                   vk_frames->format[0],
                                                   &prop);
            if (fprops.optimalTilingFeatures & VK_FORMAT_FEATURE_2_VIDEO_ENCODE_INPUT_BIT_KHR)
                usage_req |= VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR;
        }

        if ((vk_frames->usage & usage_req) != usage_req)
            goto skip;

        /* Check if the subformats can do storage */
        for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) {
            VkFormatProperties2 prop = {
                .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
            };
            vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i],
                                                   &prop);
            if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) {
                no_storage |= !(prop.formatProperties.linearTilingFeatures &
                                VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
            } else {
                no_storage |= !(prop.formatProperties.optimalTilingFeatures &
                                VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
            }
        }

        /* Check if it's usable */
        if (no_storage) {
skip:
            /* Not reusable: keep the device, drop the frames context so a
             * new one is allocated below. */
            device_ref = frames_ctx->device_ref;
            frames_ref = NULL;
        } else {
            frames_ref = av_buffer_ref(frames_ref);
            if (!frames_ref)
                return AVERROR(ENOMEM);
        }
    }

    if (!frames_ref) {
        if (!device_ref) {
            av_log(avctx, AV_LOG_ERROR,
                   "Vulkan filtering requires a device context!\n");
            return AVERROR(EINVAL);
        }

        frames_ref = av_hwframe_ctx_alloc(device_ref);
        if (!frames_ref)
            return AVERROR(ENOMEM);

        frames_ctx = (AVHWFramesContext *)frames_ref->data;
        frames_ctx->format    = AV_PIX_FMT_VULKAN;
        frames_ctx->sw_format = sw_format;
        frames_ctx->width     = width;
        frames_ctx->height    = height;

        /* s->hwfc below must describe the context stored in s->frames,
         * not a rejected input context (or nothing at all). */
        vk_frames = frames_ctx->hwctx;

        err = av_hwframe_ctx_init(frames_ref);
        if (err < 0) {
            av_buffer_unref(&frames_ref);
            return err;
        }

        device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data;
        vk_dev     = device_ctx->hwctx;
    }

    s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
                                             vk_dev->nb_enabled_dev_extensions);

    /**
     * libplacebo does not use descriptor buffers.
     */
    if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) &&
        strcmp(avctx->filter->name, "libplacebo")) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires that "
               "the %s extension is supported!\n",
               VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME);
        av_buffer_unref(&frames_ref);
        return AVERROR(EINVAL);
    }

    err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1);
    if (err < 0) {
        av_buffer_unref(&frames_ref);
        return err;
    }

    s->frames_ref = frames_ref;
    s->frames     = frames_ctx;
    s->hwfc       = vk_frames;
    s->device     = device_ctx;
    s->hwctx      = device_ctx->hwctx;

    err = ff_vk_load_props(s);
    if (err < 0)
        av_buffer_unref(&s->frames_ref);

    return err;
}
/**
 * Input link configuration shared by Vulkan filters.
 *
 * Rejects links that carry no hardware frames context or whose frames are
 * not AV_PIX_FMT_VULKAN. For the filter's first input only, it records the
 * frames context (borrowed, no new reference) and seeds the default input
 * and output format and the output dimensions from the link.
 *
 * @param inlink input link being configured
 * @return 0 on success, AVERROR(EINVAL) if the link is unsuitable
 */
int ff_vk_filter_config_input(AVFilterLink *inlink)
{
    AVFilterContext *avctx = inlink->dst;
    FFVulkanContext *s = inlink->dst->priv;
    FilterLink *l = ff_filter_link(inlink);
    AVHWFramesContext *in_frames;

    if (!l->hw_frames_ctx) {
        av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a "
               "hardware frames context on the input.\n");
        return AVERROR(EINVAL);
    }

    in_frames = (AVHWFramesContext *)l->hw_frames_ctx->data;
    if (in_frames->format != AV_PIX_FMT_VULKAN)
        return AVERROR(EINVAL);

    /* Only the first input provides the device and the default output
     * parameters; any other input has nothing more to do. */
    if (inlink != avctx->inputs[0])
        return 0;

    /* Borrowed pointer: the link keeps ownership of the reference */
    s->input_frames_ref = l->hw_frames_ctx;

    /* Defaults, taken from the first input */
    s->output_width  = inlink->w;
    s->output_height = inlink->h;
    s->input_format  = in_frames->sw_format;
    s->output_format = in_frames->sw_format;

    return 0;
}
/**
 * Output link configuration shared by Vulkan filters.
 *
 * Drops any frames context already attached to the link, (re)initializes the
 * filter's Vulkan context for the configured output size and format, then
 * attaches a new reference to the resulting frames context and sets the
 * link dimensions.
 *
 * @param outlink output link being configured
 * @return the non-negative result of ff_vk_filter_init_context() on success,
 *         or a negative AVERROR code on failure
 */
int ff_vk_filter_config_output(AVFilterLink *outlink)
{
    FFVulkanContext *s = outlink->src->priv;
    FilterLink *l = ff_filter_link(outlink);
    int err;

    av_buffer_unref(&l->hw_frames_ctx);

    err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref,
                                    s->output_width, s->output_height,
                                    s->output_format);
    if (err < 0)
        return err;

    /* The link gets its own reference; s->frames_ref stays with the filter */
    l->hw_frames_ctx = av_buffer_ref(s->frames_ref);
    if (!l->hw_frames_ctx)
        return AVERROR(ENOMEM);

    outlink->h = s->output_height;
    outlink->w = s->output_width;

    return err;
}
/**
 * Generic init callback for Vulkan filters: marks the output format as not
 * yet chosen.
 *
 * @param avctx filter context whose private data is an FFVulkanContext
 * @return always 0
 */
int ff_vk_filter_init(AVFilterContext *avctx)
{
    FFVulkanContext *vkctx = avctx->priv;

    /* AV_PIX_FMT_NONE until an output format is assigned */
    vkctx->output_format = AV_PIX_FMT_NONE;

    return 0;
}
/**
 * Record and submit one compute dispatch that writes out_f, optionally
 * reading in_f.
 *
 * @param vkctx     Vulkan context (function table, output dimensions)
 * @param e         execution pool to take an execution context from
 * @param pl        compute pipeline to bind and dispatch
 * @param out_f     destination frame
 * @param in_f      source frame, may be NULL for generator-style filters
 * @param sampler   sampler passed along with the input image descriptors
 * @param push_src  push constant data, may be NULL to skip the update
 * @param push_size size of the push constant data in bytes
 * @return result of ff_vk_exec_submit() on success, negative error code if
 *         adding a frame dependency or creating image views failed
 */
int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e,
                                FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f,
                                VkSampler sampler, void *push_src, size_t push_size)
{
    int err = 0;
    int nb_barriers = 0;
    uint32_t groups_x, groups_y;
    FFVulkanFunctions *vk = &vkctx->vkfn;
    VkImageView src_views[AV_NUM_DATA_POINTERS];
    VkImageView dst_views[AV_NUM_DATA_POINTERS];
    VkImageMemoryBarrier2 barriers[37];
    VkDependencyInfo dep_info = {
        .sType                = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = barriers,
    };

    /* Update descriptors and init the exec context */
    FFVkExecContext *exec = ff_vk_exec_get(e);
    ff_vk_exec_start(vkctx, exec);

    ff_vk_exec_bind_pipeline(vkctx, exec, pl);

    if (push_src)
        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
                               0, push_size, push_src);

    /* Optional input: read-only in the compute shader */
    if (in_f) {
        RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f,
                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                     VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
        RET(ff_vk_create_imageviews(vkctx, exec, src_views, in_f));
        ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, src_views, 0, 0,
                                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                          sampler);
        ff_vk_frame_barrier(vkctx, exec, in_f, barriers, &nb_barriers,
                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                            VK_ACCESS_SHADER_READ_BIT,
                            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                            VK_QUEUE_FAMILY_IGNORED);
    }

    /* Output: written in the compute shader. Its descriptor index is 1 when
     * an input is present and 0 otherwise. */
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_create_imageviews(vkctx, exec, dst_views, out_f));
    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, dst_views, 0,
                                      in_f ? 1 : 0,
                                      VK_IMAGE_LAYOUT_GENERAL,
                                      VK_NULL_HANDLE);
    ff_vk_frame_barrier(vkctx, exec, out_f, barriers, &nb_barriers,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    /* Issue every barrier collected above in one call */
    dep_info.imageMemoryBarrierCount = nb_barriers;
    vk->CmdPipelineBarrier2(exec->buf, &dep_info);

    /* Workgroup counts derived from the output size and the pipeline's
     * workgroup size; Z is taken from wg_size[2] as-is. */
    groups_x = FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0];
    groups_y = FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1];
    vk->CmdDispatch(exec->buf, groups_x, groups_y, pl->wg_size[2]);

    return ff_vk_exec_submit(vkctx, exec);

fail:
    ff_vk_exec_discard_deps(vkctx, exec);
    return err;
}
/**
 * Record and submit two compute dispatches in one execution context:
 * pls[0] reads in and writes tmp, pls[1] reads tmp and writes out.
 *
 * NOTE(review): no pipeline barrier is recorded between the two dispatches
 * in this function - confirm that ordering on tmp is handled elsewhere.
 *
 * @param vkctx     Vulkan context (function table, output dimensions)
 * @param e         execution pool to take an execution context from
 * @param pls       the two pipelines, in dispatch order
 * @param out       final destination frame
 * @param tmp       intermediate frame, written by pass 0, read by pass 1
 * @param in        source frame
 * @param sampler   sampler passed along with the source image descriptors
 * @param push_src  push constant data applied to both passes, may be NULL
 * @param push_size size of the push constant data in bytes
 * @return result of ff_vk_exec_submit() on success, negative error code if
 *         adding a frame dependency or creating image views failed
 */
int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e,
                               FFVulkanPipeline *pls[2],
                               AVFrame *out, AVFrame *tmp, AVFrame *in,
                               VkSampler sampler, void *push_src, size_t push_size)
{
    int err = 0;
    int nb_barriers = 0;
    FFVulkanFunctions *vk = &vkctx->vkfn;
    VkImageView in_views[AV_NUM_DATA_POINTERS];
    VkImageView tmp_views[AV_NUM_DATA_POINTERS];
    VkImageView out_views[AV_NUM_DATA_POINTERS];
    VkImageMemoryBarrier2 barriers[37];
    VkDependencyInfo dep_info = {
        .sType                = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pImageMemoryBarriers = barriers,
    };

    /* Pass i reads chain[i] and writes chain[i + 1] */
    AVFrame *chain[3]           = { in, tmp, out };
    VkImageView *chain_views[3] = { in_views, tmp_views, out_views };
    /* Layout the source image of each pass is in */
    const VkImageLayout src_layout[2] = {
        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
        VK_IMAGE_LAYOUT_GENERAL,
    };

    /* Update descriptors and init the exec context */
    FFVkExecContext *exec = ff_vk_exec_get(e);
    ff_vk_exec_start(vkctx, exec);

    RET(ff_vk_exec_add_dep_frame(vkctx, exec, in,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));

    RET(ff_vk_create_imageviews(vkctx, exec, in_views,  in));
    RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp));
    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));

    /* in: read only; tmp: read and written; out: written */
    ff_vk_frame_barrier(vkctx, exec, in, barriers, &nb_barriers,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT,
                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);
    ff_vk_frame_barrier(vkctx, exec, tmp, barriers, &nb_barriers,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);
    ff_vk_frame_barrier(vkctx, exec, out, barriers, &nb_barriers,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    /* Issue every barrier collected above in one call */
    dep_info.imageMemoryBarrierCount = nb_barriers;
    vk->CmdPipelineBarrier2(exec->buf, &dep_info);

    for (int pass = 0; pass < 2; pass++) {
        FFVulkanPipeline *pl = pls[pass];
        uint32_t groups_x, groups_y;

        ff_vk_exec_bind_pipeline(vkctx, exec, pl);

        if (push_src)
            ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
                                   0, push_size, push_src);

        /* Source images, with the sampler */
        ff_vk_update_descriptor_img_array(vkctx, pl, exec,
                                          chain[pass], chain_views[pass], 0, 0,
                                          src_layout[pass],
                                          sampler);
        /* Destination images */
        ff_vk_update_descriptor_img_array(vkctx, pl, exec,
                                          chain[pass + 1], chain_views[pass + 1], 0, 1,
                                          VK_IMAGE_LAYOUT_GENERAL,
                                          VK_NULL_HANDLE);

        groups_x = FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0];
        groups_y = FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1];
        vk->CmdDispatch(exec->buf, groups_x, groups_y, pl->wg_size[2]);
    }

    return ff_vk_exec_submit(vkctx, exec);

fail:
    ff_vk_exec_discard_deps(vkctx, exec);
    return err;
}
/**
 * Record and submit one compute dispatch that reads nb_in input frames and
 * writes a single output frame.
 *
 * Input i is bound at descriptor index i and the output at index nb_in.
 *
 * @param vkctx     Vulkan context (function table, output dimensions)
 * @param e         execution pool to take an execution context from
 * @param pl        compute pipeline to bind and dispatch
 * @param out       destination frame
 * @param in        array of nb_in source frames
 * @param nb_in     number of source frames; must be between 0 and 16, the
 *                  capacity of the local image view table
 * @param sampler   sampler passed along with the input image descriptors
 * @param push_src  push constant data, may be NULL to skip the update
 * @param push_size size of the push constant data in bytes
 * @return result of ff_vk_exec_submit() on success, AVERROR(EINVAL) if nb_in
 *         is out of range, or a negative error code if adding a frame
 *         dependency or creating image views failed
 */
int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e,
                             FFVulkanPipeline *pl,
                             AVFrame *out, AVFrame *in[], int nb_in,
                             VkSampler sampler, void *push_src, size_t push_size)
{
    int err = 0;
    FFVulkanFunctions *vk = &vkctx->vkfn;
    VkImageView in_views[16][AV_NUM_DATA_POINTERS];
    VkImageView out_views[AV_NUM_DATA_POINTERS];
    VkImageMemoryBarrier2 img_bar[128];
    int nb_img_bar = 0;
    FFVkExecContext *exec;

    /* in_views has a fixed number of rows; refuse counts that would index
     * past it before any execution context is acquired. */
    if (nb_in < 0 || nb_in > (int)(sizeof(in_views) / sizeof(in_views[0])))
        return AVERROR(EINVAL);

    /* Update descriptors and init the exec context */
    exec = ff_vk_exec_get(e);
    ff_vk_exec_start(vkctx, exec);

    /* Inputs */
    for (int i = 0; i < nb_in; i++) {
        RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i],
                                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                     VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
        RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i]));

        ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar,
                            VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                            VK_ACCESS_SHADER_READ_BIT,
                            VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                            VK_QUEUE_FAMILY_IGNORED);
    }

    /* Output */
    RET(ff_vk_exec_add_dep_frame(vkctx, exec, out,
                                 VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                 VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
    RET(ff_vk_create_imageviews(vkctx, exec, out_views, out));

    ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar,
                        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                        VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                        VK_ACCESS_SHADER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);

    /* Issue every barrier collected above in one call */
    vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
            .pImageMemoryBarriers = img_bar,
            .imageMemoryBarrierCount = nb_img_bar,
        });

    ff_vk_exec_bind_pipeline(vkctx, exec, pl);

    if (push_src)
        ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT,
                               0, push_size, push_src);

    for (int i = 0; i < nb_in; i++)
        ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i,
                                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                                          sampler);

    ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in,
                                      VK_IMAGE_LAYOUT_GENERAL,
                                      VK_NULL_HANDLE);

    /* Workgroup counts derived from the output size and the pipeline's
     * workgroup size; Z is taken from wg_size[2] as-is. */
    vk->CmdDispatch(exec->buf,
                    FFALIGN(vkctx->output_width,  pl->wg_size[0])/pl->wg_size[0],
                    FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1],
                    pl->wg_size[2]);

    return ff_vk_exec_submit(vkctx, exec);

fail:
    ff_vk_exec_discard_deps(vkctx, exec);
    return err;
}