hwcontext_vulkan: add a new mechanism to expose used queue families

The issue with the old mechanism is that we had to introduce new
API each time we needed a new queue family, and all the queue families
were functionally fixed to a given purpose.

Nvidia's GPUs are able to handle video encoding and compute on the
same queue, which results in a speedup when pre-processing is required.

Also, this enables us to expose optical flow queues for frame interpolation.
This commit is contained in:
Lynne 2024-07-09 03:03:19 +02:00
parent d88a988d3d
commit 13489c8a21
No known key found for this signature in database
GPG Key ID: A2FEA5F03F034464
3 changed files with 94 additions and 18 deletions

View File

@ -1423,12 +1423,13 @@ static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
int err;
int err = 0;
uint32_t qf_num;
VulkanDevicePriv *p = ctx->hwctx;
AVVulkanDeviceContext *hwctx = &p->p;
FFVulkanFunctions *vk = &p->vkctx.vkfn;
VkQueueFamilyProperties *qf;
VkQueueFamilyProperties2 *qf;
VkQueueFamilyVideoPropertiesKHR *qf_vid;
int graph_index, comp_index, tx_index, enc_index, dec_index;
/* Set device extension flags */
@ -1474,38 +1475,53 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
return AVERROR_EXTERNAL;
}
qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties2));
if (!qf)
return AVERROR(ENOMEM);
vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
qf_vid = av_malloc_array(qf_num, sizeof(VkQueueFamilyVideoPropertiesKHR));
if (!qf_vid) {
av_free(qf);
return AVERROR(ENOMEM);
}
for (uint32_t i = 0; i < qf_num; i++) {
qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
.sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
};
qf[i] = (VkQueueFamilyProperties2) {
.sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
.pNext = &qf_vid[i],
};
}
vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &qf_num, qf);
p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
if (!p->qf_mutex) {
av_free(qf);
return AVERROR(ENOMEM);
err = AVERROR(ENOMEM);
goto end;
}
p->nb_tot_qfs = qf_num;
for (uint32_t i = 0; i < qf_num; i++) {
p->qf_mutex[i] = av_calloc(qf[i].queueCount, sizeof(**p->qf_mutex));
p->qf_mutex[i] = av_calloc(qf[i].queueFamilyProperties.queueCount,
sizeof(**p->qf_mutex));
if (!p->qf_mutex[i]) {
av_free(qf);
return AVERROR(ENOMEM);
err = AVERROR(ENOMEM);
goto end;
}
for (uint32_t j = 0; j < qf[i].queueCount; j++) {
for (uint32_t j = 0; j < qf[i].queueFamilyProperties.queueCount; j++) {
err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
if (err != 0) {
av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n",
av_err2str(err));
av_free(qf);
return AVERROR(err);
err = AVERROR(err);
goto end;
}
}
}
av_free(qf);
graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1;
comp_index = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1;
tx_index = hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1;
@ -1517,13 +1533,15 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
if (ctx_qf < 0 && required) { \
av_log(ctx, AV_LOG_ERROR, "%s queue family is required, but marked as missing" \
" in the context!\n", type); \
return AVERROR(EINVAL); \
err = AVERROR(EINVAL); \
goto end; \
} else if (fidx < 0 || ctx_qf < 0) { \
break; \
} else if (ctx_qf >= qf_num) { \
av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
type, ctx_qf, qf_num); \
return AVERROR(EINVAL); \
err = AVERROR(EINVAL); \
goto end; \
} \
\
av_log(ctx, AV_LOG_VERBOSE, "Using queue family %i (queues: %i)" \
@ -1550,6 +1568,36 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
#undef CHECK_QUEUE
/* Update the new queue family fields. If non-zero already,
* it means API users have set it. */
if (!hwctx->nb_qf) {
#define ADD_QUEUE(ctx_qf, qc, flag) \
do { \
if (ctx_qf != -1) { \
hwctx->qf[hwctx->nb_qf++] = (AVVulkanDeviceQueueFamily) { \
.idx = ctx_qf, \
.num = qc, \
.flags = flag, \
}; \
} \
} while (0)
ADD_QUEUE(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
ADD_QUEUE(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
ADD_QUEUE(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
ADD_QUEUE(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
ADD_QUEUE(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
#undef ADD_QUEUE
}
for (int i = 0; i < hwctx->nb_qf; i++) {
if (!hwctx->qf[i].video_caps &&
hwctx->qf[i].flags & (VK_QUEUE_VIDEO_DECODE_BIT_KHR |
VK_QUEUE_VIDEO_ENCODE_BIT_KHR)) {
hwctx->qf[i].video_caps = qf_vid[hwctx->qf[i].idx].videoCodecOperations;
}
}
if (!hwctx->lock_queue)
hwctx->lock_queue = lock_queue;
if (!hwctx->unlock_queue)
@ -1565,7 +1613,10 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
ff_vk_qf_init(&p->vkctx, &p->compute_qf, VK_QUEUE_COMPUTE_BIT);
ff_vk_qf_init(&p->vkctx, &p->transfer_qf, VK_QUEUE_TRANSFER_BIT);
return 0;
end:
av_free(qf_vid);
av_free(qf);
return err;
}
static int vulkan_device_create(AVHWDeviceContext *ctx, const char *device,

View File

@ -30,6 +30,20 @@
typedef struct AVVkFrame AVVkFrame;
typedef struct AVVulkanDeviceQueueFamily {
/* Queue family index */
int idx;
/* Number of queues in the queue family in use */
int num;
/* Queue family capabilities. Must be non-zero.
* Flags may be removed to indicate the queue family may not be used
* for a given purpose. */
VkQueueFlagBits flags;
/* Vulkan implementations are allowed to list multiple video queues
* which differ in what they can encode or decode. */
VkVideoCodecOperationFlagBitsKHR video_caps;
} AVVulkanDeviceQueueFamily;
/**
* @file
* API-specific header for AV_HWDEVICE_TYPE_VULKAN.
@ -151,6 +165,17 @@ typedef struct AVVulkanDeviceContext {
* Similar to lock_queue(), unlocks a queue. Must only be called after locking.
*/
void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index);
/**
* Queue families used. Must be preferentially ordered. List may contain
* duplicates.
*
* For compatibility reasons, all the enabled queue families listed above
* (queue_family_(tx/comp/encode/decode)_index) must also be included in
* this list until they're removed after deprecation.
*/
AVVulkanDeviceQueueFamily qf[64];
int nb_qf;
} AVVulkanDeviceContext;
/**

View File

@ -79,7 +79,7 @@
*/
#define LIBAVUTIL_VERSION_MAJOR 59
#define LIBAVUTIL_VERSION_MINOR 33
#define LIBAVUTIL_VERSION_MINOR 34
#define LIBAVUTIL_VERSION_MICRO 100
#define LIBAVUTIL_VERSION_INT AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \