lavc/hevc_ps: parse VPS extension

Only implementing what's needed for MV-HEVC with two views.

Signed-off-by: Anton Khirnov <anton@khirnov.net>
This commit is contained in:
Niklas Haas 2024-04-16 19:11:42 +02:00 committed by Anton Khirnov
parent efa9d3deca
commit 7351e067bc
3 changed files with 364 additions and 14 deletions

View File

@ -100,11 +100,14 @@ enum HEVCSliceType {
enum {
// 7.4.3.1: vps_max_layers_minus1 is in [0, 62].
HEVC_MAX_LAYERS = 63,
HEVC_MAX_LAYERS = 63,
// 7.4.3.1: vps_max_sub_layers_minus1 is in [0, 6].
HEVC_MAX_SUB_LAYERS = 7,
HEVC_MAX_SUB_LAYERS = 7,
// 7.4.3.1: vps_num_layer_sets_minus1 is in [0, 1023].
HEVC_MAX_LAYER_SETS = 1024,
HEVC_MAX_LAYER_SETS = 1024,
// 7.4.3.1: vps_max_layer_id is in [0, 63].
HEVC_MAX_LAYER_ID = 63,
HEVC_MAX_NUH_LAYER_ID = 62,
// 7.4.2.1: vps_video_parameter_set_id is u(4).
HEVC_MAX_VPS_COUNT = 16,

View File

@ -311,11 +311,17 @@ static int decode_profile_tier_level(GetBitContext *gb, AVCodecContext *avctx,
}
static int parse_ptl(GetBitContext *gb, AVCodecContext *avctx,
PTL *ptl, int max_num_sub_layers)
int profile_present, PTL *ptl, int max_num_sub_layers)
{
int i;
if (decode_profile_tier_level(gb, avctx, &ptl->general_ptl) < 0 ||
get_bits_left(gb) < 8 + (8*2 * (max_num_sub_layers - 1 > 0))) {
int i, status = 0;
if (profile_present) {
status = decode_profile_tier_level(gb, avctx, &ptl->general_ptl);
} else {
memset(&ptl->general_ptl, 0, sizeof(ptl->general_ptl));
}
if (status < 0 || get_bits_left(gb) < 8 + (8*2 * (max_num_sub_layers - 1 > 0))) {
av_log(avctx, AV_LOG_ERROR, "PTL information too short\n");
return -1;
}
@ -444,13 +450,273 @@ static void hevc_vps_free(FFRefStructOpaque opaque, void *obj)
av_freep(&vps->data);
}
/**
 * Indices of the individual scalability_mask_flag[] bits read from the VPS
 * extension. decode_vps_ext() iterates i over
 * [0, HEVC_SCALABILITY_MASK_MAX] and compares i against these values.
 */
enum ScalabilityMask {
    HEVC_SCALABILITY_DEPTH      =  0,
    HEVC_SCALABILITY_MULTIVIEW  =  1,
    HEVC_SCALABILITY_SPATIAL    =  2,
    HEVC_SCALABILITY_AUXILIARY  =  3,
    /* index of the last scalability_mask_flag that is read */
    HEVC_SCALABILITY_MASK_MAX   = 15,
};
/**
 * Values of direct_dependency_type as parsed by decode_vps_ext(); anything
 * above HEVC_DEP_TYPE_BOTH is rejected there as unsupported.
 * NOTE(review): judging by the names these select sample prediction, motion
 * prediction, or both -- confirm against the specification.
 */
enum DependencyType {
    HEVC_DEP_TYPE_SAMPLE = 0,
    HEVC_DEP_TYPE_MV     = 1,
    HEVC_DEP_TYPE_BOTH   = 2,
};
/**
 * Parse the VPS extension, restricted to stereoscopic MV-HEVC (two layers,
 * two layer sets, multiview scalability only).
 *
 * @param gb                 bit reader positioned right after
 *                           vps_extension_flag
 * @param avctx              logging context
 * @param vps                VPS being filled; the base VPS fields
 *                           (vps_max_layers, vps_num_layer_sets,
 *                           vps_max_sub_layers, layer_id_in_nuh[0]) must
 *                           already be set by the caller
 * @param layer1_id_included layer_id_included_flag[1][] bits as read by the
 *                           caller from the base VPS
 *
 * @return 0 on success or when the extension is ignored because only one
 *         layer / layer set is present; AVERROR_PATCHWELCOME when the
 *         extension uses features outside the supported subset;
 *         AVERROR_INVALIDDATA on malformed data.
 */
static int decode_vps_ext(GetBitContext *gb, AVCodecContext *avctx, HEVCVPS *vps,
                          uint64_t layer1_id_included)
{
    PTL ptl_dummy;
    uint8_t max_sub_layers[HEVC_MAX_LAYERS];

    int splitting_flag, dimension_id_len, view_id_len, num_add_olss,
        default_output_layer_idc, direct_dep_type_len, direct_dep_type,
        sub_layers_max_present, sub_layer_flag_info_present_flag, nb_ptl;
    unsigned non_vui_extension_length;

    if (vps->vps_max_layers == 1 || vps->vps_num_layer_sets == 1) {
        av_log(avctx, AV_LOG_VERBOSE, "Ignoring VPS extensions with a single layer\n");
        return 0;
    }

    if (vps->vps_max_layers > 2) {
        av_log(avctx, AV_LOG_ERROR,
               "VPS has %d layers, only 2 layers are supported\n",
               vps->vps_max_layers);
        return AVERROR_PATCHWELCOME;
    }
    if (vps->vps_num_layer_sets > 2) {
        av_log(avctx, AV_LOG_ERROR,
               "VPS has %d layer sets, only 2 layer sets are supported\n",
               vps->vps_num_layer_sets);
        return AVERROR_PATCHWELCOME;
    }

    align_get_bits(gb);

    /**
     * For stereoscopic MV-HEVC, the following simplifying assumptions are made:
     *
     * - vps_max_layers = 2 (one base layer, one multiview layer)
     * - vps_num_layer_sets = 2 (one output layer set for each view)
     * - NumScalabilityTypes = 1 (only HEVC_SCALABILITY_MULTIVIEW)
     * - direct_dependency_flag[1][0] = 1 (second layer depends on first)
     * - num_add_olss = 0 (no extra output layer sets)
     * - default_output_layer_idc = 0 (1:1 mapping between OLSs and layers)
     * - layer_id_included_flag[1] = {1, 1} (consequence of layer dependencies)
     * - vps_num_rep_formats_minus1 = 0 (all layers have the same size)
     *
     * Which results in the following derived variables:
     * - ViewOrderIdx = {0, 1}
     * - NumViews = 2
     * - DependencyFlag[1][0] = 1
     * - NumDirectRefLayers = {0, 1}
     * - NumRefLayers = {0, 1}
     * - NumPredictedLayers = {1, 0}
     * - NumIndependentLayers = 1
     * - NumLayersInTreePartition = {2}
     * - NumLayerSets = 2
     * - NumOutputLayerSets = 2
     * - OlsIdxToLsIdx = {0, 1}
     * - LayerIdxInVps = {0, 1}
     * - NumLayersInIdList = {1, 2}
     * - NumNecessaryLayers = {1, 2}
     * - NecessaryLayerFlag = {{1, 0}, {1, 1}}
     * - NumOutputLayersInOutputLayerSet = {1, 2}
     * - OutputLayerFlag = {{1, 0}, {1, 1}}
     */
    vps->nb_layers = 2;

    /* PTL idx [1]: signalled without the general profile part */
    if (parse_ptl(gb, avctx, 0, &ptl_dummy, vps->vps_max_sub_layers) < 0)
        return AVERROR_INVALIDDATA;

    splitting_flag = get_bits1(gb);
    /* exactly the multiview bit must be set, every other bit must be clear */
    for (int i = 0; i <= HEVC_SCALABILITY_MASK_MAX; i++) {
        int scalability_mask_flag = get_bits1(gb);
        if (scalability_mask_flag != (i == HEVC_SCALABILITY_MULTIVIEW)) {
            av_log(avctx, AV_LOG_ERROR, "Scalability type %d not supported\n", i);
            return AVERROR_PATCHWELCOME;
        }
    }

    /* dimension_id_len is only read (and only used below) when
     * splitting_flag is unset */
    if (!splitting_flag)
        dimension_id_len = get_bits(gb, 3) + 1;

    if (get_bits1(gb)) { /* vps_nuh_layer_id_present_flag */
        int layer_id_in_nuh = get_bits(gb, 6);
        if (layer_id_in_nuh >= FF_ARRAY_ELEMS(vps->layer_idx)) {
            av_log(avctx, AV_LOG_ERROR, "Invalid layer_id_in_nuh[1]: %d\n",
                   layer_id_in_nuh);
            return AVERROR_INVALIDDATA;
        }
        vps->layer_idx[layer_id_in_nuh] = 1;
        vps->layer_id_in_nuh[1]         = layer_id_in_nuh;
    } else {
        vps->layer_idx[1]       = 1;
        vps->layer_id_in_nuh[1] = 1;
    }

    if (!splitting_flag) {
        int view_idx = get_bits(gb, dimension_id_len);
        if (view_idx != 1) {
            av_log(avctx, AV_LOG_ERROR, "Unexpected ViewOrderIdx: %d\n", view_idx);
            return AVERROR_PATCHWELCOME;
        }
    }

    view_id_len = get_bits(gb, 4);
    if (view_id_len)
        for (int i = 0; i < 2 /* NumViews */; i++)
            vps->view_id[i] = get_bits(gb, view_id_len);

    if (!get_bits1(gb) /* direct_dependency_flag */) {
        av_log(avctx, AV_LOG_WARNING, "Independent output layers not supported\n");
        return AVERROR_PATCHWELCOME;
    }
    vps->num_direct_ref_layers[1] = 1;

    sub_layers_max_present = get_bits1(gb); // vps_sub_layers_max_minus1_present_flag
    /* vps_max_layers is exactly 2 here, so entries [0] and [1] both get set */
    for (int i = 0; i < vps->vps_max_layers; i++)
        max_sub_layers[i] = sub_layers_max_present ? get_bits(gb, 3) + 1 :
                                                     vps->vps_max_sub_layers;

    if (get_bits1(gb) /* max_tid_ref_present_flag */)
        skip_bits(gb, 3); // max_tid_il_ref_pics_plus1

    vps->default_ref_layers_active = get_bits1(gb);

    nb_ptl = get_ue_golomb(gb) + 1;
    /* get_ue_golomb() may return a negative error code on malformed input;
     * a non-positive count must not reach av_ceil_log2()/get_bits() below */
    if (nb_ptl < 1) {
        av_log(avctx, AV_LOG_ERROR, "Invalid number of PTLs: %d\n", nb_ptl);
        return AVERROR_INVALIDDATA;
    }
    /* idx [0] is signalled in base VPS, idx [1] is signalled at the
     * start of VPS extension, indices 2+ are signalled here;
     * we ignore all but the first one anyway */
    for (int i = 2; i < nb_ptl; i++) {
        int profile_present = get_bits1(gb);
        if (parse_ptl(gb, avctx, profile_present, &ptl_dummy, vps->vps_max_sub_layers) < 0)
            return AVERROR_INVALIDDATA;
    }

    num_add_olss = get_ue_golomb(gb);
    if (num_add_olss != 0) {
        /* Since we don't implement support for independent output layer sets
         * and auxiliary layers, this should never be nonzero */
        av_log(avctx, AV_LOG_ERROR, "Unexpected num_add_olss: %d\n", num_add_olss);
        return AVERROR_PATCHWELCOME;
    }

    default_output_layer_idc = get_bits(gb, 2);
    if (default_output_layer_idc != 0) {
        av_log(avctx, AV_LOG_WARNING, "Unsupported default_output_layer_idc: %d\n",
               default_output_layer_idc);
        return AVERROR_PATCHWELCOME;
    }

    /* Consequence of established layer dependencies.
     * layer_id_in_nuh[] entries may be as large as 62, so the mask has to be
     * built with 64-bit shifts; shifting a plain int that far is undefined.
     * NOTE(review): the caller appears to read layer1_id_included MSB-first
     * with get_bits64(), which would only line up with this mask when
     * vps_max_layer_id == layer_id_in_nuh[1] -- confirm this is intended. */
    if (layer1_id_included != ((1ULL << vps->layer_id_in_nuh[0]) |
                               (1ULL << vps->layer_id_in_nuh[1]))) {
        av_log(avctx, AV_LOG_ERROR, "Dependent layer not included in layer ID?\n");
        return AVERROR_PATCHWELCOME;
    }

    vps->num_output_layer_sets = 2;
    vps->ols[1] = 3;

    for (int j = 0; j < av_popcount64(vps->ols[1]); j++) {
        /* get_bits() must not be asked for 0 bits (nb_ptl == 1); the index
         * is then 0, which is rejected just like before */
        int ptl_idx_len = av_ceil_log2(nb_ptl);
        int ptl_idx     = ptl_idx_len ? get_bits(gb, ptl_idx_len) : 0;
        if (ptl_idx < 1 || ptl_idx >= nb_ptl) {
            av_log(avctx, AV_LOG_ERROR, "Invalid PTL index: %d\n", ptl_idx);
            return AVERROR_INVALIDDATA;
        }
    }

    if (get_ue_golomb_31(gb) != 0 /* vps_num_rep_formats_minus1 */) {
        av_log(avctx, AV_LOG_ERROR, "Unexpected extra rep formats\n");
        return AVERROR_INVALIDDATA;
    }

    vps->rep_format.pic_width_in_luma_samples  = get_bits(gb, 16);
    vps->rep_format.pic_height_in_luma_samples = get_bits(gb, 16);

    if (!get_bits1(gb) /* chroma_and_bit_depth_vps_present_flag */) {
        av_log(avctx, AV_LOG_ERROR,
               "chroma_and_bit_depth_vps_present_flag=0 in first rep_format\n");
        return AVERROR_INVALIDDATA;
    }
    vps->rep_format.chroma_format_idc = get_bits(gb, 2);
    if (vps->rep_format.chroma_format_idc == 3)
        vps->rep_format.separate_colour_plane_flag = get_bits1(gb);
    vps->rep_format.bit_depth_luma   = get_bits(gb, 4) + 8;
    vps->rep_format.bit_depth_chroma = get_bits(gb, 4) + 8;
    if (vps->rep_format.bit_depth_luma > 16 ||
        vps->rep_format.bit_depth_chroma > 16 ||
        vps->rep_format.bit_depth_luma != vps->rep_format.bit_depth_chroma) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth: %"PRIu8" %"PRIu8"\n",
               vps->rep_format.bit_depth_luma, vps->rep_format.bit_depth_chroma);
        return AVERROR_PATCHWELCOME;
    }

    if (get_bits1(gb) /* conformance_window_vps_flag */) {
        /* offsets are stored scaled by the chroma subsampling factors */
        int vert_mult  = hevc_sub_height_c[vps->rep_format.chroma_format_idc];
        int horiz_mult = hevc_sub_width_c[vps->rep_format.chroma_format_idc];
        vps->rep_format.conf_win_left_offset   = get_ue_golomb(gb) * horiz_mult;
        vps->rep_format.conf_win_right_offset  = get_ue_golomb(gb) * horiz_mult;
        vps->rep_format.conf_win_top_offset    = get_ue_golomb(gb) * vert_mult;
        vps->rep_format.conf_win_bottom_offset = get_ue_golomb(gb) * vert_mult;
    }

    vps->max_one_active_ref_layer = get_bits1(gb);
    vps->poc_lsb_aligned          = get_bits1(gb);

    /* dpb_size() for the single non-base output layer set; only the values
     * from the last sub-layer that signals them are kept */
    sub_layer_flag_info_present_flag = get_bits1(gb);
    for (int j = 0; j < FFMAX(max_sub_layers[0], max_sub_layers[1]); j++) {
        int sub_layer_dpb_info_present_flag = 1;
        if (j > 0 && sub_layer_flag_info_present_flag)
            sub_layer_dpb_info_present_flag = get_bits1(gb);
        if (sub_layer_dpb_info_present_flag) {
            for (int k = 0; k < av_popcount64(vps->ols[1]); k++)
                vps->dpb_size.max_dec_pic_buffering = get_ue_golomb_long(gb) + 1;
            vps->dpb_size.max_num_reorder_pics      = get_ue_golomb_long(gb);
            vps->dpb_size.max_latency_increase      = get_ue_golomb_long(gb) - 1;
        }
    }

    direct_dep_type_len = get_ue_golomb_31(gb) + 2;
    if (direct_dep_type_len > 32) {
        av_log(avctx, AV_LOG_ERROR, "Invalid direct_dep_type_len: %d\n",
               direct_dep_type_len);
        return AVERROR_INVALIDDATA;
    }

    /* with a single dependency, one type value follows regardless of
     * whether the all-layers flag is set */
    skip_bits1(gb); /* direct_dependency_all_layers_flag */
    direct_dep_type = get_bits_long(gb, direct_dep_type_len);
    if (direct_dep_type > HEVC_DEP_TYPE_BOTH) {
        av_log(avctx, AV_LOG_WARNING, "Unsupported direct_dep_type: %d\n",
               direct_dep_type);
        return AVERROR_PATCHWELCOME;
    }

    non_vui_extension_length = get_ue_golomb(gb);
    if (non_vui_extension_length > 4096) {
        av_log(avctx, AV_LOG_ERROR, "vps_non_vui_extension_length too large: %u\n",
               non_vui_extension_length);
        return AVERROR_INVALIDDATA;
    }
    skip_bits_long(gb, non_vui_extension_length * 8);

    if (get_bits1(gb)) // vps_vui_present_flag
        av_log(avctx, AV_LOG_WARNING, "VPS VUI not supported\n");

    return 0;
}
int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
HEVCParamSets *ps)
{
int i,j;
int i;
int vps_id = get_bits(gb, 4);
ptrdiff_t nal_size = gb->buffer_end - gb->buffer;
int ret = AVERROR_INVALIDDATA;
uint64_t layer1_id_included = 0;
HEVCVPS *vps;
if (ps->vps_list[vps_id]) {
@ -494,7 +760,7 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
goto err;
}
if (parse_ptl(gb, avctx, &vps->ptl, vps->vps_max_sub_layers) < 0)
if (parse_ptl(gb, avctx, 1, &vps->ptl, vps->vps_max_sub_layers) < 0)
goto err;
vps->vps_sub_layer_ordering_info_present_flag = get_bits1(gb);
@ -526,9 +792,14 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
goto err;
}
for (i = 1; i < vps->vps_num_layer_sets; i++)
for (j = 0; j <= vps->vps_max_layer_id; j++)
skip_bits(gb, 1); // layer_id_included_flag[i][j]
vps->num_output_layer_sets = 1;
vps->ols[0] = 1;
// we support at most 2 layers, so ignore the others
if (vps->vps_num_layer_sets > 1)
layer1_id_included = get_bits64(gb, vps->vps_max_layer_id + 1); // layer_id_included_flag
if (vps->vps_num_layer_sets > 2)
skip_bits_long(gb, (vps->vps_num_layer_sets - 2) * (vps->vps_max_layer_id + 1));
vps->vps_timing_info_present_flag = get_bits1(gb);
if (vps->vps_timing_info_present_flag) {
@ -560,7 +831,21 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
vps->vps_max_sub_layers);
}
}
get_bits1(gb); /* vps_extension_flag */
vps->nb_layers = 1;
vps->layer_idx[0] = 0;
for (int i = 1; i < FF_ARRAY_ELEMS(vps->layer_idx); i++)
vps->layer_idx[i] = -1;
if (vps->vps_max_layers > 1 && get_bits1(gb)) { /* vps_extension_flag */
int ret = decode_vps_ext(gb, avctx, vps, layer1_id_included);
if (ret == AVERROR_PATCHWELCOME) {
vps->nb_layers = 1;
av_log(avctx, AV_LOG_WARNING, "Ignoring unsupported VPS extension\n");
ret = 0;
} else if (ret < 0)
goto err;
}
if (get_bits_left(gb) < 0) {
av_log(avctx, AV_LOG_ERROR,
@ -893,7 +1178,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
sps->temporal_id_nesting = get_bits(gb, 1);
if ((ret = parse_ptl(gb, avctx, &sps->ptl, sps->max_sub_layers)) < 0)
if ((ret = parse_ptl(gb, avctx, 1, &sps->ptl, sps->max_sub_layers)) < 0)
return ret;
*sps_id = get_ue_golomb_long(gb);

View File

@ -32,6 +32,8 @@
#include "hevc.h"
#define HEVC_VPS_MAX_LAYERS 2
typedef struct HEVCSublayerHdrParams {
uint32_t bit_rate_value_minus1[HEVC_MAX_CPB_CNT];
uint32_t cpb_size_value_minus1[HEVC_MAX_CPB_CNT];
@ -153,6 +155,19 @@ typedef struct PTL {
uint8_t sub_layer_level_present_flag[HEVC_MAX_SUB_LAYERS];
} PTL;
/**
 * Representation format parsed from the VPS extension (rep_format()).
 * Only the first rep_format is stored; decode_vps_ext() rejects streams
 * signalling more than one.
 */
typedef struct RepFormat {
    /* picture dimensions, each read as a 16-bit field */
    uint16_t pic_width_in_luma_samples;
    uint16_t pic_height_in_luma_samples;

    /* chroma layout */
    uint8_t  chroma_format_idc;
    uint8_t  separate_colour_plane_flag;

    /* sample bit depths, stored as the coded *_minus8 value plus 8 */
    uint8_t  bit_depth_luma;   ///< bit_depth_vps_luma_minus8 + 8
    uint8_t  bit_depth_chroma; ///< bit_depth_vps_chroma_minus8 + 8

    /* conformance window offsets; the parser stores the coded values
     * multiplied by the chroma subsampling factors */
    uint16_t conf_win_left_offset;
    uint16_t conf_win_right_offset;
    uint16_t conf_win_top_offset;
    uint16_t conf_win_bottom_offset;
} RepFormat;
typedef struct HEVCVPS {
unsigned int vps_id;
@ -176,6 +191,53 @@ typedef struct HEVCVPS {
HEVCHdrParams *hdr;
/* VPS extension */
/* Number of layers this VPS was parsed for, between 1 and
* min(HEVC_VPS_MAX_LAYERS, vps_max_layers).
*
* Note that vps_max_layers contains the layer count declared in the
* bitstream, while nb_layers contains the number of layers exported to
* users of this API (which may be smaller as we only support a subset of
* multilayer extensions).
*
* Arrays below documented as [layer_idx] have nb_layers valid entries.
*/
int nb_layers;
// LayerIdxInVps[nuh_layer_id], i.e. a mapping of nuh_layer_id to VPS layer
// indices. Valid values are in the range [0, HEVC_VPS_MAX_LAYERS - 1].
// Entries for unmapped values of nuh_layer_id are set to -1.
int8_t layer_idx[HEVC_MAX_NUH_LAYER_ID + 1];
uint8_t layer_id_in_nuh[HEVC_VPS_MAX_LAYERS];
uint8_t default_ref_layers_active;
uint8_t max_one_active_ref_layer;
uint8_t poc_lsb_aligned;
// bitmask of poc_lsb_not_present[layer_idx]
uint8_t poc_lsb_not_present;
struct {
unsigned max_dec_pic_buffering; // max_vps_dec_pic_buffering_minus1 + 1
unsigned max_num_reorder_pics; // max_vps_num_reorder_pics
unsigned max_latency_increase; // max_vps_latency_increase_plus1 - 1
} dpb_size;
// ViewId[layer_idx]
uint16_t view_id[HEVC_VPS_MAX_LAYERS];
// NumOutputLayerSets
uint8_t num_output_layer_sets;
// Bitmasks specifying output layer sets. i-th bit set means layer with VPS
// index i is present in the layer set.
uint64_t ols[HEVC_VPS_MAX_LAYERS];
// NumDirectRefLayers[layer_idx]
uint8_t num_direct_ref_layers[HEVC_VPS_MAX_LAYERS];
RepFormat rep_format;
uint8_t *data;
int data_size;
} HEVCVPS;