avcodec/dcahuff: Combine tables, use ff_init_vlc_from_lengths()

Up until now, initializing the dca VLC tables uses ff_init_vlc_sparse()
with length tables of type uint8_t and code tables of type uint16_t
(except for the LBR tables, which use length and symbols of type
uint8_t; these tables are interleaved). In case of the quant index
codebooks these arrays were accessed via tables of pointers to the
individual tables.

This commit changes this: First, we switch to ff_init_vlc_from_lengths()
to replace the uint16_t code tables by uint8_t symbol tables
(this necessitates ordering the tables from left-to-right in the tree
first). These symbol tables are interleaved with the length tables.

Furthermore, these tables are combined in order to remove the table of
pointers to individual tables, thereby avoiding relocations (for x64
elf systems this amounts to 96*24B = 2304B saved in .rela.dyn) and
saving 1280B from .data.rel.ro (for 64bit systems). Meanwhile the
savings in .rodata amount to 2709 + 2 * 334 = 3377B. Due to padding
the actual savings are higher: Part of these savings came from
replacing uint16_t codes with uint8_t symbols; the rest was due to the
fact that combining the tables eliminated padding (the ELF x64 ABI
requires objects >= 16B to be padded to 16B and lots of the tables
have 2^n + 1 elements). Taking this into
account gives savings of 4548B. (GCC by default uses an even higher
alignment (controlled by -malign-data); for it the savings are 5748B.)

These changes also made it necessary to modify the init code for
the encoder tables.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt 2022-09-06 02:12:14 +02:00
parent 2339f63eac
commit 97610e856a
3 changed files with 766 additions and 1225 deletions

View File

@ -166,30 +166,38 @@ static uint16_t bitalloc_table[DCA_NUM_BITALLOC_CODES][2];
static const uint16_t (*bitalloc_tables[DCA_CODE_BOOKS][8])[2];
static av_cold void create_enc_table(uint16_t dst[][2], unsigned count,
const uint8_t len[], const uint16_t codes[])
const uint8_t (**src_tablep)[2])
{
const uint8_t (*src_table)[2] = *src_tablep;
uint16_t code = 0;
for (unsigned i = 0; i < count; i++) {
dst[i][0] = codes[i];
dst[i][1] = len[i];
unsigned dst_idx = src_table[i][0];
dst[dst_idx][0] = code >> (16 - src_table[i][1]);
dst[dst_idx][1] = src_table[i][1];
code += 1 << (16 - src_table[i][1]);
}
*src_tablep += count;
}
static av_cold void dcaenc_init_static_tables(void)
{
uint16_t (*bitalloc_dst)[2] = bitalloc_table;
const uint8_t (*src_table)[2] = ff_dca_vlc_src_tables;
for (unsigned i = 0; i < DCA_CODE_BOOKS; i++) {
for (unsigned j = 0; ff_dca_bitalloc_codes[i][j]; j++) {
for (unsigned j = 0; j < ff_dca_quant_index_group_size[i]; j++) {
create_enc_table(bitalloc_dst, ff_dca_bitalloc_sizes[i],
ff_dca_bitalloc_bits[i][j], ff_dca_bitalloc_codes[i][j]);
&src_table);
bitalloc_tables[i][j] = bitalloc_dst - ff_dca_bitalloc_offsets[i];
bitalloc_dst += ff_dca_bitalloc_sizes[i];
}
}
for (unsigned i = 0; i < DCA_BITALLOC_12_COUNT; i++)
create_enc_table(&bitalloc_12_table[i][1], 12,
ff_dca_bitalloc_12_bits[i], ff_dca_bitalloc_12_codes[i]);
create_enc_table(&bitalloc_12_table[i][1], 12, &src_table);
}
static int encode_init(AVCodecContext *avctx)

File diff suppressed because it is too large Load Diff

View File

@ -62,11 +62,7 @@ extern VLC ff_dca_vlc_rsd;
extern const int8_t ff_dca_bitalloc_offsets[DCA_CODE_BOOKS];
extern const uint8_t ff_dca_bitalloc_sizes[DCA_CODE_BOOKS];
extern const uint16_t *const ff_dca_bitalloc_codes[DCA_CODE_BOOKS][8];
extern const uint8_t *const ff_dca_bitalloc_bits[DCA_CODE_BOOKS][8];
extern const uint8_t ff_dca_bitalloc_12_bits[DCA_BITALLOC_12_COUNT][12];
extern const uint16_t ff_dca_bitalloc_12_codes[DCA_BITALLOC_12_COUNT][12];
extern const uint8_t ff_dca_vlc_src_tables[][2];
av_cold void ff_dca_init_vlcs(void);