FFmpeg/libavutil/mem.c
Andreas Rheinhardt aca09ed7d4 avutil/mem: Handle fast allocations near UINT_MAX properly
av_fast_realloc and av_fast_mallocz? store the size of
the objects they allocate in an unsigned. Yet they overallocate
and currently they can allocate more than UINT_MAX bytes
in case a user has requested a size of about UINT_MAX * 16 / 17
or more if SIZE_MAX > UINT_MAX (and if the user increased
max_alloc_size via av_max_alloc). In this case it is impossible
to store the true size of the buffer via the unsigned*;
future requests are likely to use the (re)allocation codepath
even if the buffer is actually large enough because of
the incorrect size.

Fix this by ensuring that the actually allocated size
always fits into an unsigned. (This entails erroring out
in case the user requested more than UINT_MAX.)

Reviewed-by: Tomas Härdin <tjoppen@acc.umu.se>
Reviewed-by: Anton Khirnov <anton@khirnov.net>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2022-07-06 22:53:15 +02:00

579 lines
13 KiB
C

/*
* default memory allocator for libavutil
* Copyright (c) 2002 Fabrice Bellard
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* default memory allocator for libavutil
*/
#define _XOPEN_SOURCE 600
#include "config.h"
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdatomic.h>
#include <string.h>
#if HAVE_MALLOC_H
#include <malloc.h>
#endif
#include "attributes.h"
#include "avassert.h"
#include "dynarray.h"
#include "error.h"
#include "internal.h"
#include "intreadwrite.h"
#include "macros.h"
#include "mem.h"
#ifdef MALLOC_PREFIX
#define malloc AV_JOIN(MALLOC_PREFIX, malloc)
#define memalign AV_JOIN(MALLOC_PREFIX, memalign)
#define posix_memalign AV_JOIN(MALLOC_PREFIX, posix_memalign)
#define realloc AV_JOIN(MALLOC_PREFIX, realloc)
#define free AV_JOIN(MALLOC_PREFIX, free)
void *malloc(size_t size);
void *memalign(size_t align, size_t size);
int posix_memalign(void **ptr, size_t align, size_t size);
void *realloc(void *ptr, size_t size);
void free(void *ptr);
#endif /* MALLOC_PREFIX */
#define ALIGN (HAVE_AVX512 ? 64 : (HAVE_AVX ? 32 : 16))
/* NOTE: if you want to override these functions with your own
* implementations (not recommended) you have to link libav* as
* dynamic libraries and remove -Wl,-Bsymbolic from the linker flags.
* Note that this will cost performance. */
static atomic_size_t max_alloc_size = ATOMIC_VAR_INIT(INT_MAX);
void av_max_alloc(size_t max){
atomic_store_explicit(&max_alloc_size, max, memory_order_relaxed);
}
static int size_mult(size_t a, size_t b, size_t *r)
{
size_t t;
#if (!defined(__INTEL_COMPILER) && AV_GCC_VERSION_AT_LEAST(5,1)) || AV_HAS_BUILTIN(__builtin_mul_overflow)
if (__builtin_mul_overflow(a, b, &t))
return AVERROR(EINVAL);
#else
t = a * b;
/* Hack inspired from glibc: don't try the division if nelem and elsize
* are both less than sqrt(SIZE_MAX). */
if ((a | b) >= ((size_t)1 << (sizeof(size_t) * 4)) && a && t / a != b)
return AVERROR(EINVAL);
#endif
*r = t;
return 0;
}
void *av_malloc(size_t size)
{
void *ptr = NULL;
if (size > atomic_load_explicit(&max_alloc_size, memory_order_relaxed))
return NULL;
#if HAVE_POSIX_MEMALIGN
if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation
if (posix_memalign(&ptr, ALIGN, size))
ptr = NULL;
#elif HAVE_ALIGNED_MALLOC
ptr = _aligned_malloc(size, ALIGN);
#elif HAVE_MEMALIGN
#ifndef __DJGPP__
ptr = memalign(ALIGN, size);
#else
ptr = memalign(size, ALIGN);
#endif
/* Why 64?
* Indeed, we should align it:
* on 4 for 386
* on 16 for 486
* on 32 for 586, PPro - K6-III
* on 64 for K7 (maybe for P3 too).
* Because L1 and L2 caches are aligned on those values.
* But I don't want to code such logic here!
*/
/* Why 32?
* For AVX ASM. SSE / NEON needs only 16.
* Why not larger? Because I did not see a difference in benchmarks ...
*/
/* benchmarks with P3
* memalign(64) + 1 3071, 3051, 3032
* memalign(64) + 2 3051, 3032, 3041
* memalign(64) + 4 2911, 2896, 2915
* memalign(64) + 8 2545, 2554, 2550
* memalign(64) + 16 2543, 2572, 2563
* memalign(64) + 32 2546, 2545, 2571
* memalign(64) + 64 2570, 2533, 2558
*
* BTW, malloc seems to do 8-byte alignment by default here.
*/
#else
ptr = malloc(size);
#endif
if(!ptr && !size) {
size = 1;
ptr= av_malloc(1);
}
#if CONFIG_MEMORY_POISONING
if (ptr)
memset(ptr, FF_MEMORY_POISON, size);
#endif
return ptr;
}
void *av_realloc(void *ptr, size_t size)
{
void *ret;
if (size > atomic_load_explicit(&max_alloc_size, memory_order_relaxed))
return NULL;
#if HAVE_ALIGNED_MALLOC
ret = _aligned_realloc(ptr, size + !size, ALIGN);
#else
ret = realloc(ptr, size + !size);
#endif
#if CONFIG_MEMORY_POISONING
if (ret && !ptr)
memset(ret, FF_MEMORY_POISON, size);
#endif
return ret;
}
void *av_realloc_f(void *ptr, size_t nelem, size_t elsize)
{
size_t size;
void *r;
if (size_mult(elsize, nelem, &size)) {
av_free(ptr);
return NULL;
}
r = av_realloc(ptr, size);
if (!r)
av_free(ptr);
return r;
}
int av_reallocp(void *ptr, size_t size)
{
void *val;
if (!size) {
av_freep(ptr);
return 0;
}
memcpy(&val, ptr, sizeof(val));
val = av_realloc(val, size);
if (!val) {
av_freep(ptr);
return AVERROR(ENOMEM);
}
memcpy(ptr, &val, sizeof(val));
return 0;
}
void *av_malloc_array(size_t nmemb, size_t size)
{
size_t result;
if (size_mult(nmemb, size, &result) < 0)
return NULL;
return av_malloc(result);
}
#if FF_API_AV_MALLOCZ_ARRAY
void *av_mallocz_array(size_t nmemb, size_t size)
{
size_t result;
if (size_mult(nmemb, size, &result) < 0)
return NULL;
return av_mallocz(result);
}
#endif
void *av_realloc_array(void *ptr, size_t nmemb, size_t size)
{
size_t result;
if (size_mult(nmemb, size, &result) < 0)
return NULL;
return av_realloc(ptr, result);
}
int av_reallocp_array(void *ptr, size_t nmemb, size_t size)
{
void *val;
memcpy(&val, ptr, sizeof(val));
val = av_realloc_f(val, nmemb, size);
memcpy(ptr, &val, sizeof(val));
if (!val && nmemb && size)
return AVERROR(ENOMEM);
return 0;
}
void av_free(void *ptr)
{
#if HAVE_ALIGNED_MALLOC
_aligned_free(ptr);
#else
free(ptr);
#endif
}
void av_freep(void *arg)
{
void *val;
memcpy(&val, arg, sizeof(val));
memcpy(arg, &(void *){ NULL }, sizeof(val));
av_free(val);
}
void *av_mallocz(size_t size)
{
void *ptr = av_malloc(size);
if (ptr)
memset(ptr, 0, size);
return ptr;
}
void *av_calloc(size_t nmemb, size_t size)
{
size_t result;
if (size_mult(nmemb, size, &result) < 0)
return NULL;
return av_mallocz(result);
}
char *av_strdup(const char *s)
{
char *ptr = NULL;
if (s) {
size_t len = strlen(s) + 1;
ptr = av_realloc(NULL, len);
if (ptr)
memcpy(ptr, s, len);
}
return ptr;
}
char *av_strndup(const char *s, size_t len)
{
char *ret = NULL, *end;
if (!s)
return NULL;
end = memchr(s, 0, len);
if (end)
len = end - s;
ret = av_realloc(NULL, len + 1);
if (!ret)
return NULL;
memcpy(ret, s, len);
ret[len] = 0;
return ret;
}
void *av_memdup(const void *p, size_t size)
{
void *ptr = NULL;
if (p) {
ptr = av_malloc(size);
if (ptr)
memcpy(ptr, p, size);
}
return ptr;
}
int av_dynarray_add_nofree(void *tab_ptr, int *nb_ptr, void *elem)
{
void **tab;
memcpy(&tab, tab_ptr, sizeof(tab));
FF_DYNARRAY_ADD(INT_MAX, sizeof(*tab), tab, *nb_ptr, {
tab[*nb_ptr] = elem;
memcpy(tab_ptr, &tab, sizeof(tab));
}, {
return AVERROR(ENOMEM);
});
return 0;
}
void av_dynarray_add(void *tab_ptr, int *nb_ptr, void *elem)
{
void **tab;
memcpy(&tab, tab_ptr, sizeof(tab));
FF_DYNARRAY_ADD(INT_MAX, sizeof(*tab), tab, *nb_ptr, {
tab[*nb_ptr] = elem;
memcpy(tab_ptr, &tab, sizeof(tab));
}, {
*nb_ptr = 0;
av_freep(tab_ptr);
});
}
void *av_dynarray2_add(void **tab_ptr, int *nb_ptr, size_t elem_size,
const uint8_t *elem_data)
{
uint8_t *tab_elem_data = NULL;
FF_DYNARRAY_ADD(INT_MAX, elem_size, *tab_ptr, *nb_ptr, {
tab_elem_data = (uint8_t *)*tab_ptr + (*nb_ptr) * elem_size;
if (elem_data)
memcpy(tab_elem_data, elem_data, elem_size);
else if (CONFIG_MEMORY_POISONING)
memset(tab_elem_data, FF_MEMORY_POISON, elem_size);
}, {
av_freep(tab_ptr);
*nb_ptr = 0;
});
return tab_elem_data;
}
static void fill16(uint8_t *dst, int len)
{
uint32_t v = AV_RN16(dst - 2);
v |= v << 16;
while (len >= 4) {
AV_WN32(dst, v);
dst += 4;
len -= 4;
}
while (len--) {
*dst = dst[-2];
dst++;
}
}
static void fill24(uint8_t *dst, int len)
{
#if HAVE_BIGENDIAN
uint32_t v = AV_RB24(dst - 3);
uint32_t a = v << 8 | v >> 16;
uint32_t b = v << 16 | v >> 8;
uint32_t c = v << 24 | v;
#else
uint32_t v = AV_RL24(dst - 3);
uint32_t a = v | v << 24;
uint32_t b = v >> 8 | v << 16;
uint32_t c = v >> 16 | v << 8;
#endif
while (len >= 12) {
AV_WN32(dst, a);
AV_WN32(dst + 4, b);
AV_WN32(dst + 8, c);
dst += 12;
len -= 12;
}
if (len >= 4) {
AV_WN32(dst, a);
dst += 4;
len -= 4;
}
if (len >= 4) {
AV_WN32(dst, b);
dst += 4;
len -= 4;
}
while (len--) {
*dst = dst[-3];
dst++;
}
}
static void fill32(uint8_t *dst, int len)
{
uint32_t v = AV_RN32(dst - 4);
#if HAVE_FAST_64BIT
uint64_t v2= v + ((uint64_t)v<<32);
while (len >= 32) {
AV_WN64(dst , v2);
AV_WN64(dst+ 8, v2);
AV_WN64(dst+16, v2);
AV_WN64(dst+24, v2);
dst += 32;
len -= 32;
}
#endif
while (len >= 4) {
AV_WN32(dst, v);
dst += 4;
len -= 4;
}
while (len--) {
*dst = dst[-4];
dst++;
}
}
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
{
const uint8_t *src = &dst[-back];
if (!back)
return;
if (back == 1) {
memset(dst, *src, cnt);
} else if (back == 2) {
fill16(dst, cnt);
} else if (back == 3) {
fill24(dst, cnt);
} else if (back == 4) {
fill32(dst, cnt);
} else {
if (cnt >= 16) {
int blocklen = back;
while (cnt > blocklen) {
memcpy(dst, src, blocklen);
dst += blocklen;
cnt -= blocklen;
blocklen <<= 1;
}
memcpy(dst, src, cnt);
return;
}
if (cnt >= 8) {
AV_COPY32U(dst, src);
AV_COPY32U(dst + 4, src + 4);
src += 8;
dst += 8;
cnt -= 8;
}
if (cnt >= 4) {
AV_COPY32U(dst, src);
src += 4;
dst += 4;
cnt -= 4;
}
if (cnt >= 2) {
AV_COPY16U(dst, src);
src += 2;
dst += 2;
cnt -= 2;
}
if (cnt)
*dst = *src;
}
}
void *av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
{
size_t max_size;
if (min_size <= *size)
return ptr;
max_size = atomic_load_explicit(&max_alloc_size, memory_order_relaxed);
/* *size is an unsigned, so the real maximum is <= UINT_MAX. */
max_size = FFMIN(max_size, UINT_MAX);
if (min_size > max_size) {
*size = 0;
return NULL;
}
min_size = FFMIN(max_size, FFMAX(min_size + min_size / 16 + 32, min_size));
ptr = av_realloc(ptr, min_size);
/* we could set this to the unmodified min_size but this is safer
* if the user lost the ptr and uses NULL now
*/
if (!ptr)
min_size = 0;
*size = min_size;
return ptr;
}
static inline void fast_malloc(void *ptr, unsigned int *size, size_t min_size, int zero_realloc)
{
size_t max_size;
void *val;
memcpy(&val, ptr, sizeof(val));
if (min_size <= *size) {
av_assert0(val || !min_size);
return;
}
max_size = atomic_load_explicit(&max_alloc_size, memory_order_relaxed);
/* *size is an unsigned, so the real maximum is <= UINT_MAX. */
max_size = FFMIN(max_size, UINT_MAX);
if (min_size > max_size) {
av_freep(ptr);
*size = 0;
return;
}
min_size = FFMIN(max_size, FFMAX(min_size + min_size / 16 + 32, min_size));
av_freep(ptr);
val = zero_realloc ? av_mallocz(min_size) : av_malloc(min_size);
memcpy(ptr, &val, sizeof(val));
if (!val)
min_size = 0;
*size = min_size;
return;
}
void av_fast_malloc(void *ptr, unsigned int *size, size_t min_size)
{
fast_malloc(ptr, size, min_size, 0);
}
void av_fast_mallocz(void *ptr, unsigned int *size, size_t min_size)
{
fast_malloc(ptr, size, min_size, 1);
}
int av_size_mult(size_t a, size_t b, size_t *r)
{
return size_mult(a, b, r);
}