FFmpeg/libavcodec/audiodsp.c
Rémi Denis-Courmont 04d092e7d5 lavc/audiodsp: RISC-V F vector_clipf
RV64G supports MIN & MAX instructions natively only on floating point
registers, not general purpose ones. The latter would require the Zbb
extension. Due to that, it is actually faster to perform the clipping
"properly" in FPU.

Benchmarks on SiFive U74-MC (courtesy of Shanghai StarFive Tech):
audiodsp.vector_clipf_c: 29551.5
audiodsp.vector_clipf_rvf: 17871.0

Also tried unrolling with 2 or 8 elements but it gets worse either way.
2022-09-27 13:19:52 +02:00

122 lines
3.9 KiB
C

/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include <string.h>

#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "audiodsp.h"
/**
 * Clip one value, given as a raw 32-bit pattern, using only unsigned
 * integer comparisons.
 *
 * The only caller in this file uses it when the lower bound is negative and
 * the upper bound is positive. Assuming IEEE-754 binary32 patterns (bit 31
 * is the sign), a pattern that compares above mini as an unsigned integer is
 * a negative value of larger magnitude, and toggling bit 31 lets the same
 * kind of comparison be used for the upper bound.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the lower bound
 * @param maxi     bit pattern of the upper bound
 * @param maxisign maxi with bit 31 toggled
 * @return mini if a is above mini as an unsigned integer, otherwise maxi if
 *         a with bit 31 toggled is above maxisign, otherwise a unchanged
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    const uint32_t sign_bit = 1U << 31;

    /* Lower-bound test comes first; it wins over the upper-bound test. */
    if (a > mini)
        return mini;

    /* Upper-bound test on the sign-toggled pattern. */
    return ((a ^ sign_bit) > maxisign) ? maxi : a;
}
/**
 * Clip a float vector with integer comparisons on the raw 32-bit patterns
 * of its elements (see clipf_c_one()).
 *
 * Bit patterns are moved in and out of float storage with memcpy() rather
 * than by dereferencing uint32_t pointers cast from float pointers: the C
 * effective-type rules do not allow a float object to be accessed through a
 * uint32_t lvalue. Both forms assume float is 32 bits wide.
 *
 * @param dst destination vector
 * @param src source vector
 * @param min pointer to the lower bound; the caller in this file only takes
 *            this path when the bound is negative
 * @param max pointer to the upper bound; the caller in this file only takes
 *            this path when the bound is positive
 * @param len number of elements. Elements are processed in groups of 8 with
 *            no tail handling, so a len that is not a multiple of 8 reads
 *            and writes up to the next multiple of 8.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src,
                                         float *min, float *max, int len)
{
    uint32_t mini, maxi, maxisign;
    int i, j;

    memcpy(&mini, min, sizeof(mini));
    memcpy(&maxi, max, sizeof(maxi));
    /* Upper bound with bit 31 toggled, as clipf_c_one() expects. */
    maxisign = maxi ^ (1U << 31);

    for (i = 0; i < len; i += 8) {
        for (j = 0; j < 8; j++) {
            uint32_t bits;

            memcpy(&bits, &src[i + j], sizeof(bits));
            bits = clipf_c_one(bits, mini, maxi, maxisign);
            memcpy(&dst[i + j], &bits, sizeof(bits));
        }
    }
}
/**
 * Clip every element of a float vector to [min, max].
 *
 * When the bounds have opposite signs (min < 0 and max > 0) the work is
 * handed to vector_clipf_c_opposite_sign(); otherwise each element goes
 * through av_clipf().
 *
 * @param dst destination vector
 * @param src source vector
 * @param len number of elements. Elements are processed in groups of 8 with
 *            no tail handling, so a len that is not a multiple of 8 reads
 *            and writes up to the next multiple of 8.
 * @param min lower bound
 * @param max upper bound
 */
static void vector_clipf_c(float *dst, const float *src, int len,
                           float min, float max)
{
    int base, lane;

    /* Bounds on either side of zero: use the integer-compare path. */
    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    /* Same element order as eight consecutive statements per group. */
    for (base = 0; base < len; base += 8) {
        for (lane = 0; lane < 8; lane++)
            dst[base + lane] = av_clipf(src[base + lane], min, max);
    }
}
/**
 * Compute the dot product of two int16_t vectors.
 *
 * The running sum is kept in an unsigned int, so it wraps around on overflow
 * instead of overflowing a signed type; it is converted to int32_t on
 * return.
 *
 * @param v1    first vector
 * @param v2    second vector
 * @param order number of elements to multiply and accumulate
 * @return the accumulated sum of v1[i] * v2[i]
 */
static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2,
                                     int order)
{
    unsigned acc = 0;

    for (; order != 0; order--) {
        /* Operands promote to int; the product is then added modulo 2^N. */
        acc += *v1 * *v2;
        v1++;
        v2++;
    }

    return acc;
}
/**
 * Clip every element of an int32_t vector to [min, max] with av_clip().
 *
 * Elements are handled in groups of eight, then any remainder one at a
 * time. A len of zero does nothing.
 *
 * @param dst destination vector
 * @param src source vector
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements to process
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    /*
     * len is unsigned: subtract 8 only while at least 8 elements remain.
     * Subtracting past zero would wrap to a huge count and run off the end
     * of both buffers.
     */
    while (len >= 8) {
        int k;

        for (k = 0; k < 8; k++)
            dst[k] = av_clip(src[k], min, max);
        dst += 8;
        src += 8;
        len -= 8;
    }

    /* Remainder when len is not a multiple of 8. */
    for (; len > 0; len--)
        *dst++ = av_clip(*src++, min, max);
}
/**
 * Initialize an AudioDSPContext.
 *
 * Stores the C implementations defined in this file into the context's
 * function pointers, then calls the architecture-specific init function
 * selected by the ARCH_* preprocessor conditions, if any of them is true.
 *
 * @param c context whose function pointers are set
 */
av_cold void ff_audiodsp_init(AudioDSPContext *c)
{
/* C defaults are stored first, so the arch-specific call below sees a
 * fully populated context. */
c->scalarproduct_int16 = scalarproduct_int16_c;
c->vector_clip_int32 = vector_clip_int32_c;
c->vector_clipf = vector_clipf_c;
/* The #if/#elif chain compiles in at most one of these calls. Presumably
 * each one replaces some of the pointers above with optimized versions;
 * their bodies are not visible here -- confirm in the per-arch files. */
#if ARCH_ARM
ff_audiodsp_init_arm(c);
#elif ARCH_PPC
ff_audiodsp_init_ppc(c);
#elif ARCH_RISCV
ff_audiodsp_init_riscv(c);
#elif ARCH_X86
ff_audiodsp_init_x86(c);
#endif
}