mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2024-09-19 21:06:42 +00:00
x86/dsputil: port ff_vector_clipf_sse to yasm
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
fdcb2873e1
commit
1d36defe94
@ -625,3 +625,47 @@ INIT_MMX mmx
|
|||||||
PUT_SIGNED_PIXELS_CLAMPED 0
|
PUT_SIGNED_PIXELS_CLAMPED 0
|
||||||
INIT_XMM sse2
|
INIT_XMM sse2
|
||||||
PUT_SIGNED_PIXELS_CLAMPED 3
|
PUT_SIGNED_PIXELS_CLAMPED 3
|
||||||
|
|
||||||
|
;-----------------------------------------------------------------------------
; void ff_vector_clipf(float *dst, const float *src,
;                      float min, float max, int len)
;
; Clamps every float read from src to the range [min, max] and stores the
; result to dst: each vector goes through maxps against the broadcast min,
; then minps against the broadcast max.
;
; One loop iteration processes four vector registers (mmsize*4 bytes) with
; aligned moves (mova), and the loop body runs before the counter is tested.
; NOTE(review): this implies len must be a positive multiple of the number of
; floats handled per iteration and that both pointers are suitably aligned;
; nothing here checks it -- confirm that all callers guarantee it.
;-----------------------------------------------------------------------------
INIT_XMM sse
%if UNIX64
; SysV x86-64: dst, src and len are the first three integer arguments, while
; min and max arrive in xmm0/xmm1, which are already m0/m1.
cglobal vector_clipf, 3,3,6, dst, src, len
%else
; x86-32 and Win64: argument slots are positional, so len is the fifth
; argument and must be declared (and loaded) as such.
cglobal vector_clipf, 5,5,6, dst, src, min, max, len
%endif
%if WIN64
    ; Win64 passes the 3rd/4th arguments in xmm2/xmm3; rename them to m0/m1.
    SWAP 0, 2
    SWAP 1, 3
%elif ARCH_X86_32
    ; Both limits live on the stack on x86-32.
    movss   m0, minm
    movss   m1, maxm
%endif
    ; Broadcast the scalar limits to every lane.
    SPLATD  m0
    SPLATD  m1
    ; len is a 32-bit int: shifting lend (not lenq) zero-extends the result on
    ; x86-64, discarding whatever the caller left in the upper half.
    shl     lend, 2
    ; Point both buffers at their end and count a negative byte offset up to 0.
    add     srcq, lenq
    add     dstq, lenq
    neg     lenq
.loop:
    mova    m2, [srcq+lenq+mmsize*0]
    mova    m3, [srcq+lenq+mmsize*1]
    mova    m4, [srcq+lenq+mmsize*2]
    mova    m5, [srcq+lenq+mmsize*3]
    ; Lower bound first ...
    maxps   m2, m0
    maxps   m3, m0
    maxps   m4, m0
    maxps   m5, m0
    ; ... then upper bound.
    minps   m2, m1
    minps   m3, m1
    minps   m4, m1
    minps   m5, m1
    mova    [dstq+lenq+mmsize*0], m2
    mova    [dstq+lenq+mmsize*1], m3
    mova    [dstq+lenq+mmsize*2], m4
    mova    [dstq+lenq+mmsize*3], m5
    add     lenq, mmsize*4
    jl .loop
    REP_RET
|
||||||
|
@ -585,12 +585,10 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
|
|||||||
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
|
static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
|
||||||
int cpu_flags, unsigned high_bit_depth)
|
int cpu_flags, unsigned high_bit_depth)
|
||||||
{
|
{
|
||||||
#if HAVE_SSE_INLINE
|
|
||||||
c->vector_clipf = ff_vector_clipf_sse;
|
|
||||||
#endif /* HAVE_SSE_INLINE */
|
|
||||||
|
|
||||||
#if HAVE_YASM
|
#if HAVE_YASM
|
||||||
#if HAVE_SSE_EXTERNAL
|
#if HAVE_SSE_EXTERNAL
|
||||||
|
c->vector_clipf = ff_vector_clipf_sse;
|
||||||
|
|
||||||
/* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
|
/* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
|
||||||
if (CONFIG_XVMC && avctx->hwaccel && avctx->hwaccel->decode_mb)
|
if (CONFIG_XVMC && avctx->hwaccel && avctx->hwaccel->decode_mb)
|
||||||
return;
|
return;
|
||||||
|
@ -506,37 +506,4 @@ void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride,
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void ff_vector_clipf_sse(float *dst, const float *src,
|
|
||||||
float min, float max, int len)
|
|
||||||
{
|
|
||||||
x86_reg i = (len - 16) * 4;
|
|
||||||
__asm__ volatile (
|
|
||||||
"movss %3, %%xmm4 \n\t"
|
|
||||||
"movss %4, %%xmm5 \n\t"
|
|
||||||
"shufps $0, %%xmm4, %%xmm4 \n\t"
|
|
||||||
"shufps $0, %%xmm5, %%xmm5 \n\t"
|
|
||||||
"1: \n\t"
|
|
||||||
"movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
|
|
||||||
"movaps 16(%2, %0), %%xmm1 \n\t"
|
|
||||||
"movaps 32(%2, %0), %%xmm2 \n\t"
|
|
||||||
"movaps 48(%2, %0), %%xmm3 \n\t"
|
|
||||||
"maxps %%xmm4, %%xmm0 \n\t"
|
|
||||||
"maxps %%xmm4, %%xmm1 \n\t"
|
|
||||||
"maxps %%xmm4, %%xmm2 \n\t"
|
|
||||||
"maxps %%xmm4, %%xmm3 \n\t"
|
|
||||||
"minps %%xmm5, %%xmm0 \n\t"
|
|
||||||
"minps %%xmm5, %%xmm1 \n\t"
|
|
||||||
"minps %%xmm5, %%xmm2 \n\t"
|
|
||||||
"minps %%xmm5, %%xmm3 \n\t"
|
|
||||||
"movaps %%xmm0, (%1, %0) \n\t"
|
|
||||||
"movaps %%xmm1, 16(%1, %0) \n\t"
|
|
||||||
"movaps %%xmm2, 32(%1, %0) \n\t"
|
|
||||||
"movaps %%xmm3, 48(%1, %0) \n\t"
|
|
||||||
"sub $64, %0 \n\t"
|
|
||||||
"jge 1b \n\t"
|
|
||||||
: "+&r" (i)
|
|
||||||
: "r" (dst), "r" (src), "m" (min), "m" (max)
|
|
||||||
: "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* HAVE_INLINE_ASM */
|
#endif /* HAVE_INLINE_ASM */
|
||||||
|
Loading…
Reference in New Issue
Block a user