FFmpeg/libavcodec/arm/mpegaudiodsp_fixed_armv6.S
Justin Ruggles 94d2b0d2fd ARM: Move asm.S from libavcodec to libavutil
This will allow for easier implementation of ARM-optimized functions in
libraries other than libavcodec.
2012-06-08 13:14:38 -04:00

144 lines
4.9 KiB
ArmAsm

/*
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/arm/asm.S"
@ No-op macro: accepts any operand list and expands to nothing.
@ It is the default value of the rsb parameter of sum8, so the optional
@ negation step in sum8 disappears when the caller does not request it.
.macro skip args:vararg
.endm
@ sum8: accumulate eight signed 32x32->64 products into the pair hi:lo.
@
@   lo, hi  64-bit accumulator, updated with smlal
@   w       base register of the first factor, read only
@   p       base register of the second factor, advanced by 4 bytes
@           (pre-increment with writeback) before the first load
@   t1-t4   scratch registers, clobbered
@   rsb     pass rsb to negate every word loaded through w before it is
@           multiplied, the default (skip) leaves the word unchanged
@   offs    extra word offset applied to every load through w
@
@ With k = 0..7 the products are w[offs + 64*k] * p[64*k] (word indices,
@ p taken after the increment).  Loads are interleaved with the
@ multiplies, alternating between the t1/t2 and t3/t4 register pairs.
.macro sum8 lo, hi, w, p, t1, t2, t3, t4, rsb=skip, offs=0
@ k = 0: load both factors, stepping p forward by one word
ldr \t1, [\w, #4*\offs]
ldr \t2, [\p, #4]!
\rsb \t1, \t1, #0
@ k = 1..6, two taps per pass: each smlal consumes the pair loaded
@ before it while the next pair is already being fetched
.irpc i, 135
ldr \t3, [\w, #4*64*\i+4*\offs]
ldr \t4, [\p, #4*64*\i]
smlal \lo, \hi, \t1, \t2
\rsb \t3, \t3, #0
ldr \t1, [\w, #4*64*(\i+1)+4*\offs]
ldr \t2, [\p, #4*64*(\i+1)]
smlal \lo, \hi, \t3, \t4
\rsb \t1, \t1, #0
.endr
@ k = 7: last load, then drain the two pending products
ldr \t3, [\w, #4*64*7+4*\offs]
ldr \t4, [\p, #4*64*7]
smlal \lo, \hi, \t1, \t2
\rsb \t3, \t3, #0
smlal \lo, \hi, \t3, \t4
.endm
@ round: extract a saturated 16-bit sample from the 64-bit value hi:lo.
@
@   rd      receives bits 24..55 of hi:lo, saturated by ssat to the
@           signed 16-bit range
@   lo      on exit holds only its low 24 bits (the part not consumed)
@   hi      on exit is zero
@
@ The accumulator is left holding the bits that were shifted out, so a
@ caller that keeps accumulating into hi:lo carries them into the next
@ result.
.macro round rd, lo, hi
@ rd = (lo >> 24) | (hi << 8)
lsr \rd, \lo, #24
bic \lo, \lo, #0xff000000
orr \rd, \rd, \hi, lsl #8
@ hi has been consumed by the orr above and can now be cleared
mov \hi, #0
ssat \rd, #16, \rd
.endm
@ ff_mpadsp_apply_window_fixed_armv6
@
@ Arguments as used below (the C prototype is not visible in this file,
@ TODO confirm against the caller):
@   r0      synth_buf, array of 32-bit words
@   r1      window coefficients, 32-bit words
@   r2      pointer to a 32-bit word that seeds the accumulator and
@           receives the leftover low bits on exit
@   r3      output pointer, written as 16-bit halfwords
@   [sp]    incr, first stack argument, scaled by 2 to form the byte
@           stride between output halfwords
@
@ Register roles inside the function:
@   r8:r9   main 64-bit accumulator (low:high)
@   r4:r7   second 64-bit accumulator inside the loop (low:high)
@   r0, r2  ascending / descending pointers into synth_buf
@   r1, r6  ascending / descending pointers into the window
@   r3, r5  output pointers for the two samples written per iteration
@   lr      loop counter
@   r10-r12 scratch
@
@ strh_post and strh_dpost are macros defined outside this file
@ (presumably store-halfword with post-increment / post-decrement by the
@ given register, verify in libavutil/arm/asm.S).
function ff_mpadsp_apply_window_fixed_armv6, export=1
@ Ten words are pushed, so the first stack argument is now at [sp, #40].
@ r2 sits at the lowest address and is popped on its own near the end.
push {r2,r4-r11,lr}
add r4, r0, #4*512 @ synth_buf + 512
@ Copy the 32 words synth_buf[0..31] to synth_buf[512..543], eight words
@ per pass.  r0 ends up at synth_buf + 32.
.rept 4
ldm r0!, {r5-r12}
stm r4!, {r5-r12}
.endr
ldr r4, [sp, #40] @ incr
sub r0, r0, #4*17 @ synth_buf + 15, pre-incremented to + 16 by sum8
ldr r8, [r2] @ sum:low
add r2, r0, #4*32 @ synth_buf + 47, pre-incremented to + 48 by sum8
rsb r5, r4, r4, lsl #5 @ 31 * incr
lsl r4, r4, #1 @ byte stride = 2 * incr
asr r9, r8, #31 @ sum:high (sign extension of sum:low)
add r5, r3, r5, lsl #1 @ samples2 = r3 + 2 * 31 * incr bytes
add r6, r1, #4*32 @ w2
str r4, [sp, #40] @ keep the byte stride in the incr stack slot
@ First output sample: add the eight products of window words 64*k with
@ synth_buf words 16 + 64*k, then subtract the eight products of window
@ words 32 + 64*k with synth_buf words 48 + 64*k (k = 0..7).
sum8 r8, r9, r1, r0, r10, r11, r12, lr
sum8 r8, r9, r1, r2, r10, r11, r12, lr, rsb, 32
round r10, r8, r9
strh_post r10, r3, r4
mov lr, #15 @ loop count, 15 iterations
1:
@ Step the ascending pointers r0 and r1 forward and the descending
@ window pointer r6 back by one word each, loading the first operands.
ldr r12, [r0, #4]!
ldr r11, [r6, #-4]!
ldr r10, [r1, #4]!
@ First half, unrolled over i = 0,2,4,6 with two taps per pass.  Each
@ value loaded through r0 is multiplied by a word loaded through r1
@ (added to r8:r9) and by the negated word loaded through r6
@ (accumulated in r4:r7, which smull initialises on the first tap).
.irpc i, 0246
.if \i
ldr r11, [r6, #4*64*\i]
ldr r10, [r1, #4*64*\i]
.endif
rsb r11, r11, #0
smlal r8, r9, r10, r12
ldr r10, [r0, #4*64*(\i+1)]
.ifeq \i
smull r4, r7, r11, r12
.else
smlal r4, r7, r11, r12
.endif
ldr r11, [r6, #4*64*(\i+1)]
ldr r12, [r1, #4*64*(\i+1)]
rsb r11, r11, #0
smlal r8, r9, r12, r10
.iflt \i-6
ldr r12, [r0, #4*64*(\i+2)]
.else
@ last pass: step r2 back one word and fetch the first value of the
@ second half
ldr r12, [r2, #-4]!
.endif
smlal r4, r7, r11, r10
.endr
@ Second half: values loaded through r2 are negated, then multiplied by
@ words at a further 32-word offset from r1 (into r8:r9) and from r6
@ (into r4:r7).
.irpc i, 0246
ldr r10, [r1, #4*64*\i+4*32]
rsb r12, r12, #0
ldr r11, [r6, #4*64*\i+4*32]
smlal r8, r9, r10, r12
ldr r10, [r2, #4*64*(\i+1)]
smlal r4, r7, r11, r12
ldr r12, [r1, #4*64*(\i+1)+4*32]
rsb r10, r10, #0
ldr r11, [r6, #4*64*(\i+1)+4*32]
smlal r8, r9, r12, r10
.iflt \i-6
ldr r12, [r2, #4*64*(\i+2)]
.else
@ last pass: nothing left to prefetch, reload the byte stride instead
ldr r12, [sp, #40]
.endif
smlal r4, r7, r11, r10
.endr
@ Emit the sample from r8:r9, then add r4:r7 to the leftover low bits to
@ form the second sample.  Its own leftover stays in r8:r9 for the next
@ iteration.  The flags set by subs are consumed by bgt.
round r10, r8, r9
adds r8, r8, r4
adc r9, r9, r7
strh_post r10, r3, r12
round r11, r8, r9
subs lr, lr, #1
strh_dpost r11, r5, r12
bgt 1b
@ Last sample: subtract the eight products of the window words at
@ r1 + 33 + 64*k with the synth_buf words at r0 + 1 + 64*k (word
@ offsets, k = 0..7).
sum8 r8, r9, r1, r0, r10, r11, r12, lr, rsb, 33
pop {r4} @ r4 = the r2 value saved on entry
round r10, r8, r9
str r8, [r4] @ write back the leftover low 24 bits
strh r10, [r3]
pop {r4-r11,pc} @ restore saved registers and return
endfunc