FFmpeg/libavcodec/arm/ac3dsp_armv6.S
pin xue 05c062e9da replace movw instruction in ac3dsp_armv6.S
AS      libavcodec/arm/ac3dsp_armv6.o
ffmpeg-src/libavcodec/arm/ac3dsp_armv6.S: Assembler messages:
ffmpeg-src/libavcodec/arm/ac3dsp_armv6.S:40: Error: selected processor
does not support `movw r8,#0x1fe0'
make[1]: *** [libavcodec/arm/ac3dsp_armv6.o] Error 1

MOVW is the ARMv7 way to load a constant:
   * movw, or move wide, moves a 16-bit constant into a register,
implicitly zeroing the top 16 bits of the target register.
   * movt, or move top, moves a 16-bit constant into the top half
of a given register without altering the bottom 16 bits.
To load a 32-bit constant, "movw lower16; movt upper16" is better than
ldr when available, because:
While this approach takes two instructions, it does not require any
extra space to store the constant, so both the movw/movt method and the
ldr method end up using the same amount of memory. Memory
bandwidth is precious, and the movw/movt approach avoids an extra
read on the data side, not to mention that the read could have missed the
cache.

But here it is armv6 optimization, so that we have to use ldr.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
2011-04-10 03:49:24 +02:00

84 lines
3.4 KiB
ArmAsm

/*
* Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "asm.S"
@ ff_ac3_bit_alloc_calc_bap_armv6 (ARM mode, ARMv6: uses USAT)
@
@ Arguments as consumed by the code below:
@   r0        mask       signed 16-bit entries, indexed by band
@   r1        psd        signed 16-bit entries, indexed by bin
@   r2        start      first bin to process
@   r3        end        one past the last bin to process
@   [sp]      snr_offset
@   [sp, #4]  floor
@   [sp, #8]  bap_tab    byte table, indexed by a 6-bit address
@   [sp, #12] bap        byte output, indexed by bin
@
@ If snr_offset == -960, tail-calls memset(bap, 0, 256) and does nothing else.
@ Otherwise, starting at band = ff_ac3_bin_to_band_tab[start] and bin = start:
@   do {
@       m        = (max(mask[band] - snr_offset - floor, 0) & 0x1fe0) + floor
@       band_end = ff_ac3_band_start_tab[++band]
@       for (; bin < min(band_end, end); bin++)
@           bap[bin] = bap_tab[usat6((psd[bin] - m) >> 5)]
@   } while (end > band_end)
@
@ Register roles inside the loop:
@   r0  mask pointer           r1  psd pointer        r2  first bin of band
@   r3  end                    r4  band_start_tab ptr r5  floor
@   r6  bap_tab                r7  bap write pointer  r8  scratch / bin count
@   r9  m                      r10 band_end           r11 bap limit for band
@   r12 snr_offset             lr  scratch
@
@ NOTE(review): presumably callers pass start < end; an empty range is not
@ special-cased here -- confirm against callers.
function ff_ac3_bit_alloc_calc_bap_armv6, export=1
        ldr             r12, [sp]                       @ snr_offset
        cmp             r12, #-960
        beq             4f                              @ nothing to allocate: clear bap
        push            {r4-r11,lr}                     @ 9 registers = 36 bytes
        add             r5,  sp,  #40                   @ &floor (stack args now at sp+36)
        movrel          r4,  X(ff_ac3_bin_to_band_tab)
        movrel          lr,  X(ff_ac3_band_start_tab)
        ldm             r5,  {r5-r7}                    @ r5 = floor, r6 = bap_tab, r7 = bap
        ldrb            r4,  [r4, r2]                   @ band = bin_to_band_tab[start]
        add             r1,  r1,  r2,  lsl #1           @ psd + start
        add             r0,  r0,  r4,  lsl #1           @ mask + band
        add             r4,  lr,  r4                    @ &band_start_tab[band]
        add             r7,  r7,  r2                    @ bap + start
        @ r2 keeps holding the first bin of the current band.  It starts as
        @ 'start' (which need not sit on a band boundary), so the first band
        @ is only processed from 'start' onwards.
1:
        ldrsh           r9,  [r0], #2                   @ mask[band]
        mov             r8,  #0xff0                     @ 0x1fe0 >> 1: encodable immediate, no literal load
        sub             r9,  r9,  r12                   @   - snr_offset
        ldrb            r10, [r4, #1]!                  @ band_end = band_start_tab[++band]
        subs            r9,  r9,  r5                    @   - floor
        movlt           r9,  #0                         @ clamp at 0
        cmp             r10, r3                         @ band_end vs end
        and             r9,  r9,  r8,  lsl #1           @   & 0x1fe0
        subgt           r8,  r3,  r2                    @ count = end      - bin (band cut short by end)
        suble           r8,  r10, r2                    @ count = band_end - bin
        mov             r2,  r10                        @ next band begins at band_end
        add             r9,  r9,  r5                    @   + floor => m
        tst             r8,  #1
        add             r11, r7,  r8                    @ bap pointer limit for this band
        bne             3f                              @ odd count: do a single bin first
        b               5f

        @ Two bins per iteration.
2:
        ldrsh           r8,  [r1], #2                   @ psd[bin]
        ldrsh           lr,  [r1], #2                   @ psd[bin + 1]
        sub             r8,  r8,  r9                    @   - m
        sub             lr,  lr,  r9
        usat            r8,  #6,  r8,  asr #5           @ address = clip((psd - m) >> 5, 0, 63)
        usat            lr,  #6,  lr,  asr #5
        ldrb            r8,  [r6, r8]                   @ bap_tab[address]
        ldrb            lr,  [r6, lr]
        strb            r8,  [r7], #1                   @ bap[bin]
        strb            lr,  [r7], #1                   @ bap[bin + 1]
5:      cmp             r7,  r11
        blo             2b
        @ Continue only while 'end' lies beyond the END of the band just
        @ processed; testing against its start would run one band too many.
        cmp             r3,  r10
        bgt             1b
        pop             {r4-r11,pc}

        @ Single leading bin when the band's bin count is odd.
3:
        ldrsh           r8,  [r1], #2                   @ psd[bin]
        sub             r8,  r8,  r9                    @   - m
        usat            r8,  #6,  r8,  asr #5           @ address
        ldrb            r8,  [r6, r8]                   @ bap_tab[address]
        strb            r8,  [r7], #1                   @ bap[bin]
        b               5b

        @ snr_offset == -960: memset(bap, 0, 256) as a tail call.  Nothing has
        @ been pushed on this path, so bap is still at [sp, #12].
4:
        ldr             r0,  [sp, #12]                  @ bap
        mov             r1,  #0
        mov             r2,  #256
        b               X(memset)                       @ X() applies the platform symbol prefix
endfunc