swscale/x86/rgb2rgb: fix deinterleaveBytes for unaligned dst pointers

This commit is contained in:
Ramiro Polla 2024-09-01 14:53:07 +02:00
parent 6df0c5f9f4
commit f17a6bd200

View File

@@ -736,11 +736,11 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
 packuswb m2, m3 ; (byte) { U0, ..., U15 }
 packuswb m0, m1 ; (byte) { V0, ..., V15 }
 %ifidn %2, nv12
-mova [dstUq+wq], m2
-mova [dstVq+wq], m0
+mov%1 [dstUq+wq], m2
+mov%1 [dstVq+wq], m0
 %else ; nv21
-mova [dstVq+wq], m2
-mova [dstUq+wq], m0
+mov%1 [dstVq+wq], m2
+mov%1 [dstUq+wq], m0
 %endif ; nv12/21
 add wq, mmsize
 jl .loop_%1
@@ -750,15 +750,18 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
 ; %1 = nr. of XMM registers
 ; %2 = nv12 or nv21
 %macro NVXX_TO_UV_FN 2
-cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
+cglobal %2ToUV, 4, 5, %1, dstU, dstV, tmp, src, w
 %if ARCH_X86_64
 movsxd wq, dword r5m
 %else ; x86-32
 mov wq, r5m
 %endif
+mov tmpq, srcq
+or tmpq, dstUq
+or tmpq, dstVq
 add dstUq, wq
 add dstVq, wq
-test srcq, 15
+test tmpq, 15
 lea srcq, [srcq+wq*2]
 pcmpeqb m5, m5 ; (byte) { 0xff } x 16
 psrlw m5, 8 ; (word) { 0x00ff } x 8