Built SDL2_image and _mixer static
This commit is contained in:
394
libsdl2_mixer/external/mpg123-1.25.6/src/libmpg123/dct36_x86_64.S
vendored
Normal file
394
libsdl2_mixer/external/mpg123-1.25.6/src/libmpg123/dct36_x86_64.S
vendored
Normal file
@@ -0,0 +1,394 @@
|
||||
/*
|
||||
dct36_x86_64: SSE optimized dct36 for x86-64
|
||||
|
||||
copyright 1995-2013 by the mpg123 project - free software under the terms of the LGPL 2.1
|
||||
see COPYING and AUTHORS files in distribution or http://mpg123.org
|
||||
initially written by Taihei Monma
|
||||
*/
|
||||
|
||||
#include "mangle.h"
|
||||
|
||||
/* Register aliases used by dct36_x86_64. in, out1, out2, w and ts carry the five pointer arguments in prototype order (inbuf, o1, o2, wintab, tsbuf); COS9_ and tfcos36_ are scratch pointers to the constant tables. IS_MSABI is not defined in this file (presumably supplied by mangle.h or the build - confirm). */
#ifdef IS_MSABI
|
||||
/* Microsoft x64 convention: arguments 1-4 arrive in rcx, rdx, r8, r9. */
#define in %rcx
|
||||
#define out1 %rdx
|
||||
#define out2 %r8
|
||||
#define w %r9
|
||||
/* The fifth argument is passed on the stack; the function prologue loads it into r10. */
#define ts %r10
|
||||
/* rax and r11 are caller-saved in this convention, so they need no save/restore. */
#define COS9_ %rax
|
||||
#define tfcos36_ %r11
|
||||
#else
|
||||
/* System V AMD64 convention: arguments 1-5 arrive in rdi, rsi, rdx, rcx, r8. */
#define in %rdi
|
||||
#define out1 %rsi
|
||||
#define out2 %rdx
|
||||
#define w %rcx
|
||||
#define ts %r8
|
||||
/* rax and r9 are caller-saved in this convention, so they need no save/restore. */
#define COS9_ %rax
|
||||
#define tfcos36_ %r9
|
||||
#endif
|
||||
|
||||
/*
|
||||
void dct36_x86_64(real *inbuf,real *o1,real *o2,real *wintab,real *tsbuf);
|
||||
*/
|
||||
|
||||
/* Constant tables go to .rodata, except when __APPLE__ is defined, where .data is used instead. */
#ifndef __APPLE__
|
||||
.section .rodata
|
||||
#else
|
||||
.data
|
||||
#endif
|
||||
/* ALIGN16 is a macro defined outside this file (presumably 16-byte alignment - confirm); the movaps loads from this table in dct36_x86_64 need the label 16-byte aligned. */
ALIGN16
|
||||
/* Sixteen 32-bit words, each value stored twice in a row. The function loads them as four 16-byte vectors and uses them as mulps operands, i.e. as single-precision bit patterns. According to the register notes inside the function the vectors are, in memory order, COS9_[3,3,6,6], COS9_[1,1,2,2], COS9_[5,5,8,8], COS9_[7,7,4,4]. */
dct36_x86_64_COS9:
|
||||
/* offset 0: COS9_[3,3,6,6] */
.long 0x3f5db3d7
|
||||
.long 0x3f5db3d7
|
||||
.long 0x3f000000
|
||||
.long 0x3f000000
|
||||
/* offset 16: COS9_[1,1,2,2] */
.long 0x3f7c1c5c
|
||||
.long 0x3f7c1c5c
|
||||
.long 0x3f708fb2
|
||||
.long 0x3f708fb2
|
||||
/* offset 32: COS9_[5,5,8,8] */
.long 0x3f248dbb
|
||||
.long 0x3f248dbb
|
||||
.long 0x3e31d0d4
|
||||
.long 0x3e31d0d4
|
||||
/* offset 48: COS9_[7,7,4,4] */
.long 0x3eaf1d44
|
||||
.long 0x3eaf1d44
|
||||
.long 0x3f441b7d
|
||||
.long 0x3f441b7d
|
||||
/* ALIGN16 is defined outside this file (presumably 16-byte alignment - confirm); the function reads this table with movaps at offsets 0 and 16. */
ALIGN16
|
||||
/* Nine 32-bit words used as single-precision multipliers: words 0-3 and 4-7 are loaded as two vectors (movaps) and word 8 is loaded on its own (movss at offset 32). */
dct36_x86_64_tfcos36:
|
||||
/* words 0-3: multiplied into (tmp2b + tmp1b) */
.long 0x3f007d2b
|
||||
.long 0x3f0483ee
|
||||
.long 0x3f0d3b7d
|
||||
.long 0x3f1c4257
|
||||
/* words 4-7: multiplied into (tmp2b - tmp1b) */
.long 0x40b79454
|
||||
.long 0x3ff746ea
|
||||
.long 0x3f976fd9
|
||||
.long 0x3f5f2944
|
||||
/* word 8: scalar multiplier applied with mulss */
.long 0x3f3504f3
|
||||
/* ALIGN16 is defined outside this file (presumably 16-byte alignment - confirm); the table is loaded with movaps. */
ALIGN16
|
||||
/* Lane mask for andps: clears lanes 0 and 2, keeps lanes 1 and 3 of a four-float vector. */
dct36_x86_64_mask:
|
||||
.long 0,0xffffffff,0,0xffffffff
|
||||
/* ALIGN16 is defined outside this file (presumably 16-byte alignment - confirm); the table is used as a 16-byte memory operand of xorps. */
ALIGN16
|
||||
/* Sign-bit mask for xorps: flips the sign of all four single-precision lanes. */
dct36_x86_64_sign:
|
||||
.long 0x80000000,0x80000000,0x80000000,0x80000000
|
||||
/*
	dct36_x86_64(in, out1, out2, w, ts) - straight-line SSE code, no branches and no calls.
	Reads:  18 floats at in (bytes 0..71), 36 floats at w (bytes 0..143), 18 floats at out1 (bytes 0..71).
	Writes: 18 floats at out2 (bytes 0..71) and 18 single floats into ts at byte offsets 32*4*k, k = 0..17 (a stride of 32 floats).
	Registers: in/out1/out2/w/ts hold the arguments, COS9_ and tfcos36_ point at the constant tables, xmm0-xmm15 are all used as scratch.
	When IS_MSABI is defined, xmm6-xmm15 are saved on entry and restored before ret; otherwise no stack is used.
	ASM_NAME, ALIGN16, MOVUAPS and NONEXEC_STACK are macros defined outside this file (presumably in mangle.h - confirm).
*/
.text
|
||||
ALIGN16
|
||||
.globl ASM_NAME(dct36_x86_64)
|
||||
ASM_NAME(dct36_x86_64):
|
||||
#ifdef IS_MSABI
|
||||
/* Microsoft x64 only: xmm6-xmm15 are callee-saved there, so spill them into a 160-byte frame. With the ABI-mandated stack alignment at the call, rsp is 16-byte aligned after push %rbp and stays so after the sub, which the movaps stores require. */
push %rbp
|
||||
mov %rsp, %rbp
|
||||
sub $160, %rsp
|
||||
movaps %xmm6, (%rsp)
|
||||
movaps %xmm7, 16(%rsp)
|
||||
movaps %xmm8, 32(%rsp)
|
||||
movaps %xmm9, 48(%rsp)
|
||||
movaps %xmm10, 64(%rsp)
|
||||
movaps %xmm11, 80(%rsp)
|
||||
movaps %xmm12, 96(%rsp)
|
||||
movaps %xmm13, 112(%rsp)
|
||||
movaps %xmm14, 128(%rsp)
|
||||
movaps %xmm15, 144(%rsp)
|
||||
/* Fifth argument: it sits above the saved rbp (8 bytes), the return address (8 bytes) and the 32-byte shadow space. */
movq 48(%rbp), ts
|
||||
#endif
|
||||
/* RIP-relative base addresses of the two constant tables. */
lea dct36_x86_64_COS9(%rip), COS9_
|
||||
lea dct36_x86_64_tfcos36(%rip), tfcos36_
|
||||
|
||||
/* Load in[0..17]: four full vectors in xmm1-xmm4 plus in[16], in[17] in the low half of xmm5 (its high half stays zero). */
xorps %xmm5, %xmm5
|
||||
movups (in), %xmm1
|
||||
movups 16(in), %xmm2
|
||||
movups 32(in), %xmm3
|
||||
movups 48(in), %xmm4
|
||||
movlps 64(in), %xmm5
|
||||
/* First pass: add to every element its predecessor. shufps 0x93 rotates a copy up by one lane, and the movss chain (run from the last vector down to the first) replaces lane 0 with the last element of the preceding vector; xmm6 = 0 stands in for the element before in[0]. */
xorps %xmm6, %xmm6
|
||||
movaps %xmm1, %xmm7
|
||||
shufps $0x93, %xmm7, %xmm7
|
||||
movaps %xmm2, %xmm8
|
||||
shufps $0x93, %xmm8, %xmm8
|
||||
movaps %xmm3, %xmm9
|
||||
shufps $0x93, %xmm9, %xmm9
|
||||
movaps %xmm4, %xmm10
|
||||
shufps $0x93, %xmm10, %xmm10
|
||||
movaps %xmm5, %xmm11
|
||||
/* 0xe1 swaps lanes 0 and 1, so in[16] lands in lane 1 before lane 0 is replaced by in[15]. */
shufps $0xe1, %xmm11, %xmm11
|
||||
movss %xmm10, %xmm11
|
||||
addps %xmm11, %xmm5
|
||||
movss %xmm9, %xmm10
|
||||
addps %xmm10, %xmm4
|
||||
movss %xmm8, %xmm9
|
||||
addps %xmm9, %xmm3
|
||||
movss %xmm7, %xmm8
|
||||
addps %xmm8, %xmm2
|
||||
movss %xmm6, %xmm7
|
||||
addps %xmm7, %xmm1
|
||||
|
||||
/* Second pass: shufps 0x4e joins the upper half of one vector with the lower half of the next, moving the data down by two elements; the masked (odd lanes only) copy of the unshifted vector is then added, so every odd-indexed element from index 3 up also receives the odd-indexed element two places before it. */
movaps dct36_x86_64_mask(%rip), %xmm0
|
||||
movaps %xmm4, %xmm6
|
||||
shufps $0x4e, %xmm5, %xmm4
|
||||
movaps %xmm3, %xmm7
|
||||
shufps $0x4e, %xmm6, %xmm3
|
||||
andps %xmm0, %xmm6
|
||||
addps %xmm6, %xmm4
|
||||
movaps %xmm2, %xmm6
|
||||
shufps $0x4e, %xmm7, %xmm2
|
||||
andps %xmm0, %xmm7
|
||||
addps %xmm7, %xmm3
|
||||
movaps %xmm1, %xmm7
|
||||
shufps $0x4e, %xmm6, %xmm1
|
||||
andps %xmm0, %xmm6
|
||||
addps %xmm6, %xmm2
|
||||
movaps %xmm7, %xmm6
|
||||
andps %xmm0, %xmm7
|
||||
/* The mask is no longer needed; xmm0 becomes [0, 0, in0, in1] via the movlhps below. */
xorps %xmm0, %xmm0
|
||||
addps %xmm7, %xmm1
|
||||
movlhps %xmm6, %xmm0
|
||||
|
||||
/*
|
||||
xmm0 in[-,-,0,1]
|
||||
xmm1 in[2,3,4,5]
|
||||
xmm2 in[6,7,8,9]
|
||||
xmm3 in[10,11,12,13]
|
||||
xmm4 in[14,15,16,17]
|
||||
*/
|
||||
|
||||
/* Regroup: shufps 0xe4 keeps the low half of the destination and takes the high half of the source. */
movaps %xmm2, %xmm5
|
||||
shufps $0xe4, %xmm3, %xmm5
|
||||
shufps $0xe4, %xmm4, %xmm3
|
||||
shufps $0xe4, %xmm2, %xmm4
|
||||
movaps %xmm5, %xmm2
|
||||
/*
|
||||
xmm2 in[6,7,12,13]
|
||||
xmm3 in[10,11,16,17]
|
||||
xmm4 in[14,15,8,9]
|
||||
*/
|
||||
|
||||
/* Load the four COS9 vectors, then xmm5 = in[6,7,12,13] * COS9_[3,3,6,6] + xmm0 (whose low two lanes are zero). */
movaps (COS9_), %xmm15
|
||||
movaps 16(COS9_), %xmm6
|
||||
movaps 32(COS9_), %xmm7
|
||||
movaps 48(COS9_), %xmm8
|
||||
mulps %xmm15, %xmm5
|
||||
addps %xmm0, %xmm5
|
||||
|
||||
/*
|
||||
xmm5 [ta33,tb33,ta66,tb66]
|
||||
xmm6 COS9_[1,1,2,2]
|
||||
xmm7 COS9_[5,5,8,8]
|
||||
xmm8 COS9_[7,7,4,4]
|
||||
xmm15 COS9_[3,3,6,6]
|
||||
*/
|
||||
/* xmm12 = xmm6*xmm1 + xmm7*xmm3 + xmm8*xmm4 + xmm5 */
movaps %xmm6, %xmm9
|
||||
movaps %xmm7, %xmm12
|
||||
movaps %xmm8, %xmm13
|
||||
mulps %xmm1, %xmm9
|
||||
mulps %xmm3, %xmm12
|
||||
mulps %xmm4, %xmm13
|
||||
addps %xmm5, %xmm9
|
||||
addps %xmm13, %xmm12
|
||||
addps %xmm9, %xmm12
|
||||
|
||||
/* xmm13 = (xmm1 - xmm3 - xmm4) * COS9_[3,3,6,6] + (xmm0 - xmm10), where xmm10 = [0, 0, in12, in13] is built by shufps 0xe0 from xmm0 and xmm2. */
movaps %xmm1, %xmm13
|
||||
subps %xmm3, %xmm13
|
||||
movaps %xmm0, %xmm10
|
||||
shufps $0xe0, %xmm2, %xmm10
|
||||
movaps %xmm0, %xmm14
|
||||
subps %xmm10, %xmm14
|
||||
subps %xmm4, %xmm13
|
||||
mulps %xmm15, %xmm13
|
||||
addps %xmm14, %xmm13
|
||||
|
||||
/* xmm14 = xmm7*xmm1 - xmm5 + xmm6*xmm4 - xmm8*xmm3 (xmm15 is reused as a temporary from here on). */
movaps %xmm7, %xmm9
|
||||
movaps %xmm8, %xmm15
|
||||
movaps %xmm6, %xmm14
|
||||
mulps %xmm1, %xmm9
|
||||
mulps %xmm3, %xmm15
|
||||
mulps %xmm4, %xmm14
|
||||
subps %xmm5, %xmm9
|
||||
subps %xmm15, %xmm14
|
||||
addps %xmm9, %xmm14
|
||||
|
||||
/* xmm15 = xmm8*xmm1 - xmm5 + xmm6*xmm3 - xmm7*xmm4; this consumes the coefficient registers xmm6-xmm8 in place. */
mulps %xmm1, %xmm8
|
||||
mulps %xmm3, %xmm6
|
||||
mulps %xmm4, %xmm7
|
||||
subps %xmm5, %xmm8
|
||||
subps %xmm7, %xmm6
|
||||
addps %xmm6, %xmm8
|
||||
movaps %xmm8, %xmm15
|
||||
|
||||
/* xmm0 = xmm0 - xmm1 + xmm3 + (xmm4 - xmm2); shufps 0xaf then places lane 3 in lanes 0-1 and lane 2 in lanes 2-3, and only lane 0 is scaled by the ninth tfcos36 word. The result is kept in xmm11. */
movss 32(tfcos36_), %xmm5
|
||||
subps %xmm1, %xmm0
|
||||
subps %xmm2, %xmm4
|
||||
addps %xmm3, %xmm0
|
||||
addps %xmm4, %xmm0
|
||||
shufps $0xaf, %xmm0, %xmm0
|
||||
mulss %xmm5, %xmm0
|
||||
movaps %xmm0, %xmm11
|
||||
|
||||
/*
|
||||
xmm12 [1a-0,1b-0, 2a-0, 2b-0]
|
||||
xmm13 [1a-1,1b-1, 2a-1, 2b-1]
|
||||
xmm14 [1a-2,1b-2,-2a-2,-2b-2]
|
||||
xmm15 [1a-3,1b-3,-2a-3,-2b-3]
|
||||
*/
|
||||
/* Interleave the four result vectors pairwise; the xorps flips the sign of the lanes that were produced negated (see the table above). */
movaps %xmm12, %xmm5
|
||||
unpckhps %xmm13, %xmm5
|
||||
unpcklps %xmm13, %xmm12
|
||||
movaps %xmm14, %xmm6
|
||||
unpckhps %xmm15, %xmm6
|
||||
unpcklps %xmm15, %xmm14
|
||||
xorps dct36_x86_64_sign(%rip), %xmm6
|
||||
|
||||
/*
|
||||
xmm12 [1a-0,1a-1,1b-0,1b-1]
|
||||
xmm5 [2a-0,2a-1,2b-0,2b-1]
|
||||
xmm14 [1a-2,1a-3,1b-2,1b-3]
|
||||
xmm6 [2a-2,2a-3,2b-2,2b-3]
|
||||
*/
|
||||
|
||||
/* Second half of the regrouping: combine the 64-bit halves so each register holds one complete group of four. */
movaps %xmm12, %xmm0
|
||||
movlhps %xmm14, %xmm12
|
||||
movhlps %xmm0, %xmm14
|
||||
movaps %xmm5, %xmm0
|
||||
movlhps %xmm6, %xmm0
|
||||
movhlps %xmm5, %xmm6
|
||||
movaps %xmm6, %xmm15
|
||||
|
||||
/*
|
||||
xmm12 tmp1a
|
||||
xmm0 tmp2a
|
||||
xmm14 tmp1b
|
||||
xmm15 tmp2b
|
||||
*/
|
||||
|
||||
/* xmm15 = (tmp2b + tmp1b) * tfcos36 words 0-3; xmm7 = (tmp2b - tmp1b) * tfcos36 words 4-7; xmm0 = tmp2a + tmp1a; xmm14 = tmp2a - tmp1a. */
movaps (tfcos36_), %xmm6
|
||||
movaps 16(tfcos36_), %xmm7
|
||||
movaps %xmm15, %xmm10
|
||||
addps %xmm14, %xmm15
|
||||
subps %xmm14, %xmm10
|
||||
movaps %xmm0, %xmm14
|
||||
addps %xmm12, %xmm0
|
||||
subps %xmm12, %xmm14
|
||||
mulps %xmm6, %xmm15
|
||||
mulps %xmm10, %xmm7
|
||||
|
||||
/*
|
||||
%xmm0 tmp[0,1,2,3]
|
||||
%xmm15 tmp[17,16,15,14]
|
||||
%xmm14 tmp[8,7,6,5]
|
||||
%xmm7 tmp[9,10,11,12]
|
||||
%xmm11 tmp[13,-,4,-]
|
||||
*/
|
||||
|
||||
/* Output group 1 (tmp[0..3] with tmp[17..14]): the sum times window words goes to out2, the difference times window words plus out1 goes to ts. shufps 0x1b reverses the lane order where the window or the result has to run backwards. */
movaps %xmm15, %xmm1
|
||||
movups 108(w), %xmm2
|
||||
movups 92(w), %xmm3
|
||||
shufps $0x1b, %xmm3, %xmm3
|
||||
movups 36(w), %xmm4
|
||||
movups 20(w), %xmm5
|
||||
shufps $0x1b, %xmm5, %xmm5
|
||||
movaps %xmm0, %xmm6
|
||||
addps %xmm1, %xmm0
|
||||
subps %xmm1, %xmm6
|
||||
mulps %xmm0, %xmm2
|
||||
mulps %xmm3, %xmm0
|
||||
mulps %xmm6, %xmm4
|
||||
mulps %xmm5, %xmm6
|
||||
movups 36(out1), %xmm1
|
||||
movups 20(out1), %xmm3
|
||||
shufps $0x1b, %xmm6, %xmm6
|
||||
addps %xmm4, %xmm1
|
||||
addps %xmm6, %xmm3
|
||||
shufps $0x1b, %xmm0, %xmm0
|
||||
movups %xmm2, 36(out2)
|
||||
movups %xmm0, 20(out2)
|
||||
/* Scatter the four lanes of xmm1 and xmm3 into ts one float at a time: lane 0, then lane 2 (movhlps), then lanes 1 and 3 after shufps 0xb1 swaps neighbours. */
movss %xmm1, 32*36(ts)
|
||||
movss %xmm3, 32*20(ts)
|
||||
movhlps %xmm1, %xmm2
|
||||
movhlps %xmm3, %xmm4
|
||||
movss %xmm2, 32*44(ts)
|
||||
movss %xmm4, 32*28(ts)
|
||||
shufps $0xb1, %xmm1, %xmm1
|
||||
shufps $0xb1, %xmm3, %xmm3
|
||||
movss %xmm1, 32*40(ts)
|
||||
movss %xmm3, 32*24(ts)
|
||||
movhlps %xmm1, %xmm2
|
||||
movhlps %xmm3, %xmm4
|
||||
movss %xmm2, 32*48(ts)
|
||||
movss %xmm4, 32*32(ts)
|
||||
|
||||
/* Scalar pair: tmp[4] (lane 2 of xmm11, fetched with movhlps) and tmp[13] (lane 0 of xmm11), handled with single-float operations. */
movhlps %xmm11, %xmm0
|
||||
movaps %xmm11, %xmm1
|
||||
movss 124(w), %xmm2
|
||||
movss 88(w), %xmm3
|
||||
movss 52(w), %xmm4
|
||||
movss 16(w), %xmm5
|
||||
movss %xmm0, %xmm6
|
||||
addss %xmm1, %xmm0
|
||||
subss %xmm1, %xmm6
|
||||
mulss %xmm0, %xmm2
|
||||
mulss %xmm3, %xmm0
|
||||
mulss %xmm6, %xmm4
|
||||
mulss %xmm5, %xmm6
|
||||
addss 52(out1), %xmm4
|
||||
addss 16(out1), %xmm6
|
||||
movss %xmm2, 52(out2)
|
||||
movss %xmm0, 16(out2)
|
||||
movss %xmm4, 32*52(ts)
|
||||
movss %xmm6, 32*16(ts)
|
||||
|
||||
/* Output group 2 (tmp[8..5] with tmp[9..12]): same scheme as group 1. The 16-byte accesses at byte offset 56 are split into two 8-byte halves (movlps/movhps) for w, out1 and out2. */
movaps %xmm14, %xmm0
|
||||
movaps %xmm7, %xmm1
|
||||
MOVUAPS 128(w), %xmm2
|
||||
movups 72(w), %xmm3
|
||||
shufps $0x1b, %xmm2, %xmm2
|
||||
movlps 56(w), %xmm4
|
||||
movhps 64(w), %xmm4
|
||||
MOVUAPS (w), %xmm5
|
||||
shufps $0x1b, %xmm4, %xmm4
|
||||
movaps %xmm0, %xmm6
|
||||
addps %xmm1, %xmm0
|
||||
subps %xmm1, %xmm6
|
||||
mulps %xmm0, %xmm2
|
||||
mulps %xmm3, %xmm0
|
||||
mulps %xmm6, %xmm4
|
||||
mulps %xmm5, %xmm6
|
||||
movlps 56(out1), %xmm1
|
||||
movhps 64(out1), %xmm1
|
||||
movups (out1), %xmm3
|
||||
shufps $0x1b, %xmm4, %xmm4
|
||||
addps %xmm6, %xmm3
|
||||
addps %xmm4, %xmm1
|
||||
shufps $0x1b, %xmm2, %xmm2
|
||||
movups %xmm0, (out2)
|
||||
movlps %xmm2, 56(out2)
|
||||
movhps %xmm2, 64(out2)
|
||||
/* Scatter xmm1 and xmm3 into ts lane by lane, in the same lane order as in group 1. */
movss %xmm1, 32*56(ts)
|
||||
movss %xmm3, (ts)
|
||||
movhlps %xmm1, %xmm2
|
||||
movhlps %xmm3, %xmm4
|
||||
movss %xmm2, 32*64(ts)
|
||||
movss %xmm4, 32*8(ts)
|
||||
shufps $0xb1, %xmm1, %xmm1
|
||||
shufps $0xb1, %xmm3, %xmm3
|
||||
movss %xmm1, 32*60(ts)
|
||||
movss %xmm3, 32*4(ts)
|
||||
movhlps %xmm1, %xmm2
|
||||
movhlps %xmm3, %xmm4
|
||||
movss %xmm2, 32*68(ts)
|
||||
movss %xmm4, 32*12(ts)
|
||||
|
||||
#ifdef IS_MSABI
|
||||
/* Microsoft x64 only: restore the callee-saved xmm registers and tear down the frame set up in the prologue. */
movaps (%rsp), %xmm6
|
||||
movaps 16(%rsp), %xmm7
|
||||
movaps 32(%rsp), %xmm8
|
||||
movaps 48(%rsp), %xmm9
|
||||
movaps 64(%rsp), %xmm10
|
||||
movaps 80(%rsp), %xmm11
|
||||
movaps 96(%rsp), %xmm12
|
||||
movaps 112(%rsp), %xmm13
|
||||
movaps 128(%rsp), %xmm14
|
||||
movaps 144(%rsp), %xmm15
|
||||
mov %rbp, %rsp
|
||||
pop %rbp
|
||||
#endif
|
||||
ret
|
||||
|
||||
/* NONEXEC_STACK is a macro defined outside this file (presumably emits the non-executable-stack note - confirm). */
NONEXEC_STACK
|
||||
Reference in New Issue
Block a user