Built SDL2_image and _mixer static
This commit is contained in:
136
libsdl2_mixer/external/mpg123-1.25.6/src/libmpg123/synth_neon64_s32.S
vendored
Normal file
136
libsdl2_mixer/external/mpg123-1.25.6/src/libmpg123/synth_neon64_s32.S
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
/*
|
||||
synth_neon64_s32: NEON optimized synth for AArch64 (32-bit output version)
|
||||
|
||||
copyright 1995-2014 by the mpg123 project - free software under the terms of the LGPL 2.1
|
||||
see COPYING and AUTHORS files in distribution or http://mpg123.org
|
||||
initially written by Taihei Monma
|
||||
*/
|
||||
|
||||
#include "mangle.h"
|
||||
|
||||
#ifndef __APPLE__
|
||||
.section .rodata
|
||||
#else
|
||||
.data
|
||||
#endif
|
||||
ALIGN16
|
||||
maxmin_s32:
|
||||
.word 1191182335
|
||||
.word -956301312
|
||||
.word 1199570944
|
||||
.text
|
||||
ALIGN4
|
||||
.globl ASM_NAME(synth_1to1_s32_neon64_asm)
|
||||
#ifdef __ELF__
|
||||
.type ASM_NAME(synth_1to1_s32_neon64_asm), %function
|
||||
#endif
|
||||
ASM_NAME(synth_1to1_s32_neon64_asm):
|
||||
add x0, x0, #64
|
||||
sub x0, x0, x3, lsl #2
|
||||
eor v31.16b, v31.16b, v31.16b
|
||||
adrp x5, AARCH64_PCREL_HI(maxmin_s32)
|
||||
add x5, x5, AARCH64_PCREL_LO(maxmin_s32)
|
||||
ld3r {v28.4s,v29.4s,v30.4s}, [x5]
|
||||
|
||||
mov w4, #4
|
||||
mov x5, #128
|
||||
1:
|
||||
ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x0], x5
|
||||
ld1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x0], x5
|
||||
ld1 {v16.4s,v17.4s,v18.4s,v19.4s}, [x1], #64
|
||||
ld1 {v20.4s,v21.4s,v22.4s,v23.4s}, [x1], #64
|
||||
|
||||
fmul v24.4s, v0.4s, v16.4s
|
||||
fmul v25.4s, v4.4s, v20.4s
|
||||
fmla v24.4s, v1.4s, v17.4s
|
||||
fmla v25.4s, v5.4s, v21.4s
|
||||
fmla v24.4s, v2.4s, v18.4s
|
||||
fmla v25.4s, v6.4s, v22.4s
|
||||
fmla v24.4s, v3.4s, v19.4s
|
||||
fmla v25.4s, v7.4s, v23.4s
|
||||
|
||||
ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x0], x5
|
||||
ld1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x0], x5
|
||||
ld1 {v16.4s,v17.4s,v18.4s,v19.4s}, [x1], #64
|
||||
ld1 {v20.4s,v21.4s,v22.4s,v23.4s}, [x1], #64
|
||||
|
||||
fmul v26.4s, v0.4s, v16.4s
|
||||
fmul v27.4s, v4.4s, v20.4s
|
||||
fmla v26.4s, v1.4s, v17.4s
|
||||
fmla v27.4s, v5.4s, v21.4s
|
||||
fmla v26.4s, v2.4s, v18.4s
|
||||
fmla v27.4s, v6.4s, v22.4s
|
||||
fmla v26.4s, v3.4s, v19.4s
|
||||
fmla v27.4s, v7.4s, v23.4s
|
||||
|
||||
faddp v0.4s, v24.4s, v25.4s
|
||||
faddp v1.4s, v26.4s, v27.4s
|
||||
faddp v0.4s, v0.4s, v1.4s
|
||||
fmul v1.4s, v0.4s, v30.4s
|
||||
ld2 {v4.4s,v5.4s}, [x2]
|
||||
fcvtns v4.4s, v1.4s
|
||||
fcmgt v2.4s, v0.4s, v28.4s
|
||||
fcmgt v3.4s, v29.4s, v0.4s
|
||||
add v2.4s, v2.4s, v3.4s
|
||||
add v31.4s, v31.4s, v2.4s
|
||||
st2 {v4.4s,v5.4s}, [x2], #32
|
||||
|
||||
subs w4, w4, #1
|
||||
b.ne 1b
|
||||
|
||||
mov w4, #4
|
||||
mov x6, #-64
|
||||
2:
|
||||
ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x0], x5
|
||||
ld1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x0], x5
|
||||
ld1 {v16.4s,v17.4s,v18.4s,v19.4s}, [x1], x6
|
||||
ld1 {v20.4s,v21.4s,v22.4s,v23.4s}, [x1], x6
|
||||
|
||||
fmul v24.4s, v0.4s, v16.4s
|
||||
fmul v25.4s, v4.4s, v20.4s
|
||||
fmla v24.4s, v1.4s, v17.4s
|
||||
fmla v25.4s, v5.4s, v21.4s
|
||||
fmla v24.4s, v2.4s, v18.4s
|
||||
fmla v25.4s, v6.4s, v22.4s
|
||||
fmla v24.4s, v3.4s, v19.4s
|
||||
fmla v25.4s, v7.4s, v23.4s
|
||||
|
||||
ld1 {v0.4s,v1.4s,v2.4s,v3.4s}, [x0], x5
|
||||
ld1 {v4.4s,v5.4s,v6.4s,v7.4s}, [x0], x5
|
||||
ld1 {v16.4s,v17.4s,v18.4s,v19.4s}, [x1], x6
|
||||
ld1 {v20.4s,v21.4s,v22.4s,v23.4s}, [x1], x6
|
||||
|
||||
fmul v26.4s, v0.4s, v16.4s
|
||||
fmul v27.4s, v4.4s, v20.4s
|
||||
fmla v26.4s, v1.4s, v17.4s
|
||||
fmla v27.4s, v5.4s, v21.4s
|
||||
fmla v26.4s, v2.4s, v18.4s
|
||||
fmla v27.4s, v6.4s, v22.4s
|
||||
fmla v26.4s, v3.4s, v19.4s
|
||||
fmla v27.4s, v7.4s, v23.4s
|
||||
|
||||
faddp v0.4s, v24.4s, v25.4s
|
||||
faddp v1.4s, v26.4s, v27.4s
|
||||
faddp v0.4s, v0.4s, v1.4s
|
||||
fmul v1.4s, v0.4s, v30.4s
|
||||
ld2 {v4.4s,v5.4s}, [x2]
|
||||
fcvtns v4.4s, v1.4s
|
||||
fcmgt v2.4s, v0.4s, v28.4s
|
||||
fcmgt v3.4s, v29.4s, v0.4s
|
||||
add v2.4s, v2.4s, v3.4s
|
||||
add v31.4s, v31.4s, v2.4s
|
||||
st2 {v4.4s,v5.4s}, [x2], #32
|
||||
|
||||
subs w4, w4, #1
|
||||
b.ne 2b
|
||||
|
||||
AARCH64_DUP_2D(v0, v31, 1)
|
||||
add v0.4s, v0.4s, v31.4s
|
||||
AARCH64_DUP_4S(v1, v0, 1)
|
||||
add v0.4s, v0.4s, v1.4s
|
||||
umov w0, v0.s[0]
|
||||
neg w0, w0
|
||||
|
||||
ret
|
||||
|
||||
NONEXEC_STACK
|
||||
Reference in New Issue
Block a user