Built SDL2_image and _mixer static
This commit is contained in:
476
libsdl2_image/external/jpeg-9b/jidctfst.S
vendored
Normal file
476
libsdl2_image/external/jpeg-9b/jidctfst.S
vendored
Normal file
@ -0,0 +1,476 @@
|
||||
/*
|
||||
* Copyright (C) 2008 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <machine/cpu-features.h>
|
||||
|
||||
.text
|
||||
.align
|
||||
|
||||
.global jpeg_idct_ifast
|
||||
.func jpeg_idct_ifast
|
||||
|
||||
// NOTE: sb=r9, fp=r11 ip=r12, sp=r13, lr=r14, pc=r15
|
||||
|
||||
// jpeg_idct_ifast (j_decompress_ptr cinfo,
|
||||
// jpeg_component_info * compptr,
|
||||
// short* coef_block,
|
||||
// unsigned char* output_buf,
|
||||
// int output_col)
|
||||
|
||||
// Stack frame layout, addressed from sp once the prologue has reserved
// local_SIZE bytes. The first four words are scratch slots that are written
// and reloaded as a group with stmia/ldmia on local_TMP0123.
#define local_TMP0123 sp
|
||||
#define local_TMP0 [sp, #0]
|
||||
#define local_TMP1 [sp, #4]
|
||||
#define local_TMP2 [sp, #8]
|
||||
#define local_TMP3 [sp, #12]
|
||||
// Slots filled by the prologue: biased range-limit pointer, output_col, output_buf.
#define local_RANGE_TABLE [sp, #16]
|
||||
#define local_OUTPUT_COL [sp, #20]
|
||||
#define local_OUTPUT_BUF [sp, #24]
|
||||
// Spare word; not referenced by the visible code.
#define local_UNUSED [sp, #28]
|
||||
// Byte offset from sp to the intermediate workspace.
#define off_WORKSPACE 32
|
||||
// Addressing form for the first workspace word. The offset name has to be
// spelled off_WORKSPACE exactly: a misspelled name is not expanded by the
// preprocessor and would reach the assembler as an undefined symbol.
#define local_WORKSPACE [sp, #off_WORKSPACE]
|
||||
// Total frame size: the 32-byte header above plus 8*8 four-byte workspace words.
#define local_SIZE (off_WORKSPACE + 8*8*4)
|
||||
|
||||
// Hard-coded byte offsets into the caller-supplied structures: the prologue
// reads the range-limit pointer from cinfo (r0) and the quantization table
// pointer from compptr (r1) at these offsets.
// NOTE(review): presumably these must track the C struct layouts - verify.
#define off_DECOMPRESS_range_limit_base 324
|
||||
#define off_COMPINFO_quanttable 80
|
||||
|
||||
// Number of entries per row and per column of a block.
#define DCTSIZE 8
|
||||
// Byte offset of row x inside a column of 2-byte entries (coefficient loads with ldrsh).
#define VY(x) ((x)*DCTSIZE*2)
|
||||
// Byte offset of row x inside a column of 4-byte entries (quant table loads, workspace stores).
#define QY(x) ((x)*DCTSIZE*4)
|
||||
|
||||
// Byte offset of entry x for 2-byte and 4-byte entries; not referenced by the visible code.
#define VX(x) ((x)*2)
|
||||
#define QX(x) ((x)*4)
|
||||
|
||||
// Multipliers scaled by 256, written as assembler immediates. The visible
// code does not reference these names; it builds the same values with
// mov/add or mla and shifts the products right by 8.
#define FIX_1_414213562 #362
|
||||
#define FIX_1_082392200 #277
|
||||
#define FIX_1_847759065 #473
|
||||
#define FIX_2_613125930 #669
|
||||
|
||||
// Not referenced by the visible code.
#define RANGE_MASK 1023
|
||||
|
||||
|
||||
|
||||
// Entry point. Register use on entry, as consumed below: r0 = cinfo,
// r1 = compptr, r2 = coef_block, r3 = output_buf; output_col is read from
// the stack. Saves registers, reserves the local frame and sets up the
// pointers used by the column pass.
jpeg_idct_ifast:
|
||||
// Preload hint for the start of the coefficient block.
PLD [r2, #0]
|
||||
// Push ten registers (40 bytes).
stmdb sp!, {r4,r5, r6,r7, r8,r9, r10,r11, r12,lr}
|
||||
// Word just above the ten saved registers: the stack-passed output_col.
ldr r4, [sp, #4*10]
|
||||
// Reserve the scratch slots plus the 8x8 word workspace.
sub sp, #local_SIZE
|
||||
|
||||
ldr r10,[r1, #off_COMPINFO_quanttable] // r10 = quanttable
|
||||
str r4, local_OUTPUT_COL
|
||||
str r3, local_OUTPUT_BUF
|
||||
ldr r5, [r0, #off_DECOMPRESS_range_limit_base]
|
||||
// The saved pointer is the loaded base plus 128.
add r5, r5, #128
|
||||
str r5, local_RANGE_TABLE
|
||||
mov fp, r2 // fp = coef_block
|
||||
// ip = write cursor into the workspace (first column).
add ip, sp, #off_WORKSPACE
|
||||
|
||||
// Column pass, loop top: load the eight 16-bit coefficients of the current
// column (fp), one per row, sign-extended into r0-r7, then branch to the
// shortcut when rows 1-7 are all zero.
VLoopTail:
|
||||
ldrsh r0, [fp, #VY(0)]
|
||||
ldrsh r1, [fp, #VY(1)]
|
||||
ldrsh r2, [fp, #VY(2)]
|
||||
ldrsh r3, [fp, #VY(3)]
|
||||
ldrsh r4, [fp, #VY(4)]
|
||||
ldrsh r5, [fp, #VY(5)]
|
||||
ldrsh r6, [fp, #VY(6)]
|
||||
ldrsh r7, [fp, #VY(7)]
|
||||
|
||||
// Z stays set only if r1..r7 are all zero: each orreqs executes only while
// the previous result was zero and updates the flags itself. r8 is just a
// throwaway destination.
cmp r1, #0
|
||||
orreqs r8, r2, r3
|
||||
orreqs r8, r4, r5
|
||||
orreqs r8, r6, r7
|
||||
beq VLoopHeadZero
|
||||
|
||||
// Column pass, even part, step 1: dequantize rows 0, 4, 2 and 6 with the
// 32-bit quant table entries at r10 and form the first sums/differences.
VLoopHead:
|
||||
// tmp0 = DEQUANTIZE(in[DCTSIZE*0], quant[DCTSIZE*0]) (r0)
|
||||
// tmp2 = DEQUANTIZE(in[DCTSIZE*4], quant[DCTSIZE*4]) (r4)
|
||||
// tmp1 = DEQUANTIZE(in[DCTSIZE*2], quant[DCTSIZE*2]) (r2)
|
||||
// tmp3 = DEQUANTIZE(in[DCTSIZE*6], quant[DCTSIZE*6]) (r6)
|
||||
// tmp10 = tmp0 + tmp2 (r0)
|
||||
// tmp11 = tmp0 - tmp2 (r4)
|
||||
|
||||
ldr r9, [r10, #QY(4)]
|
||||
ldr r8, [r10, #QY(0)]
|
||||
// Both variants leave r4 = tmp2 and r0 = tmp0 + tmp2 (tmp10); the halfword
// form multiplies the low 16 bits of each operand and fuses the add.
#if __ARM_HAVE_HALFWORD_MULTIPLY
|
||||
smulbb r4, r9, r4
|
||||
smlabb r0, r8, r0, r4
|
||||
#else
|
||||
mul r4, r9, r4
|
||||
mul r0, r8, r0
|
||||
add r0, r4
|
||||
#endif
|
||||
ldr r9, [r10, #QY(6)]
|
||||
ldr r8, [r10, #QY(2)]
// r4 = tmp10 - 2*tmp2 = tmp0 - tmp2 (tmp11)
sub r4, r0, r4, lsl #1
|
||||
// Both variants leave r6 = tmp3 and r2 = tmp1 + tmp3 (tmp13).
#if __ARM_HAVE_HALFWORD_MULTIPLY
|
||||
smulbb r6, r9, r6
|
||||
smlabb r2, r8, r2, r6
|
||||
#else
|
||||
mul r6, r9, r6
|
||||
mul r2, r8, r2
|
||||
add r2, r6
|
||||
#endif
|
||||
|
||||
// Column pass, even part, step 2: scale tmp1 - tmp3 by 362/256, finish the
// even butterfly and park tmp0..tmp3 in the four stack scratch slots.
// tmp13 = tmp1 + tmp3 (r2)
|
||||
// tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13 (r6)
|
||||
// FIX_1_4142... = 362 = 45*8 + 2
|
||||
// r6 = tmp13 - 2*tmp3 = tmp1 - tmp3
sub r6, r2, r6, lsl #1
|
||||
// r8 = 362, built in two steps
mov r8, #360
|
||||
add r8, r8, #2
|
||||
// r9 = (tmp1 - tmp3) * 362, still scaled by 256
mul r9, r6, r8
|
||||
|
||||
// tmp0 = tmp10 + tmp13; (r0)
|
||||
// tmp3 = tmp10 - tmp13; (r8)
|
||||
// tmp1 = tmp11 + tmp12; (r4)
|
||||
// tmp2 = tmp11 - tmp12; (r6)
|
||||
add r0, r0, r2
|
||||
// r6 = (r9 >> 8) - tmp13 = tmp12
rsb r6, r2, r9, asr #8
|
||||
// r8 = tmp0 - 2*tmp13 = tmp10 - tmp13
sub r8, r0, r2, lsl #1
|
||||
add r4, r4, r6
|
||||
// r6 = tmp1 - 2*tmp12 = tmp11 - tmp12
sub r6, r4, r6, lsl #1
|
||||
|
||||
// Slots TMP0..TMP3 receive tmp0, tmp1, tmp2, tmp3 in that order.
stmia local_TMP0123, {r0, r4, r6, r8}
|
||||
|
||||
// Column pass, odd part, step 1: dequantize rows 1, 5, 3 and 7 and form
// z13, z10, z11 and z12.
// NOTE: be sure to not use r0,r4,r6,r8 soon after stm above
// (presumably a scheduling hint for the store - confirm)
|
||||
|
||||
// odd part
|
||||
// tmp4 = DEQUANTIZE( in[DCTSIZE*1], quant[DCTSIZE*1] ) (r1)
|
||||
// tmp6 = DEQUANTIZE( in[DCTSIZE*5], quant[DCTSIZE*5] ) (r5)
|
||||
// tmp5 = DEQUANTIZE( in[DCTSIZE*3], quant[DCTSIZE*3] ) (r3)
|
||||
// tmp7 = DEQUANTIZE( in[DCTSIZE*7], quant[DCTSIZE*7] ) (r7)
|
||||
// z13 = tmp6 + tmp5; (r0)
|
||||
// z10 = tmp6 - tmp5; (r2)
|
||||
// z11 = tmp4 + tmp7; (r4)
|
||||
// z12 = tmp4 - tmp7; (r6)
|
||||
|
||||
ldr r2, [r10, #QY(1)]
|
||||
ldr r9, [r10, #QY(5)]
|
||||
// r1 = tmp4
#if __ARM_HAVE_HALFWORD_MULTIPLY
|
||||
smulbb r1, r2, r1
|
||||
#else
|
||||
mul r1, r2, r1
|
||||
#endif
|
||||
ldr r2, [r10, #QY(3)]
|
||||
// r5 = tmp6
#if __ARM_HAVE_HALFWORD_MULTIPLY
|
||||
smulbb r5, r9, r5
|
||||
#else
|
||||
mul r5, r9, r5
|
||||
#endif
|
||||
ldr r9, [r10, #QY(7)]
|
||||
// Both variants leave r0 = tmp5 + tmp6 (z13) and r4 = tmp7 + tmp4 (z11);
// tmp5 and tmp7 are never held on their own.
#if __ARM_HAVE_HALFWORD_MULTIPLY
|
||||
smlabb r0, r2, r3, r5
|
||||
smlabb r4, r9, r7, r1
|
||||
#else
|
||||
mul r0, r2, r3
|
||||
add r0, r5
|
||||
mul r4, r9, r7
|
||||
add r4, r1
|
||||
#endif
|
||||
// r2 = 2*tmp6 - z13 = tmp6 - tmp5 (z10)
rsb r2, r0, r5, lsl #1
|
||||
// r6 = 2*tmp4 - z11 = tmp4 - tmp7 (z12)
rsb r6, r4, r1, lsl #1
|
||||
|
||||
// Column pass, odd part, step 2: apply the fixed-point multipliers to the
// z terms and derive tmp7, tmp6, tmp5 and tmp4. Products stay scaled by 256
// until the asr #8 operands at the end.
// tmp7 = z11 + z13; (r7)
|
||||
// tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); (r1)
|
||||
// FIX_... = 360 + 2
|
||||
add r7, r4, r0
|
||||
sub r1, r4, r0
|
||||
mov r8, #360
|
||||
add r8, r8, #2
|
||||
mul r1, r8, r1
|
||||
|
||||
// z5 = MULTIPLY(z10 + z12, FIX_1_847759065); (r8)
|
||||
// tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; (r0)
|
||||
// tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; (r2)
|
||||
// FIX_1_8477... = 473 = 472 + 1
|
||||
// FIX_1_082... = 277 = 276 + 1
|
||||
// FIX_2_... = 669 = 668 + 1
|
||||
// Each mla computes x*k + x, i.e. x*(k+1), so the odd multipliers 473, 277
// and 669 are reached from the constants 472, 276 and 668 loaded by mov.
add r8, r2, r6
|
||||
mov r9, #472
|
||||
mla r8, r9, r8, r8
|
||||
mov r9, #276
|
||||
mla r0, r6, r9, r6
|
||||
mov r9, #668
|
||||
mla r2, r9, r2, r2
|
||||
sub r0, r0, r8
|
||||
// r2 = z5 - 669*z10
rsb r2, r2, r8
|
||||
|
||||
// tmp6 = tmp12 - tmp7; (r6)
|
||||
// tmp5 = tmp11 - tmp6; (r5)
|
||||
// tmp4 = tmp10 + tmp5; (r4)
|
||||
rsb r6, r7, r2, asr #8
|
||||
rsb r5, r6, r1, asr #8
|
||||
add r4, r5, r0, asr #8
|
||||
|
||||
// Column pass, final step: reload tmp0..tmp3 from the scratch slots into
// r0..r3, combine them with the odd-part results and write the eight
// results down the current workspace column (ip).
ldmia local_TMP0123, {r0, r1, r2, r3}
|
||||
|
||||
// wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
|
||||
// wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
|
||||
// wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
|
||||
// wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
|
||||
// wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5);
|
||||
// wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
|
||||
// wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
|
||||
// wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
|
||||
|
||||
// Each pair forms the sum first and then derives the difference as
// sum - 2*b; for row 3/4 the difference is formed first and the sum as
// difference + 2*b. After this, register rN holds the value for row N.
add r0, r0, r7
|
||||
sub r7, r0, r7, lsl #1
|
||||
add r1, r1, r6
|
||||
sub r6, r1, r6, lsl #1
|
||||
add r2, r2, r5
|
||||
sub r5, r2, r5, lsl #1
|
||||
sub r3, r3, r4
|
||||
add r4, r3, r4, lsl #1
|
||||
|
||||
str r0, [ip, #QY(0)]
|
||||
str r1, [ip, #QY(1)]
|
||||
str r2, [ip, #QY(2)]
|
||||
str r3, [ip, #QY(3)]
|
||||
str r4, [ip, #QY(4)]
|
||||
str r5, [ip, #QY(5)]
|
||||
str r6, [ip, #QY(6)]
|
||||
str r7, [ip, #QY(7)]
|
||||
|
||||
// Column pass, loop bottom: step the coefficient, quant table and workspace
// cursors to the next column and repeat until eight columns are done.
// inptr++; /* advance pointers to next column */
|
||||
// quantptr++;
|
||||
// wsptr++;
|
||||
// Coefficients are 2 bytes wide, quant and workspace entries 4 bytes.
add fp, fp, #2
|
||||
add r10, r10, #4
|
||||
add ip, ip, #4
|
||||
// End marker: workspace start plus eight 4-byte columns.
add r0, sp, #(off_WORKSPACE + 4*8)
|
||||
cmp ip, r0
|
||||
bne VLoopTail
|
||||
|
||||
|
||||
|
||||
// Row pass setup: rewind ip to the start of the workspace.
HLoopStart:
|
||||
// reset pointers
|
||||
PLD [sp, #off_WORKSPACE]
|
||||
add ip, sp, #off_WORKSPACE
|
||||
// r10 is reloaded here but is not read again by the visible row-pass code,
// which clamps with usat or compare sequences instead of a table lookup.
ldr r10, local_RANGE_TABLE
|
||||
|
||||
// Row pass, loop top: fetch the next output row pointer, load one workspace
// row into r0-r7 and branch to the shortcut when entries 1-7 are all zero.
HLoopTail:
|
||||
// output = *output_buf++ + output_col
|
||||
ldr r0, local_OUTPUT_BUF
|
||||
ldr r1, local_OUTPUT_COL
|
||||
// Post-indexed load: r2 = current row pointer, then r0 advances by 4.
ldr r2, [r0], #4
|
||||
str r0, local_OUTPUT_BUF
|
||||
// fp = destination address for this row
add fp, r2, r1
|
||||
|
||||
// Preload hint 32 bytes ahead, i.e. the row after the one loaded next.
PLD [ip, #32]
|
||||
// Loads eight words and advances ip by 32 bytes.
ldmia ip!, {r0-r7}
|
||||
|
||||
// Z stays set only if r1..r7 are all zero (same chain as in the column pass).
cmp r1, #0
|
||||
orreqs r8, r2, r3
|
||||
orreqs r8, r4, r5
|
||||
orreqs r8, r6, r7
|
||||
beq HLoopTailZero
|
||||
|
||||
// Row pass, even part: same butterfly as the column pass, applied to the
// workspace words already in r0, r2, r4 and r6 (no dequantization here).
HLoopHead:
|
||||
// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]); (r0)
|
||||
// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]); (r4)
|
||||
add r0, r0, r4
|
||||
// r4 = tmp10 - 2*wsptr[4]
sub r4, r0, r4, lsl #1
|
||||
|
||||
// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]); (r2)
|
||||
// tmp12 = MULTIPLY((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6], FIX_1_414213562) - tmp13; (r6)
|
||||
// FIX_... = 360 + 2
|
||||
add r2, r2, r6
|
||||
// r6 = tmp13 - 2*wsptr[6] = wsptr[2] - wsptr[6]
sub r6, r2, r6, lsl #1
|
||||
mov r8, #360
|
||||
add r8, r8, #2
|
||||
// r6 = (wsptr[2] - wsptr[6]) * 362, still scaled by 256
mul r6, r8, r6
|
||||
|
||||
// tmp0 = tmp10 + tmp13; (r0)
|
||||
// tmp3 = tmp10 - tmp13; (r8)
|
||||
// tmp1 = tmp11 + tmp12; (r4)
|
||||
// tmp2 = tmp11 - tmp12; (r6)
|
||||
add r0, r0, r2
|
||||
// r6 = (r6 >> 8) - tmp13 = tmp12
rsb r6, r2, r6, asr #8
|
||||
sub r8, r0, r2, lsl #1
|
||||
add r4, r4, r6
|
||||
sub r6, r4, r6, lsl #1
|
||||
|
||||
// Slots TMP0..TMP3 receive tmp0, tmp1, tmp2, tmp3 in that order.
stmia local_TMP0123, {r0, r4, r6, r8}
|
||||
|
||||
// Row pass, odd part: form the z terms from workspace words 1, 3, 5 and 7,
// apply the fixed-point multipliers and derive tmp7, tmp6, tmp5 and tmp4.
// Odd part
|
||||
|
||||
// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3]; (r0)
|
||||
// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3]; (r2)
|
||||
// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7]; (r4)
|
||||
// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7]; (r6)
|
||||
add r0, r5, r3
|
||||
sub r2, r5, r3
|
||||
add r4, r1, r7
|
||||
sub r6, r1, r7
|
||||
|
||||
// tmp7 = z11 + z13; (r7)
|
||||
// tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); (r1)
|
||||
// FIX_... = 360 + 2
|
||||
add r7, r4, r0
|
||||
sub r1, r4, r0
|
||||
mov r8, #360
|
||||
add r8, r8, #2
|
||||
mul r1, r8, r1
|
||||
|
||||
// z5 = MULTIPLY(z10 + z12, FIX_1_847759065); (r8)
|
||||
// tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; (r0)
|
||||
// tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5; (r2)
|
||||
// FIX_1_8477... = 473 = 472 + 1
|
||||
// FIX_1_082... = 277 = 276 + 1
|
||||
// FIX_2_... = 669 = 668 + 1
|
||||
// Each mla computes x*k + x, i.e. x*(k+1).
add r8, r2, r6
|
||||
mov r9, #472
|
||||
mla r8, r9, r8, r8
|
||||
mov r9, #276
|
||||
mla r0, r6, r9, r6
|
||||
mov r9, #668
|
||||
mla r2, r9, r2, r2
|
||||
sub r0, r0, r8
|
||||
// r2 = z5 - 669*z10
sub r2, r8, r2
|
||||
|
||||
// tmp6 = tmp12 - tmp7; (r6)
|
||||
// tmp5 = tmp11 - tmp6; (r5)
|
||||
// tmp4 = tmp10 + tmp5; (r4)
|
||||
// The asr #8 operands remove the 256 scaling of the products.
rsb r6, r7, r2, asr #8
|
||||
rsb r5, r6, r1, asr #8
|
||||
add r4, r5, r0, asr #8
|
||||
|
||||
// Row pass, output step 1: reload tmp0..tmp3 into r0..r3, form the eight
// sums/differences, shift each right by 5 and add 128. Afterwards register
// rN holds the unclamped value for outptr[N].
ldmia local_TMP0123, {r0, r1, r2, r3}
|
||||
|
||||
// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3) & RANGE_MASK];
|
||||
// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3) & RANGE_MASK];
|
||||
// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3) & RANGE_MASK];
|
||||
// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3) & RANGE_MASK];
|
||||
// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3) & RANGE_MASK];
|
||||
// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3) & RANGE_MASK];
|
||||
// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3) & RANGE_MASK];
|
||||
// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3) & RANGE_MASK];
|
||||
|
||||
// r8 = bias added to every descaled value
mov r8, #128
|
||||
add r0, r0, r7
|
||||
sub r7, r0, r7, lsl #1
|
||||
add r0, r8, r0, asr #5
|
||||
add r7, r8, r7, asr #5
|
||||
add r1, r1, r6
|
||||
sub r6, r1, r6, lsl #1
|
||||
add r1, r8, r1, asr #5
|
||||
add r6, r8, r6, asr #5
|
||||
add r2, r2, r5
|
||||
sub r5, r2, r5, lsl #1
|
||||
add r2, r8, r2, asr #5
|
||||
add r5, r8, r5, asr #5
|
||||
// Row 3/4 pair: difference first (r3 = tmp3 - tmp4), then r4 = r3 + 2*tmp4.
sub r3, r3, r4
|
||||
add r4, r3, r4, lsl #1
|
||||
add r3, r8, r3, asr #5
|
||||
add r4, r8, r4, asr #5
|
||||
|
||||
// Row pass, output step 2: clamp r0..r7 to the range 0..255.
// With __ARM_ARCH__ >= 6 a single unsigned saturate per register is used.
#if __ARM_ARCH__ >= 6
|
||||
usat r0, #8, r0
|
||||
usat r1, #8, r1
|
||||
usat r2, #8, r2
|
||||
usat r3, #8, r3
|
||||
usat r4, #8, r4
|
||||
usat r5, #8, r5
|
||||
usat r6, #8, r6
|
||||
usat r7, #8, r7
|
||||
#else
|
||||
// Fallback: hi is an unsigned comparison, so it is true both for values
// above 255 and for negative values. mvn of (x asr #31) then yields 0 for
// negative x and all ones otherwise, and the and with 255 turns all ones
// into 255.
// r7 and r3 deliberately skip the and: the packing code that follows only
// uses them shifted left by 24, which discards everything above the low
// eight bits anyway.
cmp r0, #255
|
||||
mvnhi r0, r0, asr #31
|
||||
andhi r0, #255
|
||||
cmp r7, #255
|
||||
mvnhi r7, r7, asr #31
|
||||
cmp r1, #255
|
||||
mvnhi r1, r1, asr #31
|
||||
andhi r1, #255
|
||||
cmp r6, #255
|
||||
mvnhi r6, r6, asr #31
|
||||
andhi r6, #255
|
||||
cmp r2, #255
|
||||
mvnhi r2, r2, asr #31
|
||||
andhi r2, #255
|
||||
cmp r5, #255
|
||||
mvnhi r5, r5, asr #31
|
||||
andhi r5, #255
|
||||
cmp r3, #255
|
||||
mvnhi r3, r3, asr #31
|
||||
cmp r4, #255
|
||||
mvnhi r4, r4, asr #31
|
||||
andhi r4, #255
|
||||
#endif
|
||||
|
||||
// Row pass, output step 3: pack the eight clamped bytes into two words,
// store them at fp and loop until ip reaches the end of the workspace.
// Byte N of the packed pair comes from register rN, lowest byte first.
// NOTE(review): the lowest byte lands at outptr[0] only on a little-endian
// target, and stmia presumably expects fp to be word aligned - confirm.
// r3 r2 r1 r0
|
||||
orr r0, r0, r1, lsl #8
|
||||
orr r0, r0, r2, lsl #16
|
||||
orr r0, r0, r3, lsl #24
|
||||
|
||||
// r7 r6 r5 r4
|
||||
orr r1, r4, r5, lsl #8
|
||||
orr r1, r1, r6, lsl #16
|
||||
orr r1, r1, r7, lsl #24
|
||||
stmia fp, {r0, r1}
|
||||
|
||||
// End marker: workspace start plus 8*8 four-byte words.
add r0, sp, #(off_WORKSPACE + 8*8*4)
|
||||
cmp ip, r0
|
||||
bne HLoopTail
|
||||
|
||||
// Common epilogue: release the local frame, restore the ten registers pushed
// by the prologue and return to the caller.
Exit:
|
||||
add sp, sp, #local_SIZE
|
||||
ldmia sp!, {r4,r5, r6,r7, r8,r9, r10,r11, r12,lr}
|
||||
bx lr
|
||||
|
||||
|
||||
// Column pass shortcut, taken when rows 1-7 of the column are zero: the
// dequantized row-0 term is written to all eight rows of the workspace
// column, then the same pointer advance and loop test as the main path run.
VLoopHeadZero:
|
||||
// ok, all AC coefficients are 0
|
||||
ldr r1, [r10, #QY(0)]
|
||||
add fp, fp, #2
|
||||
add r10, r10, #4
|
||||
// r0 = quant[0] * in[0]; a plain mul is used here in both build variants.
mul r0, r1, r0
|
||||
str r0, [ip, #QY(0)]
|
||||
str r0, [ip, #QY(1)]
|
||||
str r0, [ip, #QY(2)]
|
||||
str r0, [ip, #QY(3)]
|
||||
str r0, [ip, #QY(4)]
|
||||
str r0, [ip, #QY(5)]
|
||||
str r0, [ip, #QY(6)]
|
||||
str r0, [ip, #QY(7)]
|
||||
add ip, ip, #4
|
||||
// End marker: workspace start plus eight 4-byte columns.
add r0, sp, #(off_WORKSPACE + 4*8)
|
||||
cmp ip, r0
|
||||
beq HLoopStart
|
||||
b VLoopTail
|
||||
|
||||
// Row pass shortcut, taken when workspace words 1-7 of the row are zero:
// word 0 is descaled, biased and clamped once, and the resulting byte is
// replicated to all eight output positions.
HLoopTailZero:
|
||||
mov r0, r0, asr #5
|
||||
add r0, #128
|
||||
|
||||
// Clamp to 0..255 (saturate on v6 and later, compare sequence otherwise).
#if __ARM_ARCH__ >= 6
|
||||
usat r0, #8, r0
|
||||
#else
|
||||
cmp r0, #255
|
||||
mvnhi r0, r0, asr #31
|
||||
andhi r0, r0, #255
|
||||
#endif
|
||||
|
||||
// Replicate the byte: first into the low halfword, then into the full word.
orr r0, r0, lsl #8
|
||||
orr r0, r0, lsl #16
|
||||
mov r1, r0
|
||||
stmia fp, {r0, r1}
|
||||
|
||||
// End marker: workspace start plus 64 four-byte words.
add r0, sp, #(off_WORKSPACE + 64*4)
|
||||
cmp ip, r0
|
||||
beq Exit
|
||||
b HLoopTail
|
||||
|
||||
.endfunc
|
Reference in New Issue
Block a user