 GET TimLib:hdr.Macros

 EXPORT Flac_ASM_LPC32
 EXPORT Flac_ASM_LPC64

; default area assignment

 AREA |Assembler|,READONLY,CODE

;----------------------------------------------------------------------------
; Flac_ASM_LPC64
;
; LPC restoration using a 64-bit accumulator (SMLAL). Every residue in the
; sample buffer is replaced in place by
;   residue + bits [shift .. shift+31] of
;             sum(k = 0 .. order-1) predictor[k] * sample[n-1-k]
;
; In  - R0  pointer to
;       +0  LPC predictors
;       +4  LPC order (the unrolled block below has 32 taps, so at most 32)
;       +8  pointer to samples (must have 'order' warmup samples preceding
;           it), prefilled with residues
;       +12 nr of samples to calculate
;       +16 LPC shift
;
; Register use once the parameter block has been loaded:
;   R0 = predictors        R1 = byte offset into the unrolled taps
;   R2 = current sample    R3 = samples left to calculate
;   R4 = shift             R9 = 32 - shift
;   R5 = accumulator low   R6 = accumulator high
;   R7 = sample / residue  R8 = predictor
;
; The sample count is tested at the bottom of the loop, so one sample is
; always processed, even when the count passed in is 0.
;----------------------------------------------------------------------------
 ALIGN
Flac_ASM_LPC64 _FNAME
; _DEFPROC/_ENDPROC come from TimLib:hdr.Macros (not visible here);
; presumably they save and restore the listed registers - confirm there.
 _DEFPROC "R4-R9"

; R0 = predictors, R1 = order, R2 = samples, R3 = count, R4 = shift
 LDMIA   R0,{R0-R4}
; Calculate jump: each tap is 3 instructions (12 bytes) and only the last
; 'order' taps must run, so skip (32 - order) * 12 bytes
 RSB     R1,R1,#32
 MOV     R5,#12
 MUL     R1,R5,R1
; R9 = 32 - shift, used to move the high accumulator word into place
 RSB     R9,R4,#32

; loop on samples
Flac_ASM_LPC64_SampleLoop
; clear the 64-bit accumulator R6:R5
 MOV     R5,#0
 MOV     R6,#0
; jump to the correct place in the vector multiplication
; (PC reads as this instruction + 8, i.e. the first LDR below; the
; MOV R0,R0 is a no-op filling the slot in between)
 ADD     PC,PC,R1
 MOV     R0,R0
; loop on orders (unrolled): sample[n-1-k] * predictor[k], k = 31 down to 0
 LDR     R7,[R2,#-128]
 LDR     R8,[R0,#124]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-124]
 LDR     R8,[R0,#120]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-120]
 LDR     R8,[R0,#116]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-116]
 LDR     R8,[R0,#112]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-112]
 LDR     R8,[R0,#108]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-108]
 LDR     R8,[R0,#104]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-104]
 LDR     R8,[R0,#100]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-100]
 LDR     R8,[R0,#96]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-96]
 LDR     R8,[R0,#92]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-92]
 LDR     R8,[R0,#88]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-88]
 LDR     R8,[R0,#84]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-84]
 LDR     R8,[R0,#80]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-80]
 LDR     R8,[R0,#76]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-76]
 LDR     R8,[R0,#72]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-72]
 LDR     R8,[R0,#68]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-68]
 LDR     R8,[R0,#64]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-64]
 LDR     R8,[R0,#60]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-60]
 LDR     R8,[R0,#56]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-56]
 LDR     R8,[R0,#52]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-52]
 LDR     R8,[R0,#48]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-48]
 LDR     R8,[R0,#44]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-44]
 LDR     R8,[R0,#40]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-40]
 LDR     R8,[R0,#36]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-36]
 LDR     R8,[R0,#32]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-32]
 LDR     R8,[R0,#28]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-28]
 LDR     R8,[R0,#24]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-24]
 LDR     R8,[R0,#20]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-20]
 LDR     R8,[R0,#16]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-16]
 LDR     R8,[R0,#12]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-12]
 LDR     R8,[R0,#8]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-8]
 LDR     R8,[R0,#4]
 SMLAL   R5,R6,R7,R8
 LDR     R7,[R2,#-4]
 LDR     R8,[R0,#0]
 SMLAL   R5,R6,R7,R8
; combine: R5 = (low LSR shift) + (high LSL (32 - shift)), add it to the
; residue at [R2], store the restored sample and step to the next one
 LDR     R7,[R2]
 MOV     R5,R5,LSR R4
 ADD     R5,R5,R6,LSL R9
 ADD     R7,R7,R5
 STR     R7,[R2],#4
; one sample done; repeat while samples remain
 SUBS    R3,R3,#1
 BGT     Flac_ASM_LPC64_SampleLoop

 _ENDPROC

;----------------------------------------------------------------------------
; Flac_ASM_LPC32
;
; LPC restoration using 32-bit accumulation (MUL/MLA, results wrap at 32
; bits). Every residue in the sample buffer is replaced in place by
;   residue + (sum(k = 0 .. order-1) predictor[k] * sample[n-1-k] ASR shift)
;
; In  - R0  pointer to
;       +0  LPC predictors
;       +4  LPC order [1, 32] (order 0 is accepted and returns at once)
;       +8  pointer to samples (must have 'order' warmup samples preceding
;           it), prefilled with residues
;       +12 nr of samples to calculate
;       +16 LPC shift
;
; Orders 1 to 4 are dispatched to dedicated loops that produce four samples
; per pass; higher orders use the generic unrolled code at Flac_ASM_LPC32_n.
;----------------------------------------------------------------------------
 ALIGN
Flac_ASM_LPC32 _FNAME
; _DEFPROC/_ENDPROC come from TimLib:hdr.Macros (not visible here);
; presumably they save and restore the listed registers - confirm there.
; R14 is used as scratch by the order 1..4 paths, so the macro pair must
; also preserve the return address - TODO confirm.
 _DEFPROC "R4-R12"

; R0 = predictors, R1 = order, R2 = samples, R3 = count, R4 = shift
 LDMIA   R0,{R0-R4}
; Dispatch on order. PC reads as the ADDLS address + 8, which is the first
; entry of the branch table below, so order * 4 selects entry 'order'.
; When order > 4 the ADDLS is skipped and the B to the generic code runs.
 CMP     R1,#4
 ADDLS   PC,PC,R1,LSL #2
 B       Flac_ASM_LPC32_n
 B       Flac_ASM_LPC32_0
 B       Flac_ASM_LPC32_1
 B       Flac_ASM_LPC32_2
 B       Flac_ASM_LPC32_3
 B       Flac_ASM_LPC32_4

; Generic path (order > 4): restores two samples per pass of the unrolled
; block, sharing each loaded history sample between both accumulators.
;   R0 = predictors, R1 = order (turned into a jump offset below),
;   R2 = current sample, R3 = sample count, R4 = shift
;   R6 = accumulator for sample n, R9 = accumulator for sample n+1
;   R7 = history sample, R8 = predictor loaded by the previous tap
Flac_ASM_LPC32_n
; Calculate jump: each tap is 4 instructions (16 bytes) and only the last
; 'order' taps must run, so skip (32 - order) * 16 bytes
 RSB     R1,R1,#32
 MOV     R5,#16  ; 16 bytes = 4 instructions per tap
 MUL     R1,R5,R1

; From here on R3 = samples left - 1. If exactly one sample was requested,
; go straight to the single-sample code.
 SUBS    R3,R3,#1
 BEQ     Flac_ASM_LPC32_n_1

; loop on samples
Flac_ASM_LPC32_SampleLoop
 MOV     R6,#0
 MOV     R9,#0
; R8 = 0 so that the first MLA into R9 after the jump adds nothing
 MOV     R8,#0
; jump to the correct place in the vector multiplication
; (PC reads as this instruction + 8, i.e. the first LDR below; the
; MOV R0,R0 is a no-op filling the slot in between)
 ADD     PC,PC,R1
 MOV     R0,R0
; loop on orders (unrolled). Each tap k (31 down to 0):
;   R7 = sample[n-1-k]
;   R9 += R7 * predictor[k+1]   (R8 left over from the previous tap)
;   R8 = predictor[k]
;   R6 += R7 * predictor[k]
 LDR     R7,[R2,#-128]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#124]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-124]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#120]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-120]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#116]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-116]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#112]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-112]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#108]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-108]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#104]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-104]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#100]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-100]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#96]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-96]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#92]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-92]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#88]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-88]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#84]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-84]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#80]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-80]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#76]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-76]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#72]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-72]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#68]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-68]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#64]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-64]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#60]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-60]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#56]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-56]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#52]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-52]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#48]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-48]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#44]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-44]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#40]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-40]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#36]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-36]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#32]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-32]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#28]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-28]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#24]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-24]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#20]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-20]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#16]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-16]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#12]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-12]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#8]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-8]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#4]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-4]
 MLA     R9,R7,R8,R9
 LDR     R8,[R0,#0]
 MLA     R6,R7,R8,R6
; Finish both samples:
;   R7 = sample n   = residue n   + (R6 ASR shift)
;   R9 += sample n * predictor[0]  (R8 still holds predictor[0])
;   R8 = sample n+1 = residue n+1 + (R9 ASR shift)
; then store the pair and advance R2 by two samples
 LDR     R7,[R2,#0]
 ADD     R7,R7,R6,ASR R4
 MLA     R9,R7,R8,R9
 LDR     R8,[R2,#4]
 ADD     R8,R8,R9,ASR R4
 STMIA   R2!,{R7,R8}
 SUBS    R3,R3,#2
 BGT     Flac_ASM_LPC32_SampleLoop

; One more to compute if (R3+1) is odd size: the SUBS above leaves Z set
; exactly when one sample remains. Otherwise finish here (_ENDPROC NE is
; presumably a conditional return - macro not visible); on EQ execution
; falls through into the single-sample code that follows.
 _ENDPROC NE

; Generic path, single sample: restores exactly one sample at [R2].
; Reached from Flac_ASM_LPC32_n when one sample was requested, or by
; falling through after the pair loop when one sample is left over.
; On entry R1 = (32 - order) * 16 as computed in Flac_ASM_LPC32_n.
;   R6 = accumulator, R7 = history sample / residue, R8 = predictor
Flac_ASM_LPC32_n_1
; Calculate jump: the taps here are 3 instructions (12 bytes) instead of 4,
; so rescale the offset: R1 - R1/4 = (32 - order) * 12
 SUB     R1,R1,R1,LSR #2 ; 4 - 1 instructions

 MOV     R6,#0
; jump to the correct place in the vector multiplication
; (PC reads as this instruction + 8, i.e. the first LDR below; the
; MOV R0,R0 is a no-op filling the slot in between)
 ADD     PC,PC,R1
 MOV     R0,R0
; loop on orders (unrolled): R6 += sample[n-1-k] * predictor[k],
; k = 31 down to 0
 LDR     R7,[R2,#-128]
 LDR     R8,[R0,#124]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-124]
 LDR     R8,[R0,#120]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-120]
 LDR     R8,[R0,#116]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-116]
 LDR     R8,[R0,#112]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-112]
 LDR     R8,[R0,#108]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-108]
 LDR     R8,[R0,#104]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-104]
 LDR     R8,[R0,#100]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-100]
 LDR     R8,[R0,#96]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-96]
 LDR     R8,[R0,#92]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-92]
 LDR     R8,[R0,#88]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-88]
 LDR     R8,[R0,#84]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-84]
 LDR     R8,[R0,#80]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-80]
 LDR     R8,[R0,#76]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-76]
 LDR     R8,[R0,#72]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-72]
 LDR     R8,[R0,#68]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-68]
 LDR     R8,[R0,#64]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-64]
 LDR     R8,[R0,#60]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-60]
 LDR     R8,[R0,#56]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-56]
 LDR     R8,[R0,#52]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-52]
 LDR     R8,[R0,#48]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-48]
 LDR     R8,[R0,#44]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-44]
 LDR     R8,[R0,#40]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-40]
 LDR     R8,[R0,#36]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-36]
 LDR     R8,[R0,#32]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-32]
 LDR     R8,[R0,#28]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-28]
 LDR     R8,[R0,#24]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-24]
 LDR     R8,[R0,#20]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-20]
 LDR     R8,[R0,#16]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-16]
 LDR     R8,[R0,#12]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-12]
 LDR     R8,[R0,#8]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-8]
 LDR     R8,[R0,#4]
 MLA     R6,R7,R8,R6
 LDR     R7,[R2,#-4]
 LDR     R8,[R0,#0]
 MLA     R6,R7,R8,R6
; sample = residue + (R6 ASR shift); store it and advance R2
 LDR     R7,[R2]
 ADD     R7,R7,R6,ASR R4
 STR     R7,[R2],#4

; Order 0 is dispatched here directly: nothing to predict, just finish.
Flac_ASM_LPC32_0
 _ENDPROC

; Order 4 fast path: four samples are restored per pass, with the
; predictors and the four most recent samples held in registers.
;   R5-R8 = predictors 0..3
;   R10, R11, R12, R14 = s[n-4], s[n-3], s[n-2], s[n-1] at the top of a pass
;   R9-R12 = the four samples produced by the pass
;   R1 = residue being restored (the order is no longer needed)
; The count is only tested after each group of four, so a count that is
; not a multiple of 4 still reads and writes a whole final group -
; presumably the caller's buffer allows for that; TODO confirm.
Flac_ASM_LPC32_4
 LDMIA   R0,{R5-R8}                 ; c0..c3
 LDMDB   R2,{R10-R12,R14}           ; the four warmup samples just below R2
Flac_ASM_LPC32_4_Quad
; sample n (R9): history oldest to newest is R10,R11,R12,R14
 LDR     R1,[R2]
 MUL     R9,R8,R10                  ; c3 * s[n-4]
 MLA     R9,R7,R11,R9               ; + c2 * s[n-3]
 MLA     R9,R6,R12,R9               ; + c1 * s[n-2]
 MLA     R9,R5,R14,R9               ; + c0 * s[n-1]
 ADD     R9,R1,R9,ASR R4
; sample n+1 (R10): history is R11,R12,R14,R9
 LDR     R1,[R2,#4]
 MUL     R10,R8,R11
 MLA     R10,R7,R12,R10
 MLA     R10,R6,R14,R10
 MLA     R10,R5,R9,R10
 ADD     R10,R1,R10,ASR R4
; sample n+2 (R11): history is R12,R14,R9,R10
 LDR     R1,[R2,#8]
 MUL     R11,R8,R12
 MLA     R11,R7,R14,R11
 MLA     R11,R6,R9,R11
 MLA     R11,R5,R10,R11
 ADD     R11,R1,R11,ASR R4
; sample n+3 (R12): history is R14,R9,R10,R11
 LDR     R1,[R2,#12]
 MUL     R12,R8,R14
 MLA     R12,R7,R9,R12
 MLA     R12,R6,R10,R12
 MLA     R12,R5,R11,R12
 ADD     R12,R1,R12,ASR R4
; write the group back and count it off (the MOVs below keep the flags)
 STMIA   R2!,{R9-R12}
 SUBS    R3,R3,#4
; slide the history window: the new samples become s[n-4]..s[n-1]
 MOV     R14,R12
 MOV     R12,R11
 MOV     R11,R10
 MOV     R10,R9
 BGT     Flac_ASM_LPC32_4_Quad
 _ENDPROC

; Order 3 fast path: four samples are restored per pass.
;   R0, R1, R5 = predictors 0..2 (R0/R1 are reused; the predictor pointer
;                and the order are no longer needed)
;   R6, R7, R8 = s[n-3], s[n-2], s[n-1] at the top of a pass
;   R9-R12 = four residues in, four restored samples out
;   R14 = prediction accumulator
; The count is only tested after each group of four, so a count that is
; not a multiple of 4 still reads and writes a whole final group -
; presumably the caller's buffer allows for that; TODO confirm.
Flac_ASM_LPC32_3
 LDMIA   R0,{R0-R1,R5}              ; c0, c1, c2
 LDMDB   R2,{R6-R8}                 ; the three warmup samples just below R2
Flac_ASM_LPC32_3_Quad
 LDMIA   R2,{R9-R12}                ; residues n .. n+3
; sample n
 MUL     R14,R5,R6                  ; c2 * s[n-3]
 MLA     R14,R1,R7,R14              ; + c1 * s[n-2]
 MLA     R14,R0,R8,R14              ; + c0 * s[n-1]
 ADD     R9,R9,R14,ASR R4
; sample n+1
 MUL     R14,R5,R7
 MLA     R14,R1,R8,R14
 MLA     R14,R0,R9,R14
 ADD     R10,R10,R14,ASR R4
; sample n+2
 MUL     R14,R5,R8
 MLA     R14,R1,R9,R14
 MLA     R14,R0,R10,R14
 ADD     R11,R11,R14,ASR R4
; sample n+3
 MUL     R14,R5,R9
 MLA     R14,R1,R10,R14
 MLA     R14,R0,R11,R14
 ADD     R12,R12,R14,ASR R4
; write the group back and count it off (the MOVs below keep the flags)
 STMIA   R2!,{R9-R12}
 SUBS    R3,R3,#4
; the last three samples become the history for the next pass
 MOV     R8,R12
 MOV     R7,R11
 MOV     R6,R10
 BGT     Flac_ASM_LPC32_3_Quad
 _ENDPROC

; Order 2 fast path: four samples are restored per pass.
;   R0, R1 = predictors 0 and 1 (both registers are reused; the predictor
;            pointer and the order are no longer needed)
;   R7, R8 = s[n-2], s[n-1] at the top of a pass
;   R9-R12 = four residues in, four restored samples out
;   R14 = prediction accumulator
; The count is only tested after each group of four, so a count that is
; not a multiple of 4 still reads and writes a whole final group -
; presumably the caller's buffer allows for that; TODO confirm.
Flac_ASM_LPC32_2
 LDMIA   R0,{R0-R1}                 ; c0, c1
 LDMDB   R2,{R7-R8}                 ; the two warmup samples just below R2
Flac_ASM_LPC32_2_Quad
 LDMIA   R2,{R9-R12}                ; residues n .. n+3
; sample n
 MUL     R14,R1,R7                  ; c1 * s[n-2]
 MLA     R14,R0,R8,R14              ; + c0 * s[n-1]
 ADD     R9,R9,R14,ASR R4
; sample n+1
 MUL     R14,R1,R8
 MLA     R14,R0,R9,R14
 ADD     R10,R10,R14,ASR R4
; sample n+2
 MUL     R14,R1,R9
 MLA     R14,R0,R10,R14
 ADD     R11,R11,R14,ASR R4
; sample n+3
 MUL     R14,R1,R10
 MLA     R14,R0,R11,R14
 ADD     R12,R12,R14,ASR R4
; write the group back and count it off (the MOVs below keep the flags)
 STMIA   R2!,{R9-R12}
 SUBS    R3,R3,#4
; the last two samples become the history for the next pass
 MOV     R8,R12
 MOV     R7,R11
 BGT     Flac_ASM_LPC32_2_Quad
 _ENDPROC

; Order 1 fast path: four samples are restored per pass, each as
;   sample[n] = residue[n] + ((c0 * sample[n-1]) ASR shift)
;   R0 = predictor 0 (reuses R0; the predictor pointer is no longer needed)
;   R8 = s[n-1] at the top of a pass
;   R9-R12 = four residues in, four restored samples out
;   R14 = prediction
; The count is only tested after each group of four, so a count that is
; not a multiple of 4 still reads and writes a whole final group -
; presumably the caller's buffer allows for that; TODO confirm.
Flac_ASM_LPC32_1
 LDR     R0,[R0]                    ; c0
 LDR     R8,[R2,#-4]                ; the warmup sample just below R2
Flac_ASM_LPC32_1_SampleLoop
 LDMIA   R2,{R9-R12}                ; residues n .. n+3
 MUL     R14,R0,R8
 ADD     R9,R9,R14,ASR R4           ; sample n
 MUL     R14,R0,R9
 ADD     R10,R10,R14,ASR R4         ; sample n+1
 MUL     R14,R0,R10
 ADD     R11,R11,R14,ASR R4         ; sample n+2
 MUL     R14,R0,R11
 ADD     R12,R12,R14,ASR R4         ; sample n+3
 STMIA   R2!,{R9-R12}
; Carry the newest sample into R8 as the history for the next pass. The
; destination must be R8: copying the other way round would leave R8
; holding the stale warmup sample, so every later group would be
; predicted from the wrong value.
 MOV     R8,R12
 SUBS    R3,R3,#4
 BGT     Flac_ASM_LPC32_1_SampleLoop
 _ENDPROC

 END
