;
; fastMove.s
;
; Fast memory manipulation (TMA)
;
; (c) 1994-1998 Straylight
;

;----- Licensing note -------------------------------------------------------
;
; This file is part of Straylight's core libraries (corelib).
;
; Corelib is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2, or (at your option)
; any later version.
;
; Corelib is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with Corelib.  If not, write to the Free Software Foundation,
; 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

;----- Standard header ------------------------------------------------------

		GET	libs:header
		GET	libs:swis

;----- External dependencies ------------------------------------------------
;
; None.

;----- Main Code ------------------------------------------------------------

		AREA	|Sapphire$$Code|,CODE,READONLY

; --- fastMove ---
;
; On entry:	R0 == destination pointer
;		R1 == source pointer
;		R2 == number of bytes to move
;
; On exit:	All registers preserved (R0-R12 are stacked on entry and
;		reloaded on every exit path)
;
; Use:		A very fast block moving routine.  Word aligning is not
;		necessary, and the blocks may overlap.  This is basically
;		the routine from PRM 2, hacked to cope with overlapping.
;
;		If the destination is below the source the block is moved
;		from its start upwards through memory; if it is above,
;		the block is moved from its end downwards, so an
;		overlapping source is never overwritten before it has
;		been read.  Nothing is moved if R2 is zero or negative,
;		or if the two pointers are equal.
;
;		The byte count is handled as a signed quantity throughout
;		(BLT/BGE/BGT on R2), so counts of 2^31 or more are not
;		supported.

		EXPORT	fastMove
fastMove	ROUT

		; --- Is there anything to do at all? ---
		;
		; The alignment loops below move a byte *before* testing
		; the count, so an empty move must be rejected here or it
		; would write bytes it was never asked to.

		CMP	R2,#0			;Any bytes to move?
		MOVLES	PC,R14			;No -- return straight away

		; --- Do we need to do it backwards? ---
		;
		; Addresses are unsigned, hence BHI rather than BGT.

		CMP	R0,R1			;Compare the two addresses
		MOVEQS	PC,R14			;Same place -- nothing to do
		BHI	%49fastMove		;Dest above source -- go backwards

		; --- Copy downwards ---
		;
		; R0 and R1 advance through memory; R2 counts bytes left.

		STMFD	R13!,{R0-R12,R14}	;Stack some registers

		TST	R0,#3			;Is destination word aligned
		BNE	%10fastMove		;No -- word align it

00fastMove	TST	R1,#3			;Is source word aligned
		BNE	%20fastMove		;No -- word align that

		; --- Source and destination are now word aligned ---
		;
		; From here R2 is kept biased (by -16, -32, ...) so that a
		; single SUBS/ADDS both adjusts it and sets the flags which
		; say whether another chunk of that size is available.

		SUBS	R2,R2,#16		;4 or more words to do?
		BLT	%03fastMove

		SUBS	R2,R2,#16		;8 or more words to do?
		BLT	%02fastMove

		; --- 8 words at a time ---

01fastMove	LDMIA	R1!,{R3-R9,R14}		;Load 8 words
		STMIA	R0!,{R3-R9,R14}		;Move them
		SUBS	R2,R2,#32		;Decrement count
		BGE	%01fastMove		;Keep moving blocks if we can

		CMP	R2,#-32			;Are we finished?
		LDMEQFD	R13!,{R0-R12,PC}^	;Yes -- return

		; --- 4 words at a time ---

02fastMove	ADDS	R2,R2,#16		;4 whole words to do?
		BLT	%03fastMove

		LDMIA	R1!,{R3-R6}		;Load 4 words
		STMIA	R0!,{R3-R6}		;Move them
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#16

		; --- Less than 4 words left ---

03fastMove	ADDS	R2,R2,#8		;2 words to do?
		BLT	%04fastMove

		LDMIA	R1!,{R3,R4}		;Load 2 words
		STMIA	R0!,{R3,R4}		;Move them
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#8

		; --- Less than 2 words left ---

04fastMove	ADDS	R2,R2,#4		;1 word to do?
		BLT	%05fastMove

		LDR	R3,[R1],#4		;Load a word
		STR	R3,[R0],#4		;Move it
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#4

		; --- Less than 1 word! ---

05fastMove	ADDS	R2,R2,#4		;Undo the bias -- R2 is now 0-3
		LDMEQFD	R13!,{R0-R12,PC}^	;Pointless?

		; --- 1, 2 or 3 bytes left ---
		;
		; The bytes wanted all live in the aligned word at R1, so
		; load it whole and peel bytes off the bottom.

		LDR	R14,[R1]
06fastMove	STRB	R14,[R0],#1		;Store the lowest byte
		MOV	R14,R14,LSR #8		;Bring the next one down
		SUBS	R2,R2,#1
		BGT	%06fastMove
		LDMFD	R13!,{R0-R12,PC}^	;Finished at last!

		; --- Word align destination ---
		;
		; Move single bytes until R0 is on a word boundary (or we
		; run out of bytes).  R2 is known to be nonzero on entry.

10fastMove	LDRB	R14,[R1],#1
		STRB	R14,[R0],#1
		SUBS	R2,R2,#1
		LDMEQFD	R13!,{R0-R12,PC}^	;Ran out -- return

		TST	R0,#3			;Is it word aligned?
		BNE	%10fastMove		;No -- keep going

		B	%00fastMove		;Move the rest

		; --- Word align source (Urggg... bloogle...) ---
		; --- PRM for documentation ---
		;
		; Destination is aligned but source is not.  We read whole
		; aligned source words and stitch each output word together
		; from two neighbours:
		;
		;   R11 == right shift, 8 * (source address AND 3)
		;   R12 == left shift, 32 - R11
		;   R3  == carry: source bytes already read but not yet
		;          stored, in its low R12 bits (R12/8 bytes)

20fastMove	AND	R11,R1,#3		;Byte offset within the word
		BIC	R1,R1,#3		;Word align source
		MOV	R11,R11,LSL #3		;This is the right shift
		RSB	R12,R11,#32		;This is the left shift
		LDR	R3,[R1],#4		;Load the first (partial) word
		MOV	R3,R3,LSR R11		;Drop bytes before the start

		SUBS	R2,R2,#16		;4 or more words to do?
		BLT	%23fastMove

		SUBS	R2,R2,#16		;8 or more words to do?
		BLT	%22fastMove

		; --- 8 words at a time ---

21fastMove	LDMIA	R1!,{R4-R10,R14}	;Load 8 words
		ORR	R3,R3,R4,LSL R12	;Complete the carried word

		MOV	R4,R4,LSR R11
		ORR	R4,R4,R5,LSL R12

		MOV	R5,R5,LSR R11
		ORR	R5,R5,R6,LSL R12

		MOV	R6,R6,LSR R11
		ORR	R6,R6,R7,LSL R12

		MOV	R7,R7,LSR R11
		ORR	R7,R7,R8,LSL R12

		MOV	R8,R8,LSR R11
		ORR	R8,R8,R9,LSL R12

		MOV	R9,R9,LSR R11
		ORR	R9,R9,R10,LSL R12

		MOV	R10,R10,LSR R11
		ORR	R10,R10,R14,LSL R12

		STMIA	R0!,{R3-R10}		;Move them
		MOV	R3,R14,LSR R11		;Leftover becomes new carry
		SUBS	R2,R2,#32		;Decrement count
		BGE	%21fastMove		;Keep moving blocks if we can

		CMP	R2,#-32			;Are we finished?
		LDMEQFD	R13!,{R0-R12,PC}^	;Yes -- return

		; --- 4 words at a time ---

22fastMove	ADDS	R2,R2,#16		;4 whole words to do?
		BLT	%23fastMove

		LDMIA	R1!,{R4-R7}		;Load 4 words
		ORR	R3,R3,R4,LSL R12

		MOV	R4,R4,LSR R11
		ORR	R4,R4,R5,LSL R12

		MOV	R5,R5,LSR R11
		ORR	R5,R5,R6,LSL R12

		MOV	R6,R6,LSR R11
		ORR	R6,R6,R7,LSL R12

		STMIA	R0!,{R3-R6}		;Move them
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#16
		MOV	R3,R7,LSR R11		;Leftover becomes new carry

		; --- Less than 4 words left ---

23fastMove	ADDS	R2,R2,#8		;2 words to do?
		BLT	%24fastMove

		LDMIA	R1!,{R4,R5}		;Load 2 words
		ORR	R3,R3,R4,LSL R12

		MOV	R4,R4,LSR R11
		ORR	R4,R4,R5,LSL R12

		STMIA	R0!,{R3,R4}		;Move them
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#8
		MOV	R3,R5,LSR R11		;Leftover becomes new carry

		; --- Less than 2 words left ---

24fastMove	ADDS	R2,R2,#4		;1 word to do?
		BLT	%25fastMove

		LDR	R4,[R1],#4		;Load a word
		ORR	R3,R3,R4,LSL R12

		STR	R3,[R0],#4		;Move it
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#4
		MOV	R3,R4,LSR R11		;Leftover becomes new carry

		; --- Less than 1 word! ---

25fastMove	ADDS	R2,R2,#4		;Undo the bias -- R2 is now 0-3
		LDMEQFD	R13!,{R0-R12,PC}^	;Pointless?

		; --- 1, 2 or 3 bytes left ---
		;
		; R3 already holds R12/8 source bytes.  Only fetch another
		; word if that isn't enough, so we never read a word lying
		; wholly beyond the end of the source block.

		CMP	R2,R12,LSR #3		;Need more than R3 holds?
		LDRHI	R14,[R1]		;Yes -- load the next word
		ORRHI	R3,R3,R14,LSL R12	;And merge it in
26fastMove	STRB	R3,[R0],#1		;Store the lowest byte
		MOV	R3,R3,LSR #8		;Bring the next one down
		SUBS	R2,R2,#1
		BGT	%26fastMove

		LDMFD	R13!,{R0-R12,PC}^	;Finished at last!

		; --- Copy upwards ---
		;
		; Destination is above source, so start at the far end of
		; both blocks and work back towards the start.  R0 and R1
		; point just past the next byte to be moved.

49fastMove	STMFD	R13!,{R0-R12,R14}	;Stack some registers
		ADD	R0,R0,R2		;Point past end of destination
		ADD	R1,R1,R2		;Point past end of source

		TST	R0,#3			;Is destination word aligned
		BNE	%60fastMove		;No -- word align it

50fastMove	TST	R1,#3			;Is source word aligned
		BNE	%70fastMove		;No -- word align that

		; --- Source and destination are now word aligned ---

		SUBS	R2,R2,#16		;4 or more words to do?
		BLT	%53fastMove

		SUBS	R2,R2,#16		;8 or more words to do?
		BLT	%52fastMove

		; --- 8 words at a time ---

51fastMove	LDMDB	R1!,{R3-R9,R14}		;Load 8 words
		STMDB	R0!,{R3-R9,R14}		;Move them
		SUBS	R2,R2,#32		;Decrement count
		BGE	%51fastMove		;Keep moving blocks if we can

		CMP	R2,#-32			;Are we finished?
		LDMEQFD	R13!,{R0-R12,PC}^	;Yes -- return

		; --- 4 words at a time ---

52fastMove	ADDS	R2,R2,#16		;4 whole words to do?
		BLT	%53fastMove

		LDMDB	R1!,{R3-R6}		;Load 4 words
		STMDB	R0!,{R3-R6}		;Move them
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#16

		; --- Less than 4 words left ---

53fastMove	ADDS	R2,R2,#8		;2 words to do?
		BLT	%54fastMove

		LDMDB	R1!,{R3,R4}		;Load 2 words
		STMDB	R0!,{R3,R4}		;Move them
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#8

		; --- Less than 2 words left ---

54fastMove	ADDS	R2,R2,#4		;1 word to do?
		BLT	%55fastMove

		LDR	R3,[R1,#-4]!		;Load a word
		STR	R3,[R0,#-4]!		;Move it
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#4

		; --- Less than 1 word! ---

55fastMove	ADDS	R2,R2,#4		;Undo the bias -- R2 is now 0-3
		LDMEQFD	R13!,{R0-R12,PC}^	;Pointless?

		; --- 1, 2 or 3 bytes left ---
		;
		; The bytes wanted are the top ones of the aligned word
		; just below R1, so load it whole and peel them off the top.

		LDR	R14,[R1,#-4]
56fastMove	MOV	R3,R14,LSR #24		;Get the highest byte
		STRB	R3,[R0,#-1]!		;Store it
		MOV	R14,R14,LSL #8		;Bring the next one up
		SUBS	R2,R2,#1
		BGT	%56fastMove

		LDMFD	R13!,{R0-R12,PC}^	;Finished at last!

		; --- Word align destination ---
		;
		; Move single bytes until R0 is on a word boundary (or we
		; run out of bytes).  R2 is known to be nonzero on entry.

60fastMove	LDRB	R14,[R1,#-1]!
		STRB	R14,[R0,#-1]!
		SUBS	R2,R2,#1
		LDMEQFD	R13!,{R0-R12,PC}^	;Ran out -- return

		TST	R0,#3			;Is it word aligned?
		BNE	%60fastMove		;No -- keep going

		B	%50fastMove		;Move the rest

		; --- Word align source (Urggg... bloogle...) ---
		;
		; There are several oddnesses here.
		;
		; This mirrors the downward case, but the carry lives at
		; the other end of the word and R1 is post-decremented so
		; that LDMDA picks up the words immediately below:
		;
		;   R11 == right shift, 8 * (source address AND 3)
		;   R12 == left shift, 32 - R11
		;   R14 == carry: source bytes already read but not yet
		;          stored, in its top R11 bits (R11/8 bytes)

70fastMove	AND	R11,R1,#3		;Byte offset within the word
		BIC	R1,R1,#3		;Word align source
		MOV	R11,R11,LSL #3		;This is the right shift
		RSB	R12,R11,#32		;This is the left shift
		LDR	R14,[R1],#-4		;Load the last (partial) word
		MOV	R14,R14,LSL R12		;Get the odd bit

		SUBS	R2,R2,#16		;4 or more words to do?
		BLT	%73fastMove

		SUBS	R2,R2,#16		;8 or more words to do?
		BLT	%72fastMove

		; --- 8 words at a time ---

71fastMove	LDMDA	R1!,{R3-R10}		;Load 8 words
		ORR	R14,R14,R10,LSR R11	;Complete the carried word

		MOV	R10,R10,LSL R12
		ORR	R10,R10,R9,LSR R11

		MOV	R9,R9,LSL R12
		ORR	R9,R9,R8,LSR R11

		MOV	R8,R8,LSL R12
		ORR	R8,R8,R7,LSR R11

		MOV	R7,R7,LSL R12
		ORR	R7,R7,R6,LSR R11

		MOV	R6,R6,LSL R12
		ORR	R6,R6,R5,LSR R11

		MOV	R5,R5,LSL R12
		ORR	R5,R5,R4,LSR R11

		MOV	R4,R4,LSL R12
		ORR	R4,R4,R3,LSR R11

		STMDB	R0!,{R4-R10,R14}	;Move them
		MOV	R14,R3,LSL R12		;Leftover becomes new carry
		SUBS	R2,R2,#32		;Decrement count
		BGE	%71fastMove		;Keep moving blocks if we can

		CMP	R2,#-32			;Are we finished?
		LDMEQFD	R13!,{R0-R12,PC}^	;Yes -- return

		; --- 4 words at a time ---

72fastMove	ADDS	R2,R2,#16		;4 whole words to do?
		BLT	%73fastMove

		LDMDA	R1!,{R3-R6}		;Load 4 words
		ORR	R14,R14,R6,LSR R11

		MOV	R6,R6,LSL R12
		ORR	R6,R6,R5,LSR R11

		MOV	R5,R5,LSL R12
		ORR	R5,R5,R4,LSR R11

		MOV	R4,R4,LSL R12
		ORR	R4,R4,R3,LSR R11

		STMDB	R0!,{R4-R6,R14}		;Move them
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#16
		MOV	R14,R3,LSL R12		;Leftover becomes new carry

		; --- Less than 4 words left ---

73fastMove	ADDS	R2,R2,#8		;2 words to do?
		BLT	%74fastMove

		LDMDA	R1!,{R3,R4}		;Load 2 words
		ORR	R14,R14,R4,LSR R11

		MOV	R4,R4,LSL R12
		ORR	R4,R4,R3,LSR R11

		STMDB	R0!,{R4,R14}		;Move them
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#8
		MOV	R14,R3,LSL R12		;Leftover becomes new carry

		; --- Less than 2 words left ---

74fastMove	ADDS	R2,R2,#4		;1 word to do?
		BLT	%75fastMove

		LDR	R3,[R1],#-4		;Load a word
		ORR	R14,R14,R3,LSR R11

		STR	R14,[R0,#-4]!		;Move it
		LDMEQFD	R13!,{R0-R12,PC}^	;Return if finished
		SUB	R2,R2,#4
		MOV	R14,R3,LSL R12		;Leftover becomes new carry

		; --- Less than 1 word! ---

75fastMove	ADDS	R2,R2,#4		;Undo the bias -- R2 is now 0-3
		LDMEQFD	R13!,{R0-R12,PC}^	;Pointless?

		; --- 1, 2 or 3 bytes left ---
		;
		; R14 already holds R11/8 source bytes.  Only fetch another
		; word if that isn't enough, so we never read a word lying
		; wholly below the start of the source block.

		CMP	R2,R11,LSR #3		;Need more than R14 holds?
		LDRHI	R3,[R1],#-4		;Yes -- load the word below
		ORRHI	R14,R14,R3,LSR R11	;And merge it in
76fastMove	MOV	R3,R14,LSR #24		;Get the highest byte
		STRB	R3,[R0,#-1]!		;Store it
		MOV	R14,R14,LSL #8		;Bring the next one up
		SUBS	R2,R2,#1
		BGT	%76fastMove

		LDMFD	R13!,{R0-R12,PC}^	;Finished at last!

;----- Workspace ------------------------------------------------------------
;
; None.

;----- That's all folks -----------------------------------------------------

		END
