;
; xswi.s
;
; SWI and routine veneers
;
;  1996-1998 Straylight
;

;----- Licensing note -------------------------------------------------------
;
; This file is part of Straylight's core libraries (corelib)
;
; Corelib is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2, or (at your option)
; any later version.
;
; Corelib is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with Corelib.  If not, write to the Free Software Foundation,
; 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

;----- Options provided -----------------------------------------------------
;
; OPT_CALL	Build code for calling local assembler code
; OPT_SAPPHIRE	Do Sapphire R11/sl veneering

		; --- DCLOPT ---
		;
		; Declares a build option as a global logical variable.  An
		; option which nobody has defined is created and switched
		; off; one which already exists is switched on.

		MACRO
		DCLOPT	$var
		[	:LNOT::DEF:$var
		GBLL	$var
$var		SETL	{FALSE}
		|
$var		SETL	{TRUE}
		]
		MEND

		DCLOPT	OPT_CALL
		DCLOPT	OPT_SAPPHIRE

;----- Main code ------------------------------------------------------------

		; |_swihack| is loaded from by the SWI entry points below but
		; is not defined anywhere in this file, so it presumably comes
		; from this header -- TODO confirm.

		GET	libs:s.swihack

		; Everything down to the matching `]' is assembled only when
		; swix__dfn is not defined.  Nothing visible in this file
		; defines it.

		[	:LNOT::DEF:swix__dfn

; --- _swi, _swix, _call, _callx ---
;
; On entry:	R0 == SWI number (for _swi[x]) or address (for _call[x])
;		R1 == feature flags:
;			 0--9  == input registers (bit 0 is R0)
;			  11   == local block flag
;			12--15 == local block register
;			16--19 == return register (_call and _swi only)
;			  21   == return processor flags through a pointer
;			31--22 == output registers (bit 31 is R0, bit 22 is R9)
;		R2, R3 and stack contain other arguments
;
; On exit:	R0 == return value, or error indicator
;
; Use:		Calls a SWI or assembler routine.

		EXPORT	|_swi|
		EXPORT	|_swix|

		; --- How this works ---
		;
		; The old version of this code used to build some neat code
		; on the stack and then execute it.  This new spiffy version
		; just saves data on the stack, because building code is a
		; major no-no on the StrongARM.
		;
		; The data stacked is as follows:
		;
		;	  PC   -- address to call
		;	  R14  -- return address
		;	  R12  -- saved R12 value
		;	  R11  -- maybe Sapphire application context
		;	  R10  -- maybe the SWI number
		;
		; Because I can't use dynamic code at all, I'm having to
		; use some really nasty speed hacks here.

		; --- SWI entries ---

|_swi|		STMFD	R13!,{R2,R3}		;Variadic args go on the stack
		STMFD	R13!,{R4-R12,R14}	;Then the registers we borrow
		MOV	R9,R0			;SWI number travels in R9
		ADR	R12,|_swi_nonx|		;Come back via non-X return
		LDR	R0,|_swihack|		;Call goes via SWI caller
		B	|_swi_main|		;Join the common code

|_swix|		STMFD	R13!,{R2,R3}		;Variadic args go on the stack
		STMFD	R13!,{R4-R12,R14}	;Then the registers we borrow
		ORR	R9,R0,#&20000		;SWI number, X bit forced on
		ADR	R12,|_swi_x|		;Come back via X return
		BIC	R1,R1,#&000F0000	;Wipe the return register
		LDR	R0,|_swihack|		;Call goes via SWI caller
		B	|_swi_main|		;Join the common code

		; --- Code entries ---

	[ OPT_CALL

		; Same shape as the SWI entries, except that R0 already
		; holds the address to call and no SWI number is set up.

		EXPORT	|_call|
		EXPORT	|_callx|

|_call|		STMFD	R13!,{R2,R3}		;Variadic args go on the stack
		STMFD	R13!,{R4-R12,R14}	;Then the registers we borrow
	  [ OPT_SAPPHIRE
		MOV	R11,R10			;R11/sl veneer: R11 = R10
	  ]
		ADR	R12,|_swi_nonx|		;Come back via non-X return
		B	|_swi_main|		;Join the common code

|_callx|	STMFD	R13!,{R2,R3}		;Variadic args go on the stack
		STMFD	R13!,{R4-R12,R14}	;Then the registers we borrow
	  [ OPT_SAPPHIRE
		MOV	R11,R10			;R11/sl veneer: R11 = R10
	  ]
		ADR	R12,|_swi_x|		;Come back via X return
		BIC	R1,R1,#&000F0000	;Wipe the return register
		B	|_swi_main|		;Join the common code

	]

		; --- First job is to set up the call address ---

|_swi_main|	MOV	R14,PC			;Get the current CPU flags
		AND	R14,R14,#&0C000003	;Leave interrupts and mode
		ORR	R12,R12,R14		;Set the return address
		ORR	R14,R0,R14		;And the call address
		SUB	R13,R13,#8		;Make a hole in the stack
		STMFD	R13!,{R9-R12,R14}	;Reloaded as R10-R12,R14,PC

		; Arrives here from the entry points with R0 holding the
		; address to call, R1 the feature flags and R12 the address
		; to come back to.  The five words just stacked are loaded
		; back one register higher by the LDMFD at the end of this
		; section: R9 lands in R10, R10 in R11, R11 in R12, R12 (the
		; return address) in R14 and R14 (the call address) in PC.
		; The 8-byte hole sits directly above them, at R13+20.

		; --- Set up the input registers ---
		;
		; Unrolled loop to do two registers at a time.  There are
		; frequent exits while scanning the early registers to
		; speed up common cases, petering out towards the end.
		;
		; The flags are copied to R10 because R1 may itself be
		; loaded as an input.  R12 walks up the argument words,
		; which start 68 bytes up: 20 for the words stacked above,
		; 8 for the hole and 40 for the saved R4-R12 and R14.
		;
		; Each MOVS drops one pair of flag bits into N (the lower
		; register of the pair) and C (the higher one); the loads
		; then take one argument word for each bit which is set.
		; Each TST leaves early once no higher input bits remain.

		MOV	R10,R1			;Fetch the feature flags
		ADD	R12,R13,#68		;Point to arguments

		MOVS	R14,R10,LSL #31
		LDRMI	R0,[R12],#4
		LDRCS	R1,[R12],#4
		TST	R10,#&3FC
		BEQ	%f00
		MOVS	R14,R10,LSL #29
		LDRMI	R2,[R12],#4
		LDRCS	R3,[R12],#4
		TST	R10,#&3F0
		BEQ	%f00
		MOVS	R14,R10,LSL #27
		LDRMI	R4,[R12],#4
		LDRCS	R5,[R12],#4
		TST	R10,#&3C0
		BEQ	%f00
		MOVS	R14,R10,LSL #25
		LDRMI	R6,[R12],#4
		LDRCS	R7,[R12],#4
		MOVS	R14,R10,LSL #23
		LDRMI	R8,[R12],#4
		LDRCS	R9,[R12],#4
00
		; --- Now sort out block arguments ---
		;
		; The feature flags and the argument pointer are parked in
		; the hole first, so that the return code can reload them
		; after the call has had its way with R10 and R12.

		ADD	R14,R13,#20		;Find the hole in the stack
		STMIA	R14,{R10,R12}		;Save important context
		TST	R10,#&800		;Do we have a block argument?
		BNE	|_swi_block|		;Yes -- sort out out-of-line
		LDMFD	R13!,{R10-R12,R14,PC}^	;And call the routine

		; --- X-type return ---

|_swi_x|	LDMFD	R13!,{R10,R12}		;Flags and arg pointer back
		MOV	R14,#0			;Assume all went well
		MOVVS	R14,R0			;V set: hand back the error
		STMFD	R13!,{R14}		;Push it as the return value
		B	|_swi_output|		;Now do the output registers

		; --- Non-X-type return ---
		;
		; Pick out the correct register with a branch table.  Also
		; invert the table to pick out common case of return R0.

|_swi_nonx|	LDMFD	R13!,{R10,R12}		;Reload important context
		MOV	R14,#&F			;A nice bitmask
		AND	R14,R14,R10,LSR #16	;So mask the return register
		RSB	R14,R14,#&F		;Invert range hackily
		ADD	PC,PC,R14,LSL #3	;And dispatch nicely
		DCB	"hack"

		; The called routine comes back to the label above; the
		; LDMFD collects the feature flags and argument pointer
		; which were parked in the stack hole before the call.
		;
		; Every table entry is two instructions (8 bytes), which is
		; why the index is shifted left by 3.  The "hack" word only
		; fills the slot between the ADD and the first entry.  The
		; table runs from register 15 down to register 0, matching
		; the inverted index; the final entry (R0) has no branch
		; and simply drops through into |_swi_output|.
		;
		; Each entry pushes the chosen register as the return
		; value.  None of the instructions above alters the flags,
		; so the PC entry stores the PC as read after the call.
		;
		; NOTE(review): the R13 entry uses R13 as both the stored
		; register and the written-back base -- confirm that this
		; is well defined on the target CPUs.

		STR	PC,[R13,#-4]!
		B	|_swi_output|
		STR	R14,[R13,#-4]!
		B	|_swi_output|
		STR	R13,[R13,#-4]!
		B	|_swi_output|
		STR	R12,[R13,#-4]!
		B	|_swi_output|
		STR	R11,[R13,#-4]!
		B	|_swi_output|
		STR	R10,[R13,#-4]!
		B	|_swi_output|
		STR	R9,[R13,#-4]!
		B	|_swi_output|
		STR	R8,[R13,#-4]!
		B	|_swi_output|
		STR	R7,[R13,#-4]!
		B	|_swi_output|
		STR	R6,[R13,#-4]!
		B	|_swi_output|
		STR	R5,[R13,#-4]!
		B	|_swi_output|
		STR	R4,[R13,#-4]!
		B	|_swi_output|
		STR	R3,[R13,#-4]!
		B	|_swi_output|
		STR	R2,[R13,#-4]!
		B	|_swi_output|
		STR	R1,[R13,#-4]!
		B	|_swi_output|
		STR	R0,[R13,#-4]!

		; --- Now handle output parameters ---
		;
		; Same style as the input parameters, with early exits
		; placed conveniently.

|_swi_output|	MOV	R11,PC			;Get the CPU flags

		; On entry here R10 holds the feature flags, R12 points at
		; the next unread argument word and the return value is on
		; top of the stack.  R11 keeps the flags as they were on
		; return from the call, since the tests below destroy them.

		TST	R10,#&FF000000		;Are there any output args?
		TSTEQ	R10,#&00E00000
		BEQ	%f10			;No -- skip onwards then

		; Each MOVS drops a pair of output bits into C (the lower
		; register of the pair) and N (the higher one).  For every
		; bit set, the next argument word is fetched as a pointer
		; and the register is stored through it.  R14 is scratch;
		; loads and stores leave the flags from the MOVS alone.

		MOVS	R14,R10,LSL #1
		LDRCS	R14,[R12],#4
		STRCS	R0,[R14,#0]
		LDRMI	R14,[R12],#4
		STRMI	R1,[R14,#0]
		MOVS	R14,R10,LSL #3
		LDRCS	R14,[R12],#4
		STRCS	R2,[R14,#0]
		LDRMI	R14,[R12],#4
		STRMI	R3,[R14,#0]
		TST	R10,#&0FC00000		;Anything for R4-R9?
		BEQ	%f00
		MOVS	R14,R10,LSL #5
		LDRCS	R14,[R12],#4
		STRCS	R4,[R14,#0]
		LDRMI	R14,[R12],#4
		STRMI	R5,[R14,#0]
		TST	R10,#&03C00000		;Anything for R6-R9?
		BEQ	%f00
		MOVS	R14,R10,LSL #7
		LDRCS	R14,[R12],#4
		STRCS	R6,[R14,#0]
		LDRMI	R14,[R12],#4
		STRMI	R7,[R14,#0]
		MOVS	R14,R10,LSL #9
		LDRCS	R14,[R12],#4
		STRCS	R8,[R14,#0]
		LDRMI	R14,[R12],#4
		STRMI	R9,[R14,#0]
00
		; --- Handle returning flags ---
		;
		; Bit 21 asks for the word captured in R11 at the top of
		; this section; its pointer follows the register pointers.

		TST	R10,#&00200000
		LDRNE	R14,[R12],#4
		STRNE	R11,[R14,#0]
10
		; Pop the return value into R0, restore the caller's R4-R12
		; and R14, discard the two stacked argument words (R2 and
		; R3) and return through R14.

		LDMFD	R13!,{R0,R4-R12,R14}
		ADD	R13,R13,#8
		MOVS	PC,R14

		; --- Handle block arguments ---
		;
		; Shift output registers to the right to find the block.
		; Then dispatch through a branch table to store in the right
		; register.  All the registers from R10 upwards are on the
		; stack so they can be restored easily.

|_swi_block|	MOV	R11,R10,LSR #22		;Mask off output registers
		MOV	R11,R11,LSL #21		;Shift down one place
		MOV	R14,R12			;Preserve R12 here

		; Bits 31--22 of the flags now sit in bits 30--21 of R11,
		; so each LSL #2 below pushes exactly one pair of output
		; bits into C and N.  R14 starts at the first output
		; pointer and moves on one word for each output register
		; requested, finishing at the first word past them: that
		; is the block.
		;
		; NOTE(review): bit 21 (the flags pointer) is dropped by
		; the LSR #22 and so is not skipped; with both flags
		; return and a block requested, the block would start at
		; the flags pointer word -- confirm whether intended.

00		MOVS	R11,R11,LSL #2		;Shift into C and N flags
		ADDCS	R14,R14,#4		;If C set, bump counter
		ADDMI	R14,R14,#4		;If N set, bump counter
		BNE	%b00			;And loop back until done
		AND	R11,R10,#&0000F000	;Fetch the right argument
		ADD	PC,PC,R11,LSR #9	;Dispatch through branch tbl
		DCB	"hack"

		; --- Main dispatch table ---
		;
		; This is now just a branch off the main routine, so I
		; can just call the SWI/routine appropriately rather than
		; returning to the caller.  This gives me an extra register
		; to play with above, which helps.
		;
		; The block register number sits in bits 12--15, so the
		; LSR #9 leaves it multiplied by 8: one two-word entry per
		; register.  The "hack" word only fills the slot between
		; the ADD and the first entry.  Each entry puts the block
		; address in its register and then makes the call exactly
		; as the main path does.

		MOV	R0,R14
		LDMFD	R13!,{R10-R12,R14,PC}^
		MOV	R1,R14
		LDMFD	R13!,{R10-R12,R14,PC}^
		MOV	R2,R14
		LDMFD	R13!,{R10-R12,R14,PC}^
		MOV	R3,R14
		LDMFD	R13!,{R10-R12,R14,PC}^
		MOV	R4,R14
		LDMFD	R13!,{R10-R12,R14,PC}^
		MOV	R5,R14
		LDMFD	R13!,{R10-R12,R14,PC}^
		MOV	R6,R14
		LDMFD	R13!,{R10-R12,R14,PC}^
		MOV	R7,R14
		LDMFD	R13!,{R10-R12,R14,PC}^
		MOV	R8,R14
		LDMFD	R13!,{R10-R12,R14,PC}^
		MOV	R9,R14
		LDMFD	R13!,{R10-R12,R14,PC}^

		; --- For safety, handle daft values of the parameter ---
		;
		; Register numbers 10--15 get no block pointer at all: the
		; call is made straight away.  The "daft" words keep these
		; entries the same 8-byte size as the ones above.

		LDMFD	R13!,{R10-R12,R14,PC}^
		DCB	"daft"
		LDMFD	R13!,{R10-R12,R14,PC}^
		DCB	"daft"
		LDMFD	R13!,{R10-R12,R14,PC}^
		DCB	"daft"
		LDMFD	R13!,{R10-R12,R14,PC}^
		DCB	"daft"
		LDMFD	R13!,{R10-R12,R14,PC}^
		DCB	"daft"
		LDMFD	R13!,{R10-R12,R14,PC}^
		DCB	"daft"

		LTORG

		]

;----- That's all, folks ----------------------------------------------------

		END
