; *********************************************************************
;
;  DIVAPC ARM Assembler source
;
;  CPU.S.CPUS_G  Assembler interface to Gemini card
;
;  Versions
;  1.65 24-11-94 INH     Loosely derived from CPUS-E for v1.64
;  1.75 20-03-95         Added CPUS_StopFlag for callbacks
;  1.83 15-05-95         Added StopFlag in block in/out routines
;  2.12 1997.09.24 RW    Optimised Do_access
;  2.13 1997.09.29 W/RW  Trace for IO accesses added
;  2.15 1997.10.14 RW    CPUs + video optimised
;       1998.01.31 MB    Robin's IO tracing set free (see ;BL... etc.)
; *********************************************************************

    GET SYS.S.STDDEFS

           ; Set to 1 to assemble the I/O tracing code in Run_IORd8 and
           ; Run_IOWr8, which calls the C function CPU_TraceIO
IOtrace * 0

Traceaddress * &22C	; Ignored unless you uncomment a few lines below:
			; the trace code compares the port with this value,
			; but the BNE instructions that would act on the
			; comparison are commented out, so with IOtrace=1
			; every byte I/O access is traced

; Definitions ---------------------------------------------------------

; Address definitions for podule
; MASR is the register polled for pending PC accesses; it carries the
; flag bits defined below plus the PC address in its low bits.
; MDR is the data register: written to deliver read data to the PC,
; read to fetch the data of a PC write.

MASR        EQU &3730000
MDR         EQU &3730004

; Bit definitions for podule registers
; (meanings as used by the dispatch table in CPUS_Run)

MASR_Access_bit       EQU  1 :SHL: 31   ; set when an access is pending
MASR_Word_bit         EQU  1 :SHL: 30   ; set = 16-bit access, clear = 8-bit
MASR_Write_bit        EQU  1 :SHL: 29   ; set = write, clear = read
MASR_Mem_bit          EQU  1 :SHL: 28   ; set = memory space, clear = I/O
MASR_DMA_bit          EQU  1 :SHL: 27   ; currently ignored when dispatching

; Offsets within 'handler' structure (16 pre-subtracted for optimisation)
; Each structure is 20 bytes: four routine addresses followed by the R12
; value to hand to the routine. CPUS_Run adds 16 to the array base
; addresses, so the R12 value sits at offset 0 from the biased pointer
; and the routine addresses at negative offsets.

H_READ8   EQU 0-16
H_READ16  EQU 4-16
H_WRITE8  EQU 8-16
H_WRITE16 EQU 12-16
H_R12val  EQU 16-16

DispMemBase EQU &A0000   ; First memory range: PC memory addresses below
                         ; this are rejected via Run_MASRerror

; ---------------------------------------------------------------------


        AREA |C$$Code|, CODE, READONLY


        EXPORT      CPUS_Run


        EXPORT      CPU_BlockIn
        EXPORT      CPU_BlockOut

        EXPORT      CPUS_IOArray
        EXPORT      CPUS_MemArray
        EXPORT      CPUS_pStopFlag
        EXPORT      CPUS_ErrorWord

        EXPORT      CPUS_Read
        EXPORT      CPUS_Write
        EXPORT      CPU_HardwareID
        ; New from RJW
        EXPORT	    Run_Poll
	EXPORT	    Run_Poll_PostRead8
	EXPORT	    Run_Poll_PostRead16


        IMPORT      CPU_TraceIO     ; c function in CPU.c

        ALIGN

; Exported variables. CPUS_Run loads the first three once, on entry,
; and keeps them in registers while it runs.
CPUS_IOArray        DCD 0           ; Array of handlers for I/O space
CPUS_MemArray       DCD 0           ; Array of handlers for memory space
CPUS_pStopFlag      DCD 0           ; Pointer to int: non-zero value will
                                    ; stop emulation
CPUS_ErrorWord      DCD 0           ; Non-zero if hardware error during
                                    ; emulation (holds the offending
                                    ; MASR value; zeroed by CPUS_Run)

; Literal holding the MASR address, loaded into R5 by the routines below
MASR_load       DCD     MASR
; R10 as it was on entry to CPUS_Run; reloaded into R10 before each
; I/O handler is called
r10_tmp		DCD	0


 IMPORT SYS_tracedata

CPUS_Run ; (void) *****************************************
        ; Entry to the emulation polling loop; takes no arguments.
        ; This may be called from any processor mode, and restores the
        ; ARM to that mode again before returning (see Run_Exit).
        ; R4-R11 and LR are stacked here and unstacked at Run_Exit.
	STMFD	SP!, {R4-R11,LR}

        ; Keep a copy of R15 so that Run_Exit can get back to the
        ; caller's mode
        MOV    R0, PC
        STR    R0, Run_ModeSave      ; R0 contains CPU mode to return to
        ; EnterSVCmode is a macro that is not defined in this file
	EnterSVCmode

        ; Initialise the registers the polling loop keeps live

        LDR    R4, CPUS_pStopFlag
        LDR    R5, MASR_load
        LDR    R6, CPUS_MemArray
        LDR    R7, CPUS_IOArray
        ; Bias both array bases by 16 to match the pre-subtracted
        ; H_xxx offsets
        ADD    R6, R6,#16
        ADD    R7, R7,#16
        MOV    R8, # &FFFFFFFF      ; Used for generating masks
        ; Remember the entry value of R10; the I/O dispatch routines
        ; reload it before calling a handler
        STR    R10, r10_tmp

        ; Start with no error recorded
        MOV    R0, #0
        STR    R0, CPUS_ErrorWord

        B      Run_Poll

        ; Start of main polling loop ***********************************

        ; At this point
        ;  R4 = Stop flag
        ;  R5 = MASR address
        ;  R6 = MemArray base
        ;  R7 = IOArray base
        ;  R8 = 0xFFFFFFFF (for mask)

        ; The loop is entered for the first time at label 'Run_Poll'.
        ; While waiting for an access to occur, it loops back
        ; to 'Run_NoAccess'. After a callback has occurred,
        ; it returns to 'Run_CheckStop' (or, for reads, to
        ; 'PreRun_CheckStop', which first delivers the result).

        ; This code backs off from using the OpenBus when the PC is
        ; not making accesses, to avoid using up bus bandwidth.

; Back-off loop, entered through the dispatch table when MASR shows no
; pending access. R0 holds the current delay count, R1 counts it down.
; After each delay the stop flag is tested first, then MASR is re-read.
Run_NoAccess
        MOV    R0, #5	              ; Initial timeout period
        ;MOV    R0, #30               ; 30 for SA. 5 for ARM ?

Run_WaitForIt
        MOV    R1, R0                 ; If no access, wait for a bit
Run_WaitLoop                          ; to give PC some bus space
        SUBS   R1, R1, #1
        BNE    Run_WaitLoop

        ADD    R0, R0, R0, LSR #2     ; adds a quarter (rounded down)
        CMP    R0, #180               ; to timeout until max of 180
        ;CMP    R0, #1000             ; 1000 for SA ?
        MOVHS  R0, #180               ; is reached, where it stays
        ;MOVHS  R0, #1000

        LDR    R3, [R4]               ; Get stopflag
        CMP    R3, #0
	BNE    Run_Exit

        LDR    R3, [R5]               ; Try again
        TST    R3, # MASR_Access_bit
        BNE    Run_DoAccess           ; If it's there, do it (R3 = MASR)

        B      Run_WaitForIt          ; Still idle: wait again, longer

        ; --------------

        ; Exit points --------------------
; PreRun_CheckStop is the return address given to read handlers: the
; value they leave in R0 is written to MDR before the stop flag check.
; Run_CheckStop is the return address for write handlers.
PreRun_CheckStop
	STR    R0, [R5, #MDR-MASR]      ; Deliver to PC
Run_CheckStop
	LDR    R3, [R4]                 ; Has a callback routine stopped
	CMP    R3, #0			; the emulation? Exit if so.
	BEQ    Run_Poll
; Run_Exit: leave CPUS_Run. The mode is put back before unstacking,
; so the LDMFD uses the same mode's SP as the STMFD at CPUS_Run.
Run_Exit
	LDR    R0, Run_ModeSave	; Get back initial mode value
	TEQP   R0, #0			; Copy flags to PC
	MOVNV  R0, R0			; wait for it to settle

	LDMFD  SP!, {R4-R11,PC}^


        ; Memory dispatch routines
; Memory read dispatch. Entered from the dispatch table with
; R3 = MASR contents, R6 = MemArray base + 16, R8 = &FFFFFFFF.
; The handler is entered with R0 = PC memory address and R12 = the
; handler's R12 value; it returns to PreRun_CheckStop, which writes
; R0 to MDR. Addresses below DispMemBase go to Run_MASRerror.
Run_MemRd8
        AND    R0, R3, R8, LSR #12    ; Get R0 = PC memory address
                                      ; (low 20 bits of MASR)
        SUBS   R2, R0, # DispMemBase  ; R2 = address - first address
                                      ; HS (carry set) if address is in range

        MOVHS  R2, R2, LSR #14        ; ASSUME: mem chunks are 2^14 bytes
                                      ; R2 = chunk no.
        ADDHS  R9, R6, R2, LSL #2     ; R9 = MemBase + (chunk no * 4); the
                                      ; LDR below adds chunk no * 16 with
                                      ; writeback, giving chunk no * 20
                                      ;    = address of handler structure
                                      ; ASSUME: each handler is 20 bytes
        LDRHS  R12, [R9, R2,LSL#4]!
	; Add 3 to ensure we arrive back in SVC mode
	ADRHS  LR,PreRun_CheckStop+3
        LDRHS  PC,  [R9, # H_READ8]  ; Get function address
        B      Run_MASRerror         ; give error if bad

; 16-bit variant: identical apart from using the H_READ16 entry
Run_MemRd16
        AND	R0, R3, R8, LSR #12	; Get R0 = PC memory address
        SUBS	R2, R0, # DispMemBase	; R2 = address - first address

	MOVHS	R2, R2, LSR #14      	; ASSUME: mem chunks are 2^14 bytes
                                    	; R2 = chunk no.
	ADDHS	R9, R6, R2, LSL #2   	; R9 = MemBase + (chunk no * 4); the
                                    	; LDR below adds chunk no * 16 with
                                    	; writeback, giving chunk no * 20
                                    	;    = address of handler structure
                                    	; ASSUME: each handler is 20 bytes
	LDRHS	R12, [R9, R2, LSL#4]!
	; Add 3 to ensure we arrive back in SVC mode
	ADRHS	LR,PreRun_CheckStop+3
	LDRHS	PC,  [R9, # H_READ16]	; Get function address
	B	Run_MASRerror       	; give error if bad

; I/O read dispatch. Entered from the dispatch table with
; R3 = MASR contents, R7 = IOArray base + 16, R8 = &FFFFFFFF.
; The handler is entered with R0 = PC I/O address, R10 = the value
; saved in r10_tmp and R12 = the handler's R12 value; it returns to
; PreRun_CheckStop, which writes R0 to MDR.
; Only bits 2-9 of the address select the handler.
Run_IORd16
        AND    R0, R3, R8, LSR #16	; Get R0 = PC I/O address
					; (low 16 bits of MASR)

        AND    R2, R0, # &3FC     	; ASSUME: IO chunks are 4 bytes
                                  	; Get R2 = chunk no * 4
        ADD    R2, R2, R2, LSL #2	; R2 = chunk no * 20
                                  	;    = offset of handler structure
	LDR    R10,r10_tmp
        LDR    R12, [R2, R7]!		; R12 = handler's R12 value;
					; R2 = address of handler structure
	ADR    LR,PreRun_CheckStop+3	; +3: return address in SVC mode
        LDR    PC,  [R2, # H_READ16]	; Get function address
; 8-bit variant; also carries the optional trace call
Run_IORd8
        AND    R0, R3, R8, LSR #16	; Get R0 = PC I/O address

 [ IOtrace=1                      	; !! test for soundblaster tracing
        CMP	R0, #Traceaddress  	; access SB card DSP reg?
        ;BNE	IORd8cont
 	STMFD	R13!,{R0-R3,R12,R14}	; save everything
        ExitSVCmode
        MOV	R1, #0              	; R0=address, R1=data (read, so no data)
        MOV	R2, #H_READ8        	; R2=access type
        BL	CPU_TraceIO        	; go for it!
        EnterSVCmode
        LDMFD	R13!,{R0-R3,R12,R14}	; put it all back

IORd8cont
 ]
	AND	R2, R0, # &3FC    	; ASSUME: IO chunks are 4 bytes
                                 	; Get R2 = chunk no * 4
	ADD	R2, R2, R2, LSL #2	; R2 = chunk no * 20
					;    = offset of handler structure
	LDR	R10,r10_tmp
	LDR	R12, [R2, R7]!		; R12 = handler's R12 value;
					; R2 = address of handler structure
	ADR	LR,PreRun_CheckStop+3	; +3: return address in SVC mode
	LDR	PC,  [R2, # H_READ8]	; Get function address


; Run_Poll_PostRead8/16 are exported entry points that write R0 to MDR
; and then poll; Run_Poll polls without delivering anything.
; Run_DoAccess expects R3 = MASR contents and jumps through DispTable.
Run_Poll_PostRead8
Run_Poll_PostRead16
	STR	R0, [R5, #MDR-MASR]	; Deliver to PC
Run_Poll
        LDR	R3, [R5]           	; Get MASR contents into R3
Run_DoAccess
	; Default return address for handlers (SVC mode); the read
	; dispatch routines replace it with PreRun_CheckStop+3
	ADR	LR,Run_CheckStop+3
	; PC reads as this instruction's address + 8, which is DispTable
	; (hence the padding word below, which must stay in place)
	LDR	PC, [PC, R3, LSR #25]	; Convert to dispatch table entry
                                     	; Convert DMA bit (27) to bit 2.
                                     	; Bits 25 and 26 should read as
                                     	; zero, but in fact it won't
                                     	; matter because we're not doing
                                     	; ADDS
        MOVNV  R0, R0                	; Padding to allow for PC value


        ; Dispatch table
        ; Entries in this table are for bits as follows
        ; (entry number = MASR bits 27-31 taken as a 5-bit value)
        ;   DMA flag = 1
        ; + Mem flag = 2
        ; + Write flag = 4
        ; + Word flag = 8
        ; + Access_present flag = 16

        ; The first 16 entries are all Run_NoAccess (no access pending)
        ; Currently we ignore the DMA flag (!!)
DispTable
	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess

	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess

	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess

	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess

	; Access pending, byte, read
	DCD	Run_IORd8
	DCD	Run_IORd8
	DCD	Run_MemRd8
	DCD	Run_MemRd8

	; Access pending, byte, write
	DCD	Run_IOWr8
	DCD	Run_IOWr8
	DCD	Run_MemWr8
	DCD	Run_MemWr8

	; Access pending, word, read
	DCD	Run_IORd16
	DCD	Run_IORd16
	DCD	Run_MemRd16
	DCD	Run_MemRd16

	; Access pending, word, write
	DCD	Run_IOWr16
	DCD	Run_IOWr16
	DCD	Run_MemWr16
	DCD	Run_MemWr16

        ; IO dispatch routines
; I/O write dispatch. Entered from the dispatch table with
; R3 = MASR contents, R7 = IOArray base + 16, R8 = &FFFFFFFF and
; LR = Run_CheckStop+3 (set at Run_DoAccess).
; The handler is entered with R0 = PC I/O address, R1 = data read from
; MDR, R10 = the value saved in r10_tmp and R12 = the handler's R12
; value. Only bits 2-9 of the address select the handler.
Run_IOWr16
        LDR    R1, [R5, #MDR-MASR]    ; R1 = I/O data; release 486
        AND    R0, R3, R8, LSR #16    ; Get R0 = PC I/O address

        AND    R2, R0, # &3FC         ; ASSUME: IO chunks are 4 bytes
                                      ; Get R2 = chunk no * 4
        ADD    R2, R2, R2, LSL #2     ; R2 = chunk no * 20
        ;ADD    R2, R7, R2             ; R2 = IOBase + (chunk no * 20)
                                      ;    = address of handler structure
                                      ; (now done by the writeback below)
	LDR	r10,r10_tmp
        LDR    R12, [R2, R7]!
        LDR    PC,  [R2, # H_WRITE16]    ; Get function address
; 8-bit variant; also carries the optional trace call
Run_IOWr8
        LDR    R1, [R5, #MDR-MASR]    ; R1 = I/O data; release 486
        AND    R0, R3, R8, LSR #16    ; Get R0 = PC I/O address

 [ IOtrace=1                          ; !! test for soundblaster tracing
        CMP    R0, #Traceaddress      ; access SB card DSP reg?
        ;BNE    IOWr8cont
 	STMFD  R13!,{R0-R3,R12,R14}   ; save everything
        ExitSVCmode
        MOV    R2, #H_WRITE8          ; R2=access type
        BL     CPU_TraceIO            ; R0=address, R1=data, go for it!
        EnterSVCmode
        LDMFD  R13!,{R0-R3,R12,R14}   ; put it all back

IOWr8cont
 ]
	AND	R2, R0, # &3FC		; ASSUME: IO chunks are 4 bytes
                                	; Get R2 = chunk no * 4
	ADD	R2, R2, R2, LSL #2	; R2 = chunk no * 20
        ;ADD    R2, R7, R2        	; R2 = IOBase + (chunk no * 20)
                                  	;    = address of handler structure
                                  	; (now done by the writeback below)
	LDR	r10,r10_tmp
	LDR	R12, [R2, R7]!
	LDR	PC,  [R2, # H_WRITE8]	; Get function address

; Memory write dispatch. Entered from the dispatch table with
; R3 = MASR contents, R6 = MemArray base + 16, R8 = &FFFFFFFF and
; LR = Run_CheckStop+3 (set at Run_DoAccess).
; The handler is entered with R0 = PC memory address, R1 = data read
; from MDR and R12 = the handler's R12 value. MDR is only read here
; when the address is in range; otherwise Run_MASRerror deals with it.
Run_MemWr8
	AND	R0, R3, R8, LSR #12	; Get R0 = PC memory address
	SUBS	R2, R0, # DispMemBase	; R2 = address - first address

	LDRHS	R1, [R5, #MDR-MASR]	; Get R1 = data, free 486
	MOVHS	R2, R2, LSR #14    	; ASSUME: mem chunks are 2^14 bytes
                                   	; R2 = chunk no.
	ADDHS	R9, R6, R2, LSL #2 	; R9 = MemBase + (chunk no * 4); the
                                   	; LDR below adds chunk no * 16 with
                                   	; writeback, giving chunk no * 20
                                   	;    = address of handler structure
                                   	; ASSUME: each handler is 20 bytes
	LDRHS	R12, [R9,R2,LSL#4]!
	LDRHS	PC,  [R9, # H_WRITE8]	; Get function address
	B	Run_MASRerror		; give error if bad

; 16-bit variant: identical apart from using the H_WRITE16 entry
Run_MemWr16
	AND    R0, R3, R8, LSR #12	; Get R0 = PC memory address
	SUBS	R2, R0, # DispMemBase	; R2 = address - first address

	LDRHS	R1, [R5, #MDR-MASR]	; Get R1 = data, free 486
	MOVHS	R2, R2, LSR #14		; ASSUME: mem chunks are 2^14 bytes
					; R2 = chunk no.
	ADDHS	R9, R6, R2, LSL #2	; R9 = MemBase + (chunk no * 4); the
					; LDR below adds chunk no * 16 with
					; writeback, giving chunk no * 20
					;    = address of handler structure
					; ASSUME: each handler is 20 bytes
	LDRHS	R12, [R9, R2, LSL#4]!
	LDRHS	PC,  [R9, # H_WRITE16]	; Get function address
	B	Run_MASRerror		; give error if bad



        ; Mailbox error:

Run_MASRerror
        ; Reached from the memory dispatch routines when the PC address
        ; lies below DispMemBase, so no handler exists for it.
        ; On entry R3 = MASR contents, R5 = MASR address.
        ; The access is completed with dummy data so the PC is released,
        ; and the offending MASR value is left in CPUS_ErrorWord.
        STR    R3, CPUS_ErrorWord     ; Save what went wrong for trace

        MOV    R0, #0                 ; Give a dummy read result
        TST    R3, #MASR_Write_bit    ; or clear the write port
        STREQ  R0, [R5, #MDR-MASR]    ; Read:  deliver zero to the PC
        LDRNE  R0, [R5, #MDR-MASR]    ; Write: fetch and discard the data

        ; Rejoin the polling loop via the stop flag check. Without this
        ; branch execution would run on into the Run_ModeSave data word
        ; and then into CPU_BlockIn.
        B      Run_CheckStop


       ; End of CPUS_Run *******************************************

; R15 value saved on entry to CPUS_Run; used by Run_Exit to restore the
; caller's processor mode
Run_ModeSave DCD 0



  ; ****************************************************************

  ; Block In/Out routines

  ; The idea of these is to speed up long memory transfers which the 386
  ; does with a block IO instruction. We can save time over the normal
  ; callback method of handling the port input or output because a whole
  ; series of consecutive accesses are made to the same port.

  ; The two routines CPU_BlockIn and CPU_BlockOut are passed
  ; R0 = port number, R1 = pointer to data block on ARM side,
  ; R2 = maximum length of said data block.
  ; They will poll the mailbox: if an access takes place to the specified
  ; port number (either byte or word), the data is moved to or from the
  ; data block at R1. The routines will return when either

  ; 1) a read or write past the specified 'maxlen' is attempted (this is
  ; an error).

  ; 2) a read or write to a location which is not the specified port
  ;     (not an error)

  ; 3) the stopflag is set


CPU_BlockIn ; ( int ioport, BYTE * pointer, int maxlen )
             ; returns int result (in R0 ) saying how many
             ; bytes were successfully read in. Heavily optimised
             ; for word accesses. Must be called in SVC mode
             ; IO port must be even for byte accesses to work!!
             ;
             ; In:  R0 = port number, R1 = data block, R2 = maxlen
             ; Out: R0 = number of bytes delivered to the PC
             ; Data is taken from the block at R1 and written to MDR
             ; each time MASR shows a read of the given port.
             ; R4-R7 are preserved; R1-R3 are corrupted.

       STMFD   SP!, {R4-R7, LR}

       LDR     R5, MASR_load    ; R5->mailbox data

       MOV     R3, #0           ; R3 = number read
       BICS    R2, R2, #1       ; convert to even number
       BEQ     BI_Exit          ; if none left, stop now

       LDR     R7, CPUS_pStopFlag  ; R7 = stop flag address

       ; Build the MASR value of a pending word read of this port
       ORR     R0, R0, # MASR_Access_bit + MASR_Word_bit

       ; Main loop
       ; At this point
       ; R0 = Target MASR value
       ; R1 = Read pointer
       ; R2 = maxlen  ( even, at least 2 )
       ; R3 = no. read so far
       ; R5 = MASR address

       ; R4, R6 = working regs
       ; R7 -> stop flag, non-zero to stop

       ; The next two bytes are fetched before polling, so the data is
       ; ready the moment the access shows up.
       ; NOTE(review): two bytes are always fetched, so once a byte
       ; access has made the count odd the second LDRB may read one
       ; byte beyond maxlen - confirm callers allow this.
BI_Wait
       LDRB    R4, [R1], #1              ; Get bytes & increment pointer
       LDRB    R6, [R1], #1
       ORR     R4, R4, R6, LSL #8        ; R4 = next word of data

BI_Poll
       LDR     R6, [R5]                  ; Read MASR
       CMP     R6, R0                    ; Is it the right access?
       BNE     BI_ByteOrOther            ; If not, worry about it later
       STR     R4, [R5, #MDR-MASR]       ; Deliver to PC
       ADD     R3, R3, #2                ; Add 2 to total bytes done
       CMP     R3, R2                    ; Have we exceeded limit?
       BLO     BI_Wait                   ; If not, go back & start
BI_Exit                                  ; Else, exit
       MOV     R0, R3                    ; Return number of bytes read
       LDMFD   SP!, {R4-R7, PC}^

BI_ByteOrOther                           ; Enter here if anything other
                                         ; than a word read, with R6 =
                                         ; actual MASR contents

       TST     R6, # MASR_Access_bit     ; Was there an access??
       BNE     BI_CheckByte              ; If so, check for byte I/O

       LDR     R6, [R7]                  ; If not, check stop flag
       CMP     R6, #0
       BEQ     BI_Poll                   ; If OK, retry
       B       BI_Exit

BI_CheckByte
       EOR     R6, R6, # MASR_Word_bit   ; Is it what we want apart from
       CMP     R6, R0                    ; the byte/word bit?
       BNE     BI_Exit                   ; If not, exit
       STR     R4, [R5, #MDR-MASR]       ; R4 LSB contains next byte
       SUB     R1, R1, #1                ; Adjust R1 for next byte
                                         ; (two were fetched, one used)

       ADD     R3, R3, #1                ; Done one more byte
       CMP     R3, R2                    ; Is that it?
       BLO     BI_Wait
       B       BI_Exit

      ; ********************************************************



CPU_BlockOut ; ( int ioport, char *pointer, int maxlen )

             ; returns int result (in R0 ) saying how many
             ; bytes were successfully written in. Heavily optimised
             ; for word accesses. Must be called in SVC mode
             ; IO port must be even for byte accesses to work!!
             ;
             ; In:  R0 = port number, R1 = data block, R2 = maxlen
             ; Out: R0 = number of bytes stored in the block
             ; Data is read from MDR and stored in the block at R1
             ; each time MASR shows a write to the given port.
             ; R4-R7 are preserved; R1-R3 are corrupted.


       STMFD   SP!, {R4-R7, LR}

       LDR     R5, MASR_load    ; R5->mailbox data

       MOV     R3, #0           ; R3 = number written
       BICS    R2, R2, #1       ; convert maxlen to even number
       BEQ     BO_Exit          ; if none left, stop now

       LDR     R7, CPUS_pStopFlag  ; R7 = stop flag address

       ; Build the MASR value of a pending word write to this port
       ORR     R0, R0, # MASR_Access_bit + MASR_Word_bit + MASR_Write_bit

       ; Main loop
       ; At this point
       ; R0 = Target MASR value
       ; R1 = Write pointer
       ; R2 = maxlen  ( even, at least 2 )
       ; R3 = no. written so far
       ; R5 = MASR address

       ; R4, R6 = working regs
       ; R7 -> stop flag, non-zero to stop

       ; NOTE(review): once a byte access has made the count odd, a
       ; later word access may store its second byte one place beyond
       ; maxlen - confirm callers allow for this.
BO_Poll
       LDR     R6, [R5]                  ; Read MASR
       CMP     R6, R0                    ; Is it the right access?
       BNE     BO_ByteOrOther            ; If not, worry about it later
       LDR     R4, [R5, #MDR-MASR]       ; Get bytes from PC

       STRB    R4, [R1], #1              ; Store bytes
       MOV     R4, R4, LSR #8
       STRB    R4, [R1], #1
       ADD     R3, R3, #2                ; Add 2 to total bytes done
       CMP     R3, R2                    ; Have we exceeded limit?
       BLO     BO_Poll                   ; If not, go back & start
BO_Exit                                  ; Else, exit
       MOV     R0, R3                    ; Return number of bytes written
       LDMFD   SP!, {R4-R7, PC}^

BO_ByteOrOther                           ; Enter here if anything other
                                         ; than a word write, with R6 =
                                         ; actual MASR contents

       TST     R6, # MASR_Access_bit     ; Was there an access??
       BNE     BO_CheckByte              ; If so, check for byte I/O

       LDR     R6, [R7]                  ; If not, check stop flag
       CMP     R6, #0
       BEQ     BO_Poll                   ; If OK, retry
       B       BO_Exit

BO_CheckByte
       EOR     R6, R6, # MASR_Word_bit   ; Is it what we want apart from
       CMP     R6, R0                    ; the byte/word bit?
       BNE     BO_Exit                   ; If not, exit
       LDR     R4, [R5, #MDR-MASR]       ; Get data into R4
       STRB    R4, [R1], #1              ; Store it; R1 moves on one byte

       ADD     R3, R3, #1                ; Done one more byte
       CMP     R3, R2                    ; Is that it?
       BLO     BO_Poll                   ; If not, go back for more
       B       BO_Exit

 ; Hardware read/write routines ---------------------------------------------

 ; All the initialising is done from C as it's fairly complex. These
 ; routines merely allow us to read/write memory in supervisor mode.

        ; ----------------------

CPUS_Read ; (int addr) returns value at [addr]

        ; Enter with R0 = memory address
        ; Exit with R0 = word loaded from that address; R1 corrupted
        ; The return address is kept in R1 rather than LR for the
        ; duration of the SWI
        MOV     R1, LR

        TST     R1, #3                      ;Are we in user mode?
                                            ;(low two bits of LR are zero)
        SWIEQ   OS_EnterOS                  ;If so, better enter SVC mode

        LDR     R0, [R0]                    ; Get the result
        MOVS    PC, R1                      ; Return and restore mode

        ; ----------------------

CPUS_Write ; (int addr, data) writes data to [addr]

        ; Enter with R0 = memory address, R1 = data
        ; No result is returned; R2 is corrupted
        ; The return address is kept in R2 rather than LR for the
        ; duration of the SWI
        MOV     R2, LR

        TST     R2, #3                      ;Are we in user mode?
                                            ;(low two bits of LR are zero)
        SWIEQ   OS_EnterOS                  ;If so, better enter SVC mode

        STR     R1, [R0]                    ; Store the data word
        MOVS    PC, R2                      ; Return and restore mode

; Hardware ID ----------------------

; CPU_HardwareID is an exported word holding the address of the
; zero-terminated name string that follows it
CPU_HardwareID DCD CPUS_Name
CPUS_Name      DCB "Risc PC x86 coprocessor card", 0

        END




