; *********************************************************************
;
;  DIVAPC ARM Assembler source
;
;  CPU.S.CPUS_G  Assembler interface to Gemini card
;
;  Versions
;  1.65 24-11-94 INH   Loosely derived from CPUS-E for v1.64
;  1.75 20-03-95       Added CPUS_StopFlag for callbacks
;  1.83 15-05-95       Added StopFlag in block in/out routines
;  2.12 1997.09.24 RW  Optimised Do_access
; *********************************************************************

    GET SYS.S.STDDEFS

; Definitions ---------------------------------------------------------

; Address definitions for podule
;
; The code below keeps the MASR address in a base register and reaches
; MDR as [base, #MDR-MASR], so the two must stay within LDR/STR offset
; range of each other.

MASR        EQU &3730000        ; Status/address word: flag bits (below)
                                ; in the top bits, PC-side address in
                                ; the low bits
MDR         EQU &3730004        ; Data word: loaded to fetch data the PC
                                ; wrote, stored to deliver data the PC
                                ; is reading

; Bit definitions for podule registers
;
; CPUS_Run shifts MASR right by 25 and uses the result as a byte offset
; into its dispatch table, so bits 31..27 select the table entry.

MASR_Access_bit       EQU  1 :SHL: 31   ; Set when an access is pending
MASR_Word_bit         EQU  1 :SHL: 30   ; Set = 16-bit access, clear = 8-bit
MASR_Write_bit        EQU  1 :SHL: 29   ; Set = PC write, clear = PC read
MASR_Mem_bit          EQU  1 :SHL: 28   ; Set = memory space, clear = I/O
MASR_DMA_bit          EQU  1 :SHL: 27   ; Not referenced in this file; the
                                        ; dispatch table ignores it

; Offsets within 'handler' structure
;
; One structure per I/O or memory chunk. The dispatch code indexes the
; handler arrays in steps of 20 bytes, i.e. the five words below.

H_READ8   EQU 0                 ; Address of 8-bit read routine
H_READ16  EQU 4                 ; Address of 16-bit read routine
H_WRITE8  EQU 8                 ; Address of 8-bit write routine
H_WRITE16 EQU 12                ; Address of 16-bit write routine
H_R12val  EQU 16                ; Value loaded into R12 before the call

DispMemBase EQU &A0000   ; First memory range: lowest PC address that has
                         ; a memory handler; accesses below it are
                         ; treated as a mailbox error by CPUS_Run

; ---------------------------------------------------------------------


        AREA |C$$Code|, CODE, READONLY

        ; NOTE(review): the variables below and Run_ModeSave live in this
        ; area and are written with STR at run time (CPUS_ErrorWord and
        ; Run_ModeSave by CPUS_Run), despite the READONLY attribute.
        ; Confirm this is intended before moving the code into ROM or a
        ; write-protected region.

        ; Main emulation loop

        EXPORT      CPUS_Run

        ; Block I/O fast paths

        EXPORT      CPU_BlockIn
        EXPORT      CPU_BlockOut

        ; Variables (see definitions below)

        EXPORT      CPUS_IOArray
        EXPORT      CPUS_MemArray
        EXPORT      CPUS_pStopFlag
        EXPORT      CPUS_ErrorWord

        ; Privileged single-word access and hardware identification

        EXPORT      CPUS_Read
        EXPORT      CPUS_Write
        EXPORT      CPU_HardwareID

        ALIGN

        ; The three pointers below are read once, on entry to CPUS_Run,
        ; into registers; changing them while CPUS_Run is executing has no
        ; effect on the running loop.

CPUS_IOArray        DCD 0           ; Array of handlers for I/O space
                                    ; (20-byte handler structures)
CPUS_MemArray       DCD 0           ; Array of handlers for memory space
                                    ; (20-byte handler structures)
CPUS_pStopFlag      DCD 0           ; Pointer to int: non-zero value will
                                    ; stop emulation
CPUS_ErrorWord      DCD 0           ; Non-zero if hardware error during
                                    ; emulation: cleared on entry to
                                    ; CPUS_Run, set to the offending MASR
                                    ; value on a mailbox error

MASR_load       DCD MASR            ; Literal: address of MASR, fetched
                                    ; with a PC-relative LDR


CPUS_Run ; (void) *****************************************
        ; Main emulation loop. Polls the podule mailbox (MASR) and
        ; dispatches every pending PC access to the handler registered
        ; for it, until the stop flag becomes non-zero or a mailbox
        ; error is detected.
        ;
        ; This may be called from any processor mode, and is
        ; expected to restore the ARM to that mode afterwards.
        ;
        ; The processor state on entry is kept in the single static word
        ; Run_ModeSave, so nested or concurrent calls would overwrite it.

        ; APCS procedure entry

        MOV    IP, SP
        STMFD  SP!, {R4-R9, FP, IP, LR, PC}
        SUB    FP, IP, #4

        ; Initialise
        ; These registers stay fixed for the whole of the polling loop;
        ; handlers that are called from it must preserve them.

        LDR    R4, CPUS_pStopFlag   ; R4 -> stop flag word
        LDR    R5, MASR_load        ; R5 = address of MASR
        LDR    R6, CPUS_MemArray    ; R6 -> memory handler array
        LDR    R7, CPUS_IOArray     ; R7 -> I/O handler array
        MOV    R8, # &FFFFFFFF      ; Used for generating masks
                                    ; (shifted right to keep the low
                                    ; 16 or 20 address bits of MASR)

        ; Enter OS and initialise

        MOV    R0, PC               ; PC read as data carries the current
                                    ; mode and flag bits
        STR    R0, Run_ModeSave      ; R0 contains CPU mode to return to
        EnterSVCmode                 ; Macro, not defined in this file
                                     ; (presumably from SYS.S.STDDEFS)

        MOV    R0, #0
        STR    R0, CPUS_ErrorWord   ; No error recorded yet

        B      Run_Poll             ; First pass skips the stop-flag test

        ; Start of main polling loop ***********************************

        ; At this point
        ;  R4 = Pointer to stop flag
        ;  R5 = MASR address
        ;  R6 = MemArray base
        ;  R7 = IOArray base
        ;  R8 = 0xFFFFFFFF (for mask)

        ; The loop is entered for the first time at label 'Run_Poll'.
        ; When the dispatch finds no access pending it goes to
        ; 'Run_NoAccess' and waits there. After a callback (handler) has
        ; run, control returns to 'Run_CheckStop', or to
        ; 'PreRun_CheckStop' for reads, which first delivers the result.

        ; This code backs off from using the OpenBus when the PC is
        ; not making accesses, to avoid using up bus bandwidth.
        ; The delay between polls starts at 5 loop iterations and grows
        ; by a quarter after every unsuccessful poll, up to 180.

Run_NoAccess
        MOV    R0, #5                 ; Initial timeout period
        ;MOV    R0, #30               ; 30 for SA. 5 for ARM

Run_WaitForIt
        LDR    R3, [R4]               ; Get stopflag
        CMP    R3, #0
        BNE    Run_Exit               ; Non-zero: leave the emulation

        LDR    R3, [R5]               ; Try again
        TST    R3, # MASR_Access_bit
        BNE    Run_DoAccess           ; If it's there, do it
                                      ; (R3 = MASR, as Run_DoAccess expects)

        MOV    R1, R0                 ; If not, wait for a bit
Run_WaitLoop                          ; to give PC some bus space
        SUBS   R1, R1, #1
        BNE    Run_WaitLoop

        ADD    R0, R0, R0, LSR #2     ; adds a quarter (rounded down)
        CMP    R0, #180               ; to timeout until max of 180
        ;CMP    R0, #1000             ; 1000 for SA
        BLO    Run_WaitForIt          ; is reached, where it stays
        MOV    R0, #180               ; Clamp to the maximum
        ;MOV    R0, #1000
        B      Run_WaitForIt

        ; --------------




        ; Write dispatch routines and the 8-bit I/O read ---------------
        ;
        ; All are entered from the dispatch table with R3 = MASR contents.
        ; Each one locates the 20-byte handler structure for the address,
        ; loads R12 from it and jumps to the handler with
        ;   R0 = PC address (I/O port or memory address)
        ;   R1 = data from MDR (writes only)
        ;   LR = return point, with 3 added so that the handler's return
        ;        lands in SVC mode
        ; Write handlers return to Run_CheckStop. Read handlers return to
        ; PreRun_CheckStop, which stores R0 to MDR as the read result.
        ; The polling loop relies on R4-R8 being intact on return.

Run_IOWr16
        LDR    R1, [R5, #MDR-MASR]    ; R1 = I/O data; release 486
        AND    R0, R3, R8, LSR #16    ; Get R0 = PC I/O port (low 16 bits)

        AND    R2, R0, # &3FC         ; ASSUME: IO chunks are 4 bytes
                                      ; Get R2 = chunk no * 4
                                      ; (port bits above bit 9 are ignored)
        ADD    R2, R2, R2, LSL #2     ; R2 = chunk no * 20
        ADD    R2, R7, R2             ; R2 = IOBase + (chunk no * 20)
                                      ;    = address of handler structure
        LDR    R12, [R2, # H_R12val]
	ADR	LR,Run_CheckStop+3
        LDR    PC,  [R2, # H_WRITE16]    ; Get function address
Run_IOWr8
        LDR    R1, [R5, #MDR-MASR]    ; R1 = I/O data; release 486
        AND    R0, R3, R8, LSR #16    ; Get R0 = PC I/O port (low 16 bits)

        AND    R2, R0, # &3FC         ; ASSUME: IO chunks are 4 bytes
                                      ; Get R2 = chunk no * 4
        ADD    R2, R2, R2, LSL #2     ; R2 = chunk no * 20
        ADD    R2, R7, R2             ; R2 = IOBase + (chunk no * 20)
                                      ;    = address of handler structure
        LDR    R12, [R2, # H_R12val]
	ADR	LR,Run_CheckStop+3
        LDR    PC,  [R2, # H_WRITE8]    ; Get function address

Run_IORd8
        AND    R0, R3, R8, LSR #16    ; Get R0 = PC I/O port (low 16 bits)

        AND    R2, R0, # &3FC         ; ASSUME: IO chunks are 4 bytes
                                      ; Get R2 = chunk no * 4
        ADD    R2, R2, R2, LSL #2     ; R2 = chunk no * 20
        ADD    R2, R7, R2             ; R2 = IOBase + (chunk no * 20)
                                      ;    = address of handler structure
        LDR    R12, [R2, # H_R12val]
	ADR	LR,PreRun_CheckStop+3
        LDR    PC,  [R2, # H_READ8]     ; Get function address

        ; Memory writes: every instruction after the SUBS is conditional
        ; on the address being at or above DispMemBase. For a lower
        ; address they are all skipped (MDR is not read) and the final
        ; branch reports a mailbox error.

Run_MemWr8
        AND    R0, R3, R8, LSR #12    ; Get R0 = PC memory address (20 bits)
        SUBS   R2, R0, # DispMemBase  ; R2 = address - first address

        LDRHS    R1, [R5, #MDR-MASR]    ; Get R1 = data, free 486
        MOVHS    R2, R2, LSR #14        ; ASSUME: mem chunks are 2^14 bytes
                                      ; R2 = chunk no.
        ADDHS    R2, R2, R2, LSL #2     ; R2 = chunk no * 5
        ADDHS    R2, R6, R2, LSL #2     ; R2 = MemBase + (chunk no * 20)
                                      ;    = address of handler structure
                                      ; ASSUME: each handler is 20 bytes
        LDRHS    R12, [R2, # H_R12val]
	ADRHS	LR,Run_CheckStop+3
        LDRHS    PC,  [R2, # H_WRITE8]    ; Get function address
        B    Run_MASRerror          ; give error if bad

Run_MemWr16
        AND	R0, R3, R8, LSR #12    ; Get R0 = PC memory address (20 bits)
        SUBS	R2, R0, # DispMemBase  ; R2 = address - first address

        LDRHS	R1, [R5, #MDR-MASR]    ; Get R1 = data, free 486
        MOVHS	R2, R2, LSR #14        ; ASSUME: mem chunks are 2^14 bytes
                                      ; R2 = chunk no.
        ADDHS	R2, R2, R2, LSL #2     ; R2 = chunk no * 5
        ADDHS	R2, R6, R2, LSL #2     ; R2 = MemBase + (chunk no * 20)
                                      ;    = address of handler structure
                                      ; ASSUME: each handler is 20 bytes
        LDRHS	R12, [R2, # H_R12val]
	ADRHS	LR,Run_CheckStop+3
        LDRHS	PC,  [R2, # H_WRITE16]   ; Get function address
        B	Run_MASRerror          ; give error if bad

        ; Handler return points and access dispatch
        ;
        ; Read handlers return to PreRun_CheckStop with the result in R0;
        ; write handlers return to Run_CheckStop. Both fall through into
        ; the next poll of the mailbox.

PreRun_CheckStop
        STR    R0, [R5, #MDR-MASR]    ; Deliver to PC
Run_CheckStop
        LDR    R3, [R4]               ; Has a callback routine stopped
        CMP    R3, #0                 ; the emulation? Exit if so.
        BNE    Run_Exit
Run_Poll
        LDR    R3, [R5]               ; Get MASR contents into R3
Run_DoAccess
        ;ADD    PC, PC, R3, LSR #25    ; Convert to dispatch table entry
        LDR    PC, [PC, R3, LSR #25]    ; Convert to dispatch table entry
                                      ; PC reads as this instruction + 8,
                                      ; which is DispTable because of the
                                      ; single padding word below.
                                      ; The shift moves MASR bits 31..27
                                      ; to bits 6..2, giving a word index
                                      ; with the DMA bit (27) at bit 2.
                                      ; Bits 25 and 26 end up in bits 1..0
                                      ; of the offset and should read as
                                      ; zero.
                                      ; NOTE(review): the earlier remark
                                      ; that these bits do not matter
                                      ; applied to the commented-out ADD
                                      ; form; with LDR a non-zero value
                                      ; gives a non-word-aligned table
                                      ; address. Confirm the hardware
                                      ; always returns them as zero.
        MOVNV  R0, R0                 ; Padding to allow for PC value
                                      ; (never executed)


        ; Dispatch table
        ; Entries in this table are for bits as follows
        ;   DMA flag = 1
        ; + Mem flag = 2
        ; + Write flag = 4
        ; + Word flag = 8
        ; + Access_present flag = 16

        ; The first 16 entries have the access flag clear (no access)
        ; Currently we ignore the DMA flag (!!), so entries come in
        ; identical pairs
        ; The table must stay immediately after the padding word above
DispTable
	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess

	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess

	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess

	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess
	DCD	Run_NoAccess

	DCD	Run_IORd8
	DCD	Run_IORd8
	DCD	Run_MemRd8
	DCD	Run_MemRd8

	DCD	Run_IOWr8
	DCD	Run_IOWr8
	DCD	Run_MemWr8
	DCD	Run_MemWr8

	DCD	Run_IORd16
	DCD	Run_IORd16
	DCD	Run_MemRd16
	DCD	Run_MemRd16

	DCD	Run_IOWr16
	DCD	Run_IOWr16
	DCD	Run_MemWr16
	DCD	Run_MemWr16

        ; Read dispatch routines (memory, then 16-bit I/O)
        ;
        ; Entered from the dispatch table with R3 = MASR contents. The
        ; handler is called with R0 = PC address and R12 from the handler
        ; structure, and returns to PreRun_CheckStop, which stores R0 to
        ; MDR as the value read by the PC.
        ; For memory reads every instruction after the SUBS is conditional
        ; on the address being at or above DispMemBase; otherwise the
        ; final branch reports a mailbox error.
Run_MemRd8
        AND    R0, R3, R8, LSR #12    ; Get R0 = PC memory address (20 bits)
        SUBS   R2, R0, # DispMemBase  ; R2 = address - first address

        MOVHS    R2, R2, LSR #14        ; ASSUME: mem chunks are 2^14 bytes
                                      ; R2 = chunk no.
        ADDHS    R2, R2, R2, LSL #2     ; R2 = chunk no * 5
        ADDHS    R2, R6, R2, LSL #2     ; R2 = MemBase + (chunk no * 20)
                                      ;    = address of handler structure
                                      ; ASSUME: each handler is 20 bytes
        LDRHS    R12, [R2, # H_R12val]
	; Add 3 to ensure we arrive back in SVC mode
	ADRHS	LR,PreRun_CheckStop+3
        LDRHS	PC,  [R2, # H_READ8]     ; Get function address
        B	Run_MASRerror          ; give error if bad

Run_MemRd16
        AND    R0, R3, R8, LSR #12    ; Get R0 = PC memory address (20 bits)
        SUBS   R2, R0, # DispMemBase  ; R2 = address - first address

        MOVHS    R2, R2, LSR #14        ; ASSUME: mem chunks are 2^14 bytes
                                      ; R2 = chunk no.
        ADDHS    R2, R2, R2, LSL #2     ; R2 = chunk no * 5
        ADDHS    R2, R6, R2, LSL #2     ; R2 = MemBase + (chunk no * 20)
                                      ;    = address of handler structure
                                      ; ASSUME: each handler is 20 bytes
        LDRHS    R12, [R2, # H_R12val]
	; Add 3 to ensure we arrive back in SVC mode
	ADRHS	LR,PreRun_CheckStop+3
        LDRHS    PC,  [R2, # H_READ16]     ; Get function address
        B    Run_MASRerror          ; give error if bad

Run_IORd16
        AND    R0, R3, R8, LSR #16    ; Get R0 = PC I/O port (low 16 bits)

        AND    R2, R0, # &3FC         ; ASSUME: IO chunks are 4 bytes
                                      ; Get R2 = chunk no * 4
                                      ; (port bits above bit 9 are ignored)
        ADD    R2, R2, R2, LSL #2     ; R2 = chunk no * 20
        ADD    R2, R7, R2             ; R2 = IOBase + (chunk no * 20)
                                      ;    = address of handler structure
        LDR    R12, [R2, # H_R12val]
	ADR	LR,PreRun_CheckStop+3
        LDR    PC,  [R2, # H_READ16]    ; Get function address

        ; Exit points --------------------

        ; Mailbox error: a memory access below DispMemBase was seen.
        ; Record the MASR value, complete the access so that it is no
        ; longer pending, then leave the emulation via Run_Exit.

Run_MASRerror
        STR    R3, CPUS_ErrorWord     ; Save what went wrong for trace

        MOV    R0, #0                 ; Give a dummy read result
        TST    R3, #MASR_Write_bit    ; or clear the write port
        STREQ  R0, [R5, #MDR-MASR]    ; Read access: deliver zero
        LDRNE  R0, [R5, #MDR-MASR]    ; Write access: fetch and discard


        ; Restore processor mode ****************************
        ; Reached by fall-through from Run_MASRerror, or by branch when
        ; the stop flag is found non-zero.
Run_Exit
        LDR    R0, Run_ModeSave     ; Get back initial mode value
        TEQP   R0, #0               ; Copy flags to PC
                                    ; (restores the mode and flags that
                                    ; were saved on entry)
        MOVNV  R0, R0               ; wait for it to settle

        ; APCS exit

        LDMDB  FP, {R4-R9, FP, SP, PC}^


       ; End of CPUS_Run *******************************************

Run_ModeSave DCD 0                  ; PC + mode/flag bits saved on entry
                                    ; to CPUS_Run; a single static word,
                                    ; so CPUS_Run is not re-entrant



  ; ****************************************************************

  ; Block In/Out routines

  ; The idea of these is to speed up long memory transfers which the 386
  ; does with a block IO instruction. We can save time over the normal
  ; callback method of handling the port input or output because a whole
  ; series of consecutive accesses are made to the same port.

  ; The two routines CPU_BlockIn and CPU_BlockOut are passed
  ; R0 = port number, R1 = pointer to data block on ARM side,
  ; R2 = maximum length of said data block.
  ; They will poll the mailbox: if an access takes place to the specified
  ; port number (either byte or word), the data is moved to or from the
  ; data block at R1. The routines will return when either

  ; 1) a read or write past the specified 'maxlen' is attempted (this is
  ; an error).

  ; 2) a read or write to a location which is not the specified port
  ;     (not an error)

  ; 3) the stopflag is set


CPU_BlockIn ; ( int ioport, BYTE * pointer, int maxlen )
             ; returns int result (in R0 ) saying how many
             ; bytes were successfully read in. Heavily optimised
             ; for word accesses. Must be called in SVC mode
             ; IO port must be even for byte accesses to work!!
             ;
             ; Services consecutive PC reads of 'ioport' straight from
             ; the buffer at 'pointer', polling the mailbox itself.
             ;
             ; In:  R0 = I/O port number
             ;      R1 = pointer to data to hand to the PC
             ;      R2 = maxlen, number of bytes available at R1
             ; Out: R0 = number of bytes delivered (never above maxlen)
             ;      R1-R3 corrupted; R4-R8 preserved
             ;
             ; Returns when
             ;  - the count reaches maxlen rounded down to even
             ;  - the PC asks for a word when fewer than two bytes remain
             ;    within maxlen (the access is left pending)
             ;  - an access that is not a read of 'ioport' is seen (the
             ;    access is left pending)
             ;  - the stop flag is non-zero while no access is pending
             ;
             ; A maxlen of 0 or 1 returns 0 at once.

       STMFD   SP!, {R4-R8, LR}

       LDR     R5, MASR_load    ; R5->mailbox (MASR; MDR at #MDR-MASR)

       MOV     R3, #0           ; R3 = number read
       MOV     R8, R2           ; R8 = maxlen as passed: hard upper bound
       BICS    R2, R2, #1       ; convert to even number
       BEQ     BI_Exit          ; if none left, stop now

       LDR     R7, CPUS_pStopFlag  ; R7 = stop flag address

       ; Expected MASR value for a word read of this port; a byte read
       ; differs only in MASR_Word_bit

       ORR     R0, R0, # MASR_Access_bit + MASR_Word_bit

       ; Main loop
       ; At this point
       ; R0 = Target MASR value
       ; R1 = Read pointer
       ; R2 = maxlen rounded down to even ( at least 2 )
       ; R3 = no. read so far
       ; R5 = MASR address
       ; R8 = maxlen as passed by the caller

       ; R4, R6 = working regs
       ; R7 -> stop flag, non-zero to stop

       ; The next data is fetched before polling so that it can be
       ; delivered as soon as the access appears. Once bytes have been
       ; read singly the count can be odd, leaving a single byte at the
       ; end of the buffer; the high byte is then not fetched, so the
       ; buffer is never read beyond maxlen.

BI_Wait
       ADD     R6, R3, #2                ; R6 = count if a word goes next
       CMP     R6, R8                    ; Does a whole word still fit?
       LDRB    R4, [R1], #2              ; Low byte; step pointer a word
       LDRLSB  R6, [R1, #-1]             ; High byte only if within maxlen
       ORRLS   R4, R4, R6, LSL #8        ; R4 = next word of data

BI_Poll
       LDR     R6, [R5]                  ; Read MASR
       CMP     R6, R0                    ; Is it the right access?
       BNE     BI_ByteOrOther            ; If not, worry about it later
       ADD     R6, R3, #2                ; R6 = count after this word
       CMP     R6, R8                    ; Would it run past maxlen?
       BHI     BI_Exit                   ; If so leave the access pending
                                         ; and report what was done
       STR     R4, [R5, #MDR-MASR]       ; Deliver to PC
       MOV     R3, R6                    ; Add 2 to total bytes done
       CMP     R3, R2                    ; Have we reached the limit?
       BLO     BI_Wait                   ; If not, go back & start
BI_Exit                                  ; Else, exit
       MOV     R0, R3                    ; Return number of bytes read
       LDMFD   SP!, {R4-R8, PC}^

BI_ByteOrOther                           ; Enter here if anything other
                                         ; than a word read, with R6 =
                                         ; actual MASR contents

       TST     R6, # MASR_Access_bit     ; Was there an access??
       BNE     BI_CheckByte              ; If so, check for byte I/O

       LDR     R6, [R7]                  ; If not, check stop flag
       CMP     R6, #0
       BEQ     BI_Poll                   ; If OK, retry
       B       BI_Exit

BI_CheckByte
       EOR     R6, R6, # MASR_Word_bit   ; Is it what we want apart from
       CMP     R6, R0                    ; the byte/word bit?
       BNE     BI_Exit                   ; If not, exit
       STR     R4, [R5, #MDR-MASR]       ; R4 LSB contains next byte
       SUB     R1, R1, #1                ; Pointer was stepped by 2 at
                                         ; BI_Wait; back up to next byte

       ADD     R3, R3, #1                ; Done one more byte
       CMP     R3, R2                    ; Is that it?
       BLO     BI_Wait
       B       BI_Exit

      ; ********************************************************



CPU_BlockOut ; ( int ioport, char *pointer, int maxlen )

             ; returns int result (in R0 ) saying how many
             ; bytes were successfully written in. Heavily optimised
             ; for word accesses. Must be called in SVC mode
             ; IO port must be even for byte accesses to work!!
             ;
             ; Services consecutive PC writes to 'ioport', storing the
             ; data into the buffer at 'pointer', polling the mailbox
             ; itself.
             ;
             ; In:  R0 = I/O port number
             ;      R1 = pointer to buffer that receives the data
             ;      R2 = maxlen, size of that buffer in bytes
             ; Out: R0 = number of bytes stored (never above maxlen)
             ;      R1-R3 corrupted; R4-R8 preserved
             ;
             ; Returns when
             ;  - the count reaches maxlen rounded down to even
             ;  - the PC writes a word when fewer than two bytes of room
             ;    remain within maxlen (the access is left pending and
             ;    nothing is stored beyond the buffer)
             ;  - an access that is not a write to 'ioport' is seen (the
             ;    access is left pending)
             ;  - the stop flag is non-zero while no access is pending
             ;
             ; A maxlen of 0 or 1 returns 0 at once.


       STMFD   SP!, {R4-R8, LR}

       LDR     R5, MASR_load    ; R5->mailbox (MASR; MDR at #MDR-MASR)

       MOV     R3, #0           ; R3 = number written
       MOV     R8, R2           ; R8 = maxlen as passed: hard upper bound
       BICS    R2, R2, #1       ; convert maxlen to even number
       BEQ     BO_Exit          ; if none left, stop now

       LDR     R7, CPUS_pStopFlag  ; R7 = stop flag address

       ; Expected MASR value for a word write to this port; a byte write
       ; differs only in MASR_Word_bit

       ORR     R0, R0, # MASR_Access_bit + MASR_Word_bit + MASR_Write_bit

       ; Main loop
       ; At this point
       ; R0 = Target MASR value
       ; R1 = Write pointer
       ; R2 = maxlen rounded down to even ( at least 2 )
       ; R3 = no. written so far
       ; R5 = MASR address
       ; R8 = maxlen as passed by the caller

       ; R4, R6 = working regs
       ; R7 -> stop flag, non-zero to stop

BO_Poll
       LDR     R6, [R5]                  ; Read MASR
       CMP     R6, R0                    ; Is it the right access?
       BNE     BO_ByteOrOther            ; If not, worry about it later
       ADD     R6, R3, #2                ; R6 = count after this word
       CMP     R6, R8                    ; Would it overrun the buffer?
                                         ; (possible once single bytes
                                         ; have made the count odd)
       BHI     BO_Exit                   ; If so leave the access pending
                                         ; and report what was done
       LDR     R4, [R5, #MDR-MASR]       ; Get bytes from PC

       STRB    R4, [R1], #1              ; Store low byte
       MOV     R4, R4, LSR #8
       STRB    R4, [R1], #1              ; Store high byte
       MOV     R3, R6                    ; Add 2 to total bytes done
       CMP     R3, R2                    ; Have we reached the limit?
       BLO     BO_Poll                   ; If not, go back & start
BO_Exit                                  ; Else, exit
       MOV     R0, R3                    ; Return number of bytes written
       LDMFD   SP!, {R4-R8, PC}^

BO_ByteOrOther                           ; Enter here if anything other
                                         ; than a word write, with R6 =
                                         ; actual MASR contents

       TST     R6, # MASR_Access_bit     ; Was there an access??
       BNE     BO_CheckByte              ; If so, check for byte I/O

       LDR     R6, [R7]                  ; If not, check stop flag
       CMP     R6, #0
       BEQ     BO_Poll                   ; If OK, retry
       B       BO_Exit

BO_CheckByte
       EOR     R6, R6, # MASR_Word_bit   ; Is it what we want apart from
       CMP     R6, R0                    ; the byte/word bit?
       BNE     BO_Exit                   ; If not, exit
       LDR     R4, [R5, #MDR-MASR]       ; Get data into R4
       STRB    R4, [R1], #1              ; Store it, step to next byte

       ADD     R3, R3, #1                ; Done one more byte
       CMP     R3, R2                    ; Is that it?
       BLO     BO_Poll                   ; If not, go back for more
       B       BO_Exit

 ; Hardware read/write routines ---------------------------------------------

 ; All the initialising is done from C as it's fairly complex. These
 ; routines merely allow us to read/write memory in supervisor mode.

        ; ----------------------

CPUS_Read ; (int addr) returns value at [addr]

        ; Loads one word from 'addr', switching to SVC mode first when
        ; called from user mode.
        ; In:  R0 = address to read
        ; Out: R0 = word found there; R1 corrupted
        ;      Mode and flags are restored from the entry LR on return

        TST     LR, #3                      ; Mode bits zero => user mode
        MOV     R1, LR                      ; Hold the return address and
                                            ; PSR where a mode change
                                            ; cannot disturb them
        SWIEQ   OS_EnterOS                  ; User mode only: become SVC

        LDR     R0, [R0]                    ; Fetch the word
        MOVS    PC, R1                      ; Back to caller, caller's mode

        ; ----------------------

CPUS_Write ; (int addr, data) writes data to [addr]

        ; Stores one word at 'addr', switching to SVC mode first when
        ; called from user mode.
        ; In:  R0 = address to write, R1 = word to store
        ; Out: R2 corrupted
        ;      Mode and flags are restored from the entry LR on return

        TST     LR, #3                      ; Mode bits zero => user mode
        MOV     R2, LR                      ; Hold the return address and
                                            ; PSR where a mode change
                                            ; cannot disturb them
        SWIEQ   OS_EnterOS                  ; User mode only: become SVC

        STR     R1, [R0]                    ; Store the word
        MOVS    PC, R2                      ; Back to caller, caller's mode

; Hardware ID ----------------------

; CPU_HardwareID is an exported word holding the address of the
; zero-terminated name string that follows it.

CPU_HardwareID DCD CPUS_Name
CPUS_Name      DCB "Risc PC x86 coprocessor card", 0

        END




