 GET TimLib:hdr.System
 GET TimLib:hdr.Macros
 GET hdr.Division
 GET hdr.GlobHdr
 GET hdr.Utils

 AREA |!!!Module_Header_Area|,CODE,READONLY

 ENTRY

; Module header table. Each non-zero entry below (apart from the SWI chunk
; number) is stored as an offset from Module_Header; a zero word means the
; module does not supply that entry.
Module_Header
	DCD 0 ; RM_Start (not provided)
	DCD (RM_Initialise  - Module_Header)
	DCD (RM_Finalise    - Module_Header)
	DCD (RM_ServiceCall - Module_Header)
	DCD (RM_TitleStr    - Module_Header)
	DCD (RM_HelpStr     - Module_Header)
	DCD 0 ; RM_Cmds (not provided)
	DCD &52780 ; SWI chunk base number
	DCD (RM_SWICall     - Module_Header)
	DCD (RM_SWITable    - Module_Header)
	DCD 0 ; RM_SWIDecode (not provided)
	DCD 0 ; RM_MessageFile (not provided)
	DCD (RM_Flags       - Module_Header)

	; RM_TitleStr and RM_HelpStr are referenced by the header but not defined
	; in this part of the file - presumably _MODNAME emits them (TODO confirm
	; against TimLib:hdr.Macros).
	_MODNAME DCDUtils,036,"02 Apr 2020",,"Andr Timmermans"

; SWI name table: the first string is the SWI prefix, followed by one name
; per SWI in chunk-offset order (offset shown in hex on the right) and a
; terminating zero byte. The order must match the branch table in RM_SWICall.
RM_SWITable
 = "DCDUtils",0
 = "Version",0              ; 00
 = "RegisterPlayer",0       ; 01
 = "UnregisterPlayer",0     ; 02
 = "RegisterPlugIn",0       ; 03
 = "UnregisterPlugIn",0     ; 04
 = "SetPlayerInfo",0        ; 05
 = "GetPlayerInfo",0        ; 06
 = "FillBuffer",0           ; 07
 = "GetFullScreenPlugIn",0  ; 08
 = "FillRawBuffer",0        ; 09
 = "StopPlugIns",0          ; 0A
 = "UpdatePlugInsPosition",0; 0B
 = "GetBufferRate",0        ; 0C
 = "PlayerHasPlugIn",0      ; 0D
 = "FillABuffer",0          ; 0E
 = "PlayerInfo",0           ; 0F
 = "PlugInInfo",0           ; 10
 = "FillFFT",0              ; 11
 = "BufferSizes",0          ; 12
 = "PlayerFindPlugIn",0     ; 13
 = "SetSaverTask",0         ; 14
 = "RemoteControl",0        ; 15
 = "ListPlayers",0          ; 16

 = 0
 ALIGN
; Module flags word referenced from the header.
RM_Flags
 DCD 1 ; 32-bit compatible

; Full-scale value of the fine/scale volume fields: the 0-256 song volume
; multiplied by 100 (see the volume writes in swi_SetPlayerInfo).
MaxVolume * 256*100

;===============================================================================

 _GENRETURNS

 ALIGN
; Module initialisation entry.
;
; In  - R12 pointer to the module private word
;
; Counts the available sound drivers, claims and clears the global workspace
; (its address is stored in the private word), initialises the per-driver
; records and the player/plug-in tables, probes the CPU and the FFT speed,
; and schedules the first Buffers_Fill call.
RM_Initialise _FNAME
 _DEFPROCV "R7-R11"

 ; find sounddrivers
 ; R9 counts drivers: driver R9+1 is queried until the SWI fails or
 ; returns R0 = -1.
 MOV     R9,#0
initialise_sounddriver_loop
 MOV     R0,#0
 ADD     R1,R9,#1
 SWI     &6DBC2 ; XSoundDriver_Info
 BVS     initialise_sounddriver_loop_End
; CMP     R3,#0
 CMP     R0,#-1
 ADDNE   R9,R9,#1
 BNE     initialise_sounddriver_loop
initialise_sounddriver_loop_End

 ; claim memory for globals
 ; R3 = size_glbmem + Size_Driver * R9; size_glbmem is assembled from two
 ; immediates (low 14 bits, then the remaining bits).
 ; NOTE(review): the driver loop below touches R9+1 records (indices R9..0),
 ; so size_glbmem presumably already contains one driver record - confirm.
 MOV     R0,#6
 MOV     R3,#(size_glbmem:AND:&3FFF)
 ORR     R3,R3,#(size_glbmem:AND:&FFFFC000)
 MOV     R2,#Size_Driver
 MLA     R3,R2,R9,R3
 SWI     XOS_Module
 _ENDPROC VS
 ; remember the workspace in the private word, clear it, and from here on
 ; address it through R12
 STR     R2,[R12]
 MOV     R0,R2
 MOV     R1,R3
 BL      Mem_Clear
 MOV     R12,R2
 STR     R9,[R12,#Glb_SoundDrivers]

 ; find hardware buffers
 BL      GetSoundDMA

 ; initialise DMA/sound driver info
 ; R9 runs from the driver count down to 0; record 0 is the DMA source.
 LDR     R9,[R12,#Glb_SoundDrivers]

initialise_sd_loop
 ; R11 = Glb_Buffers + R9 * Size_Driver (record of source R9)
 ADD     R11,R12,#Glb_Buffers
 MOV     R0,#Size_Driver
 MLA     R11,R9,R0,R11
 ; Seed Driver_Time so that later code does not believe the buffers are
 ; still incompletely filled.
 ; The control SWI is skipped for source 0. Driver_Time becomes 0 when R1
 ; still equals R9 afterwards, otherwise R3 minus the data buffer length.
 ; NOTE(review): this relies on the read call changing R1 and returning a
 ; position in R3 - confirm against the SoundDriver documentation.
 MOV     R0,#0 ; read
 MOVS    R1,R9
 SWINE   &6DBC3 ; XSoundDriver_Control
 CMP     R1,R9
 SUBNE   R3,R3,#(Driver_DataEnd - Driver_Data)
 MOVEQ   R3,#0  ; DMA
 STR     R3,[R11,#Driver_Time]
 ; store the "RIFF" and "FFIR" marker words in the record
 LDR     R0,const_riff
 STR     R0,[R11,#Driver_RIFF]
 LDR     R0,const_ffir
 _LADD   R1,R11,Driver_FFIR
 STR     R0,[R1]
 SUBS    R9,R9,#1
 BGE     initialise_sd_loop
initialise_sd_loop_end

 BL      Timer_Init

 ; -1 = no full screen plug-in registered
 MOV     R0,#-1
 STR     R0,[R12,#Glb_FullScreenPlugIn]

 ; initialise player table
 ; (zero the flags word of every slot, i.e. mark it free)
 ADD     R11,R12,#Glb_Players
 MOV     R10,#size_player
 MOV     R9,#max_players
 MOV     R0,#0
initialise_players_loop
 STR     R0,[R11,#player_flags]
 ADD     R11,R11,R10
 SUBS    R9,R9,#1
 BGT     initialise_players_loop

 ; initialise plugin table
 ; (zero the flags word of every slot, i.e. mark it free)
 ADD     R11,R12,#Glb_Plugins
 MOV     R10,#size_plugin
 MOV     R9,#max_plugins
 MOV     R0,#0
initialise_plugins_loop
 STR     R0,[R11,#plugin_flags]
 ADD     R11,R11,R10
 SUBS    R9,R9,#1
 BGT     initialise_plugins_loop

 ; Flags
 MOV     R0,#0
 STR     R0,[R12,#Glb_Flags]
 ; Check if CPU supports SMLA
 ; The low word of a long multiply-accumulate (251*241 + 0) is compared
 ; with a plain MUL of the same operands; glb_flag_SMLASupport is set when
 ; they agree.
 ; NOTE(review): on a CPU without SMLAL the instruction would presumably
 ; trap instead of giving a different result - confirm how that is handled.
 MOV     R0,#0
 MOV     R1,#0
 MOV     R2,#251
 MOV     R3,#241
 SMLAL   R0,R1,R2,R3
 MUL     R1,R2,R3
 CMP     R0,R1
 LDREQ   R0,[R12,#Glb_Flags]
 ORREQ   R0,R0,#glb_flag_SMLASupport
 STREQ   R0,[R12,#Glb_Flags]

 ; Determine default FFT to use
 ; Benchmark: wait for the monotonic timer to change, then count in R5 how
 ; many swi_FillFFT calls complete before the timer reaches R4+100.
 SWI     XOS_ReadMonotonicTime
 MOV     R4,R0

initialise_FFT_StartLoop
 SWI     XOS_ReadMonotonicTime
 CMP     R4,R0
 BEQ     initialise_FFT_StartLoop

 ADD     R4,R4,#100
 MOV     R5,#0
initialise_FFT_MainLoop
 MOV     R0,#1<<31
 ; we just need a buffer of the right size, contents is irrelevant
 ADD     R2,R12,#Glb_Buffers
 ADD     R1,R2,#Driver_FFT
 ADD     R2,R2,#Driver_FFTEnd
 MOV     R3,#FFTLog2Size<<4
 BL      swi_FillFFT
 ADD     R5,R5,#1
 SWI     XOS_ReadMonotonicTime
 SUBS    R0,R4,R0
 BGT     initialise_FFT_MainLoop

 ; Start from FFTLog2Size and drop one step for every threshold
 ; (50, 16, 8, 4 iterations) the measured count stays below.
 MOV     R0,#FFTLog2Size
 CMP     R5,#50
 SUBLT   R0,R0,#1
 CMP     R5,#16
 SUBLT   R0,R0,#1
 CMP     R5,#8
 SUBLT   R0,R0,#1
 CMP     R5,#4
 SUBLT   R0,R0,#1
 STR     R0,[R12,#Glb_DefaultFFT]

 ; Start DMA buffer filling
 ; (Buffers_Fill is scheduled with the workspace pointer in R2; the result
 ; of the SWI is not checked)
 MOV     R0,#1
 ADRL    R1,Buffers_Fill
 MOV     R2,R12
 SWI     XOS_CallAfter

 _ENDPROC

; Four-character marker words loaded with LDR in the driver loop above.
const_riff
 = "RIFF"
const_ffir
 = "FFIR"

;===============================================================================

; Error returned when the module is asked to die while players are registered.
Err_StillPlayers
 DCD     0
 = "There are still registered players",0

 ALIGN
; Module finalisation entry.
;
; In  - R12 pointer to the module private word
;
; Out - error (Err_StillPlayers) if any player slot is still in use;
;       otherwise the pending buffer-fill ticker/callback are removed, every
;       counted sound driver is sent a zeroed "write" control request, the
;       workspace is freed and the private word is zeroed.
RM_Finalise _FNAME
 _DEFPROCV "R6-R11"
 MOV     R6,R12                 ; keep the private word address for the end
 LDR     R12,[R12]              ; R12 = workspace

; Refuse if players are still registered
 ADD     R11,R12,#Glb_Players
 MOV     R10,#size_player
 MOV     R9,#max_players
Finalise_CheckPlayer_Loop
 LDR     R0,[R11,#player_flags]
 TST     R0,#player_flag_loaded
 _ERROR  NE,Err_StillPlayers
 ADD     R11,R11,R10
 SUBS    R9,R9,#1
 BGT     Finalise_CheckPlayer_Loop

 ; Stop buffer filling
 ADRL    R0,Buffers_Fill
 MOV     R1,R12
 SWI     XOS_RemoveTickerEvent

 ADRL    R0,Buffers_CallBack
 MOV     R1,R12
 SWI     XOS_RemoveCallBack

 ; ensure sounddrivers aren't still recording
 ; Drivers are numbered 1..Glb_SoundDrivers; with no drivers counted there is
 ; nothing to stop, so the loop is skipped rather than run for driver 0.
 LDR     R9,[R12,#Glb_SoundDrivers]
 CMP     R9,#0
 BLE     finalise_sd_loop_end
finalise_sd_loop
 ; The read request only probes the driver: on error the write is skipped.
 ; Its returned values are not used.
 MOV     R0,#0 ; read
 MOV     R1,R9
 SWI     &6DBC3 ; XSoundDriver_Control
 BVS     finalise_sd_loop_next
 ; Write an all-zero control block to the driver.
 MOV     R0,#1 ; write
 MOV     R1,R9
 MOV     R2,#0
 MOV     R3,#0
 MOV     R4,#0
 MOV     R5,#0
 SWI     &6DBC3 ; XSoundDriver_Control
finalise_sd_loop_next
 SUBS    R9,R9,#1
 BGT     finalise_sd_loop
finalise_sd_loop_end
 ; free claimed memory
 MOV     R0,#7
 MOV     R2,R12
 SWI     XOS_Module
 ; zero the private word so it no longer refers to the freed block
 MOV     R0,#0
 STR     R0,[R6]
 _ENDPROC

;===============================================================================

; Service call table: a zero flags word, the offset of the real handler, the
; list of service numbers handled (&53 only) and a zero terminator. The word
; placed immediately before RM_ServiceCall holds the offset of this table,
; and the entry itself starts with MOV R0,R0.
RM_ServiceCall_Table
 DCD     0
 DCD     (RM_ServiceCall_Start - Module_Header)
 DCD     &53
 DCD     0
 DCD     (RM_ServiceCall_Table - Module_Header)
 ALIGN
; Service call entry.
;
; In  - R1  service number
;       R2  task handle (for service &53)
;       R12 pointer to the module private word
;
; Out - all registers preserved (R0-R12 are saved around the handler)
;
; On WimpCloseDown for a non-zero task handle: forgets the saver task if it
; is that task, and unregisters every plug-in and player owned by the task.
RM_ServiceCall
 MOV     R0,R0
 TEQ     R1,#&53 ; WimpCloseDown
 MOVNE   PC,R14
RM_ServiceCall_Start
 _DEFPROCV "R0-R12"
 ; R2 is task handle
 CMP     R2,#0
 BEQ     Service_End

 LDR     R12,[R12]

 ; is R2 the saver task?
 LDR     R0,[R12,#Glb_SaverTask]
 CMP     R0,R2
 MOVEQ   R0,#0
 STREQ   R0,[R12,#Glb_SaverTask]

 ; is R2 the task in the list of plug-ins?
 ; R9 counts down from max_plugins, so the handle of the current slot is
 ; max_plugins - R9. Errors from swi_UnregisterPlugIn are ignored.
 ; NOTE(review): assumes R2 and R9 survive the call (RestoreDesktop is not
 ; visible here) - confirm.
 ADD     R11,R12,#Glb_Plugins
 MOV     R10,#size_plugin
 MOV     R9,#max_plugins
Service_LocatePlugIn_Loop
 LDR     R0,[R11,#plugin_flags]
 TST     R0,#plugin_flag_loaded
 BEQ     Service_LocatePlugIn_LoopNext
 LDR     R0,[R11,#plugin_task]
 CMP     R0,R2
 BNE     Service_LocatePlugIn_LoopNext
 RSB     R0,R9,#max_plugins
 BL      swi_UnregisterPlugIn
Service_LocatePlugIn_LoopNext
 ADD     R11,R11,R10
 SUBS    R9,R9,#1
 BGT     Service_LocatePlugIn_Loop
Service_LocatePlugIn_LoopEnd

 ; is R2 the task in the list of players?
 ; Same scheme as above: handle = max_players - R9.
 ADD     R11,R12,#Glb_Players
 MOV     R10,#size_player
 MOV     R9,#max_players
Service_LocatePlayer_Loop
 LDR     R0,[R11,#player_flags]
 TST     R0,#player_flag_loaded
 BEQ     Service_LocatePlayer_LoopNext
 LDR     R0,[R11,#player_task]
 CMP     R0,R2
 BNE     Service_LocatePlayer_LoopNext
 RSB     R0,R9,#max_players
 BL      swi_UnregisterPlayer
Service_LocatePlayer_LoopNext
 ADD     R11,R11,R10
 SUBS    R9,R9,#1
 BGT     Service_LocatePlayer_Loop
Service_LocatePlayer_LoopEnd

Service_End
 _ENDPROC

;===============================================================================
; SWI handler code entry point
;
; In  - R11 SWI offset within the module's chunk
;       R12 pointer to the module private word
;
; Loads the workspace pointer into R12 and branches to the handler for the
; SWI offset in R11. Offsets beyond the table return the "BadSWI" error.
;===============================================================================

 ALIGN
RM_SWICall _FNAME
 LDR     R12,[R12]
 ; PC reads two instructions ahead, so offset 0 lands on the first table
 ; entry and the following B is only reached for out-of-range offsets.
 CMP     R11,#(swi_code_tableend-swi_code_tablestart)/4
 ADDCC   PC,PC,R11,LSL#2
 B       swi_code_tableend
 ; One branch per SWI, in the same order as RM_SWITable.
swi_code_tablestart
 B       swi_Version
 B       swi_RegisterPlayer
 B       swi_UnregisterPlayer
 B       swi_RegisterPlugIn
 B       swi_UnregisterPlugIn
 B       swi_SetPlayerInfo
 B       swi_GetPlayerInfo
 B       swi_FillBuffer
 B       swi_GetFullScreenPlugIn
 B       swi_FillRawBuffer
 B       swi_StopPlugIns
 B       swi_UpdatePlugInsPosition
 B       swi_GetBufferRate
 B       swi_PlayerHasPlugIn
 B       swi_FillABuffer
 B       swi_PlayerInfo
 B       swi_PlugInInfo
 B       swi_FillFFT
 B       swi_BufferSizes
 B       swi_PlayerFindPlugIn
 B       swi_SetSaverTask
 B       swi_RemoteControl
 B       swi_ListPlayers
swi_code_tableend
 ; Unknown SWI: look up the "BadSWI" token with the module title as the
 ; substitution parameter (R4). R1, R2 and R4 are overwritten on this path.
 _DEFPROC
 ADR     R0,unknown_swi
 MOV     R1,#0
 MOV     R2,#0
 ADRL    R4,RM_TitleStr
 SWI     XMessageTrans_ErrorLookup
 _ENDPROC
; Error block passed to the lookup: error number &1E6 and token "BadSWI".
unknown_swi
 DCD     &1E6
 = "BadSWI",0

;-------------------------------------------------------------------------------
; SWI Version
;
; In  - R12 Global Header
;
; Out - R0  version * 100 (the RM_Version constant)
;
; Leaf routine: no stack frame, returns directly through R14.
;-------------------------------------------------------------------------------

 ALIGN
swi_Version _FNAME
 MOV     R0,#RM_Version
 MOV     PC,R14

;-------------------------------------------------------------------------------
; SWI ListPlayers
;
; Enumerates the registered players one handle at a time.
;
; In  - R0  -1 to start, or the player id returned by the previous call
;       R12 Global Header
;
; Out - R0  id of the next registered player, or -1 when there are no more
;-------------------------------------------------------------------------------

 ALIGN
swi_ListPlayers _FNAME
 _DEFPROCV "R1,R9-R11"

 ; First candidate is the id after the one passed in (-1 + 1 = 0 starts
 ; the enumeration from the first slot).
 ADD     R0,R0,#1
 MOV     R10,#size_player
 ADD     R11,R12,#Glb_Players
 MLA     R11,R0,R10,R11         ; R11 = slot of candidate R0
 B       swi_ListPlayers_Check

swi_ListPlayers_Advance
 ADD     R0,R0,#1
 ADD     R11,R11,R10
swi_ListPlayers_Check
 ; The bounds test comes before the slot is read, so an out-of-range
 ; candidate never dereferences R11.
 CMP     R0,#max_players
 BHS     swi_ListPlayers_Exhausted
 LDR     R1,[R11,#player_flags]
 TST     R1,#player_flag_loaded
 BEQ     swi_ListPlayers_Advance
 _ENDPROC                       ; R0 = id of a registered player

swi_ListPlayers_Exhausted
 MOV     R0,#-1
 _ENDPROC

;-------------------------------------------------------------------------------
; SWI RegisterPlayer
;
; Claims the first free player slot, clears it and gives it default volumes.
;
; In  - R12 Global Header
;
; Out - R0  player handle (index of the slot), or error block
;           (Err_TooManyPlayers) when every slot is in use
;       all other registers preserved
;-------------------------------------------------------------------------------

Err_TooManyPlayers
 DCD     0
 = "Too many registered players",0

 ALIGN
swi_RegisterPlayer _FNAME
 ; R1 is saved as well: it is used below to pass the slot size to Mem_Clear
 ; and must not leak back to the SWI caller.
 _DEFPROCV "R1,R9-R11"

 ; locate free player slot
 ; R9 counts down from max_players; it reaches 0 when no slot is free.
 ADD     R11,R12,#Glb_Players
 MOV     R10,#size_player
 MOV     R9,#max_players
RegisterPlayer_Loop
 LDR     R0,[R11,#player_flags]
 TST     R0,#player_flag_loaded
 BEQ     RegisterPlayer_LoopEnd
 ADD     R11,R11,R10
 SUBS    R9,R9,#1
 BGT     RegisterPlayer_Loop
RegisterPlayer_LoopEnd
 CMP     R9,#0
 _ERROR  LE,Err_TooManyPlayers

 ; wipe the whole slot before filling in the defaults
 MOV     R0,R11
 MOV     R1,#size_player
 BL      Mem_Clear

 ; defaults: slot in use, volume 256, fine and scale volume at MaxVolume
 MOV     R0,#player_flag_loaded
 STR     R0,[R11,#player_flags]
 MOV     R10,#256
 STR     R10,[R11,#player_songvolume]
 MOV     R10,#MaxVolume
 STR     R10,[R11,#player_songfinevolume]
 STR     R10,[R11,#player_songscalevolume]

 ; handle = slot index = max_players - remaining count
 RSB     R0,R9,#max_players

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI UnregisterPlayer
;
; In  - R0  player handle
;       R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------

; Error returned for a player handle that is out of range or not registered.
Err_Bad_Player_Handle
 DCD     0
 = "Incorrect player handle",0

 ALIGN
; Internal helper: validate a player handle and locate its slot.
;
; In  - R0  player handle
;       R12 Global Header
;
; Out - R11 pointer to the player slot (Glb_Players + R0 * size_player)
;       or Err_Bad_Player_Handle when R0 >= max_players (unsigned) or the
;       slot does not have player_flag_loaded set
;       R10 preserved
Player_SlotFromHandle _FNAME
 _DEFPROCV "R10"

 CMP     R0,#max_players
 _ERROR  HS,Err_Bad_Player_Handle
 ADD     R11,R12,#Glb_Players
 MOV     R10,#size_player
 MLA     R11,R0,R10,R11
 LDR     R10,[R11,#player_flags]
 TST     R10,#player_flag_loaded
 _ERROR  EQ,Err_Bad_Player_Handle

 _ENDPROC

 ALIGN
; Frees a player slot. swi_StopPlugIns is called first for the same handle
; (it also validates the handle); then the slot's flags word is zeroed,
; which marks the slot as free.
swi_UnregisterPlayer _FNAME
 _DEFPROCV "R10-R11"

 BL      swi_StopPlugIns
 _ENDPROC VS

 BL      Player_SlotFromHandle
 _ENDPROC VS

 MOV     R10,#0
 STR     R10,[R11,#player_flags]

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI RegisterPlugIn
;
; Claims the first free plug-in slot for a plug-in task attached to a player.
; Registration is refused while a full screen plug-in is active.
;
; In  - R1  player handle
;       R2  plug-in task handle
;       R3  flags
;            0x00000001 full screen plug-in
;       R12 Global Header
;
; Out - R0  plugin handle (index of the slot), or error block:
;            bad player handle, Err_OneFullScreenPlugIn, Err_TooManyPlugIns
;-------------------------------------------------------------------------------

Err_TooManyPlugIns
 DCD     0
 = "Too many registered plugins",0
 ALIGN
Err_OneFullScreenPlugIn
 DCD     0
 = "A full screen plug-in is currently active",0

 ALIGN
swi_RegisterPlugIn _FNAME
 _DEFPROCV "R3,R9-R11"

 ; check player validity
 MOV     R0,R1
 BL      Player_SlotFromHandle
 _ENDPROC VS
 ; only a single full screen plug-in is allowed
 ; (Glb_FullScreenPlugIn is -1 when none is active)
 LDR     R11,[R12,#Glb_FullScreenPlugIn]
 CMP     R11,#-1
 _ERROR  NE,Err_OneFullScreenPlugIn

 ; locate free plug-in slot
 ; R9 counts down from max_plugins; it reaches 0 when no slot is free.
 ADD     R11,R12,#Glb_Plugins
 MOV     R10,#size_plugin
 MOV     R9,#max_plugins
RegisterPlugIn_Loop
 LDR     R0,[R11,#plugin_flags]
 TST     R0,#plugin_flag_loaded
 BEQ     RegisterPlugIn_LoopEnd
 ADD     R11,R11,R10
 SUBS    R9,R9,#1
 BGT     RegisterPlugIn_Loop
RegisterPlugIn_LoopEnd
 CMP     R9,#0
 _ERROR  LE,Err_TooManyPlugIns

 ; fill in the slot: only caller flags inside plugin_flag_mask are kept
 AND     R3,R3,#plugin_flag_mask
 ORR     R3,R3,#plugin_flag_loaded
 STR     R1,[R11,#plugin_player]
 STR     R2,[R11,#plugin_task]
 STR     R3,[R11,#plugin_flags]

 ; handle = slot index = max_plugins - remaining count
 RSB     R0,R9,#max_plugins

 ; full screen plug-in?
 ; (record it as the active one, then save the desktop)
 TST     R3,#plugin_flag_fullscreen
 STRNE   R0,[R12,#Glb_FullScreenPlugIn]
 BLNE    SaveDesktop

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI UnregisterPlugIn
;
; In  - R0  plugin handle
;       R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------

; Error returned for a plug-in handle that is out of range or not registered.
Err_Bad_PlugIn_Handle
 DCD     0
 = "Incorrect plug-in handle",0

 ALIGN
; Internal helper: validate a plug-in handle and locate its slot.
;
; In  - R0  plug-in handle
;       R12 Global Header
;
; Out - R11 pointer to the plug-in slot (Glb_Plugins + R0 * size_plugin)
;       or Err_Bad_PlugIn_Handle when R0 >= max_plugins (unsigned) or the
;       slot does not have plugin_flag_loaded set
;       R10 preserved
PlugIn_SlotFromHandle _FNAME
 _DEFPROCV "R10"

 CMP     R0,#max_plugins
 _ERROR  HS,Err_Bad_PlugIn_Handle
 ADD     R11,R12,#Glb_Plugins
 MOV     R10,#size_plugin
 MLA     R11,R0,R10,R11
 LDR     R10,[R11,#plugin_flags]
 TST     R10,#plugin_flag_loaded
 _ERROR  EQ,Err_Bad_PlugIn_Handle

 _ENDPROC

 ALIGN
; Frees a plug-in slot by zeroing its flags word. If the plug-in was the
; active full screen one, Glb_FullScreenPlugIn is reset to -1 and the
; desktop is restored.
swi_UnregisterPlugIn _FNAME
 _DEFPROCV "R1,R10-R11"

 ; keep the handle: R11 is reused below for the full screen comparison
 MOV     R10,R0

 BL      PlugIn_SlotFromHandle
 _ENDPROC VS

 MOV     R1,#0
 STR     R1,[R11,#plugin_flags]

 LDR     R11,[R12,#Glb_FullScreenPlugIn]
 CMP     R11,R10
 MOVEQ   R11,#-1
 STREQ   R11,[R12,#Glb_FullScreenPlugIn]
 BLEQ    RestoreDesktop

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI SetPlayerInfo
;
; Updates the main fields of a player slot in one call.
;
; In  - R0  player handle
;       R1  control panel window handle
;       R2  songs counter
;       R3  song volume [0-256] (values above 256 leave the volumes untouched)
;       R4  song name ptr (null terminated), or 0 for an empty name
;       R5  sound origin: -1, or a source number up to Glb_SoundDrivers
;       R12 Global Header
;
; Out - all preserved or err block
;-------------------------------------------------------------------------------

Err_Bad_Sound_Origin
 DCD     0
 = "Invalid SoundDriver number",0

 ALIGN
swi_SetPlayerInfo _FNAME
 _DEFPROCV "R1-R4,R9,R10,R11"

 BL      Player_SlotFromHandle
 _ENDPROC VS

 ; validate the sound origin before anything is stored
 CMP     R5,#-1
 BEQ     swi_SetPlayerInfo_Set
 LDR     R10,[R12,#Glb_SoundDrivers]
 CMP     R5,R10
 _ERROR  HI,Err_Bad_Sound_Origin

swi_SetPlayerInfo_Set
 STR     R1,[R11,#player_activewindow]
 STR     R2,[R11,#player_songcounter]
 ; a valid volume also resets the fine and scale volumes to volume * 100
 CMP     R3,#256
 STRLS   R3,[R11,#player_songvolume]
 MOVLS   R9,#100
 MULLS   R3,R9,R3
 STRLS   R3,[R11,#player_songfinevolume]
 STRLS   R3,[R11,#player_songscalevolume]
 STR     R5,[R11,#player_soundorigin]
 ; copy the name: at most 255 characters, then a terminating zero
 ; NOTE(review): assumes the player_songname field holds at least 256
 ; bytes - confirm against hdr.GlobHdr.
 ADD     R3,R11,#player_songname
 MOV     R1,#255
 CMP     R4,#0
 BEQ     swi_SetPlayerInfo_NameLoopEnd

swi_SetPlayerInfo_NameLoop
 ; GT after the CMP means "byte is non-zero"; GT after the SUBS means
 ; "room left", so the loop ends on the terminator or when R1 reaches 0.
 LDRB    R2,[R4],#1
 CMP     R2,#0
 STRGTB  R2,[R3],#1
 SUBGTS  R1,R1,#1
 BGT     swi_SetPlayerInfo_NameLoop

swi_SetPlayerInfo_NameLoopEnd
 MOV     R2,#0
 STRB    R2,[R3],#1

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI GetPlayerInfo
;
; Reads back the main fields of a player slot.
;
; In  - R0  player handle
;       R12 Global Header
;
; Out - R1  control panel window handle
;       R2  songs counter
;       R3  song volume
;       R4  pointer to the song name stored inside the player slot
;       R5  sound origin
;       or error block if the handle is not a registered player
;-------------------------------------------------------------------------------

 ALIGN
swi_GetPlayerInfo _FNAME
 _DEFPROCV "R11"

 BL      Player_SlotFromHandle  ; R11 = player slot
 _ENDPROC VS

 ; The five results are independent reads from the slot.
 LDR     R5,[R11,#player_soundorigin]
 ADD     R4,R11,#player_songname
 LDR     R3,[R11,#player_songvolume]
 LDR     R2,[R11,#player_songcounter]
 LDR     R1,[R11,#player_activewindow]

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI PlayerInfo
;
; Reads or writes a single field of a player slot, selected by an info code.
;
; In  - R0  player handle
;       R1  info code (+ 1<<31 to write)
;       R2  info value
;       R12 Global Header
;
; Out - R2  info value (the value stored in the slot after the call)
;       or error block: bad player handle, unknown info code
;
; The per-code handlers below run inside the stack frame created here and
; finish with their own _ENDPROC. They receive:
;       R3  non-zero when the write bit was set
;       R11 pointer to the player slot
;-------------------------------------------------------------------------------

 ALIGN
swi_PlayerInfo _FNAME
 _DEFPROCV "R1,R3-R11"

 BL      Player_SlotFromHandle
 _ENDPROC VS

 ; split the write bit (R3) from the code (R1), then dispatch on the code
 AND     R3,R1,#1<<31
 BIC     R1,R1,#1<<31
 CMP     R1,#(swi_PlayerInfo_tableend-swi_PlayerInfo_tablestart)/4
 ADDCC   PC,PC,R1,LSL#2
 B       swi_PlayerInfo_tableend
swi_PlayerInfo_tablestart
 B       swi_PlayerInfo_ActiveWindow     ; 0
 B       swi_PlayerInfo_SongCounter      ; 1
 B       swi_PlayerInfo_SongVolume       ; 2
 B       swi_PlayerInfo_SongName         ; 3
 B       swi_PlayerInfo_SoundOrigin      ; 4
 B       swi_PlayerInfo_TaskHandle       ; 5
 B       swi_PlayerInfo_SongPosition     ; 6
 B       swi_PlayerInfo_SongStartPos     ; 7
 B       swi_PlayerInfo_SongEndPos       ; 8
 B       swi_PlayerInfo_SongFineVolume   ; 9
 B       swi_PlayerInfo_SongScaleVolume  ; 10
 B       swi_PlayerInfo_ModuleName       ; 11
 B       swi_PlayerInfo_ModuleHandle     ; 12
swi_PlayerInfo_tableend
 ; code outside the table
 _ERRSWI unknown_info_code
; Error block for an unknown info code (also used by swi_PlugInInfo).
unknown_info_code
 DCD     1
 = "Unknown information code",0

;-------------------------------------------------------------------------------

 ALIGN
; PlayerInfo code 0: control panel window handle.
; R3 <> 0 requests a write of R2; R2 always returns the stored value.
swi_PlayerInfo_ActiveWindow
 CMP     R3,#0
 STRNE   R2,[R11,#player_activewindow]   ; write request only
swi_PlayerInfo_ActiveWindow_Read
 LDR     R2,[R11,#player_activewindow]

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 1: songs counter.
; A write with the value already stored returns immediately. A write with a
; different value stores it and also empties the song name (its first byte
; is zeroed). R2 returns the stored counter.
swi_PlayerInfo_SongCounter
 CMP     R3,#0
 BEQ     swi_PlayerInfo_SongCounter_Read
 LDR     R3,[R11,#player_songcounter]
 CMP     R2,R3
 _ENDPROC EQ
 STR     R2,[R11,#player_songcounter]
 ; clear all song string codes
 MOV     R3,#0
 STRB    R3,[R11,#player_songname]
swi_PlayerInfo_SongCounter_Read
 LDR     R2,[R11,#player_songcounter]

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 2: song volume [0-256].
; A write of a value up to 256 stores it and resets both the fine and the
; scale volume to value * 100; larger values are ignored. R2 returns the
; stored song volume.
swi_PlayerInfo_SongVolume
 CMP     R3,#0
 BEQ     swi_PlayerInfo_SongVolume_Read
 CMP     R2,#256
 STRLS   R2,[R11,#player_songvolume]
 MOVLS   R9,#100
 MULLS   R2,R9,R2
 STRLS   R2,[R11,#player_songfinevolume]
 STRLS   R2,[R11,#player_songscalevolume]
swi_PlayerInfo_SongVolume_Read
 LDR     R2,[R11,#player_songvolume]

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 9: fine song volume [0-MaxVolume].
; A write of a value up to MaxVolume stores it and also updates the coarse
; song volume with the _DIVIDE result in R4 (fine volume and 100 are the
; operands); larger values are ignored. The scale volume is not touched
; here. R2 returns the stored fine volume.
; NOTE(review): the operand order and clobbers of _DIVIDE are defined in
; hdr.Division, which is not visible here - confirm.
swi_PlayerInfo_SongFineVolume
 CMP     R3,#0
 BEQ     swi_PlayerInfo_SongFineVolume_Read
 CMP     R2,#MaxVolume
 BHI     swi_PlayerInfo_SongFineVolume_Read
 STR     R2,[R11,#player_songfinevolume]
 MOV     R3,#100
 _DIVIDE R4,R2,R3,R5
 STR     R4,[R11,#player_songvolume]
swi_PlayerInfo_SongFineVolume_Read
 LDR     R2,[R11,#player_songfinevolume]

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 10: scale volume [0-MaxVolume].
; A write of a value up to MaxVolume stores it; larger values are ignored.
; R2 returns the stored scale volume (the value swi_FillBuffer passes on as
; the volume for a player source).
swi_PlayerInfo_SongScaleVolume
 CMP     R3,#0
 BEQ     swi_PlayerInfo_SongScaleVolume_Read
 CMP     R2,#MaxVolume
 BHI     swi_PlayerInfo_SongScaleVolume_Read
 STR     R2,[R11,#player_songscalevolume]
swi_PlayerInfo_SongScaleVolume_Read
 LDR     R2,[R11,#player_songscalevolume]

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 3: song name.
; On a write R2 points to a null terminated string (or is 0 for an empty
; name); at most 255 characters are copied into the slot, followed by a
; terminating zero. R2 returns a pointer to the name stored in the slot.
; NOTE(review): assumes the player_songname field holds at least 256 bytes -
; confirm against hdr.GlobHdr.
swi_PlayerInfo_SongName
 CMP     R3,#0
 BEQ     swi_PlayerInfo_SongName_Read

 ADD     R3,R11,#player_songname
 MOV     R1,#255
 CMP     R2,#0
 BEQ     swi_PlayerInfo_SongName_LoopEnd

swi_PlayerInfo_SongName_Loop
 ; GT after the CMP means "byte is non-zero"; GT after the SUBS means
 ; "room left", so the loop ends on the terminator or when R1 reaches 0.
 LDRB    R4,[R2],#1
 CMP     R4,#0
 STRGTB  R4,[R3],#1
 SUBGTS  R1,R1,#1
 BGT     swi_PlayerInfo_SongName_Loop

swi_PlayerInfo_SongName_LoopEnd
 MOV     R2,#0
 STRB    R2,[R3],#1

swi_PlayerInfo_SongName_Read
 ADD     R2,R11,#player_songname

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 4: sound origin.
; A write accepts -1 or a source number up to Glb_SoundDrivers, the same
; range swi_SetPlayerInfo accepts for this field; anything else returns
; Err_Bad_Sound_Origin and leaves the slot unchanged. R2 returns the stored
; sound origin.
swi_PlayerInfo_SoundOrigin
 CMP     R3,#0
 BEQ     swi_PlayerInfo_SoundOrigin_Read
 ; validate before storing (R3 is free once the write bit has been tested)
 CMP     R2,#-1
 BEQ     swi_PlayerInfo_SoundOrigin_Write
 LDR     R3,[R12,#Glb_SoundDrivers]
 CMP     R2,R3
 _ERROR  HI,Err_Bad_Sound_Origin
swi_PlayerInfo_SoundOrigin_Write
 STR     R2,[R11,#player_soundorigin]
swi_PlayerInfo_SoundOrigin_Read
 LDR     R2,[R11,#player_soundorigin]

 _ENDPROC

;-------------------------------------------------------------------------------

 ALIGN
; PlayerInfo code 5: task handle of the player.
; R3 <> 0 requests a write of R2; R2 always returns the stored value.
swi_PlayerInfo_TaskHandle
 CMP     R3,#0
 STRNE   R2,[R11,#player_task]           ; write request only
swi_PlayerInfo_TaskHandle_Read
 LDR     R2,[R11,#player_task]

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 6: song position.
; R3 <> 0 requests a write of R2; R2 always returns the stored value.
swi_PlayerInfo_SongPosition
 CMP     R3,#0
 STRNE   R2,[R11,#player_songposition]   ; write request only
swi_PlayerInfo_SongPosition_Read
 LDR     R2,[R11,#player_songposition]

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 7: song start position.
; R3 <> 0 requests a write of R2; R2 always returns the stored value.
swi_PlayerInfo_SongStartPos
 CMP     R3,#0
 STRNE   R2,[R11,#player_songstartpos]   ; write request only
swi_PlayerInfo_SongStartPos_Read
 LDR     R2,[R11,#player_songstartpos]

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 8: song end position.
; R3 <> 0 requests a write of R2; R2 always returns the stored value.
swi_PlayerInfo_SongEndPos
 CMP     R3,#0
 STRNE   R2,[R11,#player_songendpos]     ; write request only
swi_PlayerInfo_SongEndPos_Read
 LDR     R2,[R11,#player_songendpos]

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 11: module name.
; On a write R2 points to a null terminated string (or is 0 for an empty
; name); at most 64 characters are copied into the slot, followed by a
; terminating zero. R2 returns a pointer to the name stored in the slot.
; NOTE(review): up to 65 bytes are written, so the player_modulename field
; must be at least that large; if it is only 64 bytes the limit below is off
; by one - confirm against hdr.GlobHdr.
swi_PlayerInfo_ModuleName
 CMP     R3,#0
 BEQ     swi_PlayerInfo_ModuleName_Read

 ADD     R3,R11,#player_modulename
 MOV     R1,#64
 CMP     R2,#0
 BEQ     swi_PlayerInfo_ModuleName_LoopEnd

swi_PlayerInfo_ModuleName_Loop
 ; GT after the CMP means "byte is non-zero"; GT after the SUBS means
 ; "room left", so the loop ends on the terminator or when R1 reaches 0.
 LDRB    R4,[R2],#1
 CMP     R4,#0
 STRGTB  R4,[R3],#1
 SUBGTS  R1,R1,#1
 BGT     swi_PlayerInfo_ModuleName_Loop

swi_PlayerInfo_ModuleName_LoopEnd
 MOV     R2,#0
 STRB    R2,[R3],#1

swi_PlayerInfo_ModuleName_Read
 ADD     R2,R11,#player_modulename

 _ENDPROC

;-------------------------------------------------------------------------------

; PlayerInfo code 12: module handle.
; R3 <> 0 requests a write of R2; R2 always returns the stored value.
swi_PlayerInfo_ModuleHandle
 CMP     R3,#0
 STRNE   R2,[R11,#player_modulehandle]   ; write request only
swi_PlayerInfo_ModuleHandle_Read
 LDR     R2,[R11,#player_modulehandle]

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI PlugInInfo
;
; Reads or writes a property of a plug-in slot, selected by an info code.
; Only code 0 (flags) exists.
;
; In  - R12 Global Header
;     - R0  plug-in handle
;     - R1  code + bit 31 to set a value
;     - R2...  values
;
; Out - R2...  value
;       or error block: bad plug-in handle, unknown info code
;
; The handler runs inside the stack frame created here and receives:
;       R10 non-zero when the write bit was set
;       R11 pointer to the plug-in slot
;-------------------------------------------------------------------------------

 ALIGN
swi_PlugInInfo _FNAME
 _DEFPROCV "R1,R3-R11"

 BL      PlugIn_SlotFromHandle
 _ENDPROC VS

 ; split the write bit (R10) from the code (R1), then dispatch on the code
 AND     R10,R1,#1<<31
 BIC     R1,R1,#1<<31
 CMP     R1,#(swi_PlugInInfo_tableend-swi_PlugInInfo_tablestart)/4
 ADDCC   PC,PC,R1,LSL #2
 B       swi_PlugInInfo_tableend
swi_PlugInInfo_tablestart
 B       swi_PlugInInfo_Flags            ; 0
swi_PlugInInfo_tableend
 ; code outside the table
 _ERRSWI unknown_info_code

;-------------------------------------------------------------------------------

; PlugInInfo code 0: plug-in flags.
;
; In  - R0  plug-in handle
;       R2  new flag values   (write only)
;       R3  mask of the flags to change (write only)
;       R10 non-zero for a write
;       R11 plug-in slot
;       R12 Global Header
;
; Out - R2  current flags, limited to plugin_flag_mask
;       or Err_OneFullScreenPlugIn when the full screen flag is being set
;       while Glb_FullScreenPlugIn is not -1
;
; Changing the full screen flag keeps Glb_FullScreenPlugIn in step: setting
; it records this handle and saves the desktop; clearing it resets the
; global to -1 and restores the desktop, as swi_UnregisterPlugIn does.
swi_PlugInInfo_Flags
 CMP     R10,#0
 BEQ     swi_PlugInInfo_Flags_Read

 ; R2 = flags
 ; R3 = flag mask
 ; Both are limited to plugin_flag_mask; R4 becomes the new flags word and
 ; R5 the set of bits whose value actually changes.
 AND     R2,R2,#plugin_flag_mask
 AND     R3,R3,#plugin_flag_mask
 AND     R2,R2,R3
 LDR     R4,[R11,#plugin_flags]
 AND     R5,R4,R3
 BIC     R4,R4,R3
 ORR     R4,R4,R2
 ; check changes
 EOR     R5,R5,R2
 ; full screen flag?
 TST     R5,#plugin_flag_fullscreen
 BEQ     swi_PlugInInfo_Flags_FullScreen_End
 LDR     R6,[R12,#Glb_FullScreenPlugIn]
 ; May not have more than one full screen plugin
 CMP     R6,#-1
 TSTNE   R2,#plugin_flag_fullscreen
 _ERROR  NE,Err_OneFullScreenPlugIn
 ; Do we turn plugin to full screen
 TST     R2,#plugin_flag_fullscreen
 STRNE   R0,[R12,#Glb_FullScreenPlugIn]
 BLNE    SaveDesktop
 ; Was our plugin full screen and is removed?
 ; Forget it as the active full screen plug-in before restoring the
 ; desktop, otherwise later registrations would still be refused.
 TST     R2,#plugin_flag_fullscreen
 MOVEQ   R6,#-1
 STREQ   R6,[R12,#Glb_FullScreenPlugIn]
 BLEQ    RestoreDesktop
swi_PlugInInfo_Flags_FullScreen_End
 ; Save new values
 STR     R4,[R11,#plugin_flags]

swi_PlugInInfo_Flags_Read
 LDR     R2,[R11,#plugin_flags]
 AND     R2,R2,#plugin_flag_mask

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI GetFullScreenPlugIn
;
; Returns the content of Glb_FullScreenPlugIn.
;
; In  - R12 Global Header
;
; Out - R0  full screen plug-in handle or -1
;-------------------------------------------------------------------------------

 ALIGN
swi_GetFullScreenPlugIn _FNAME
 _DEFPROCV

 LDR     R0,[R12,#Glb_FullScreenPlugIn]

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI PlayerHasPlugIn
;
; In  - R0  player handle
;       R12 Global Header
;
; Out - R0  preserved (or error block for a bad player handle)
;       R1  1 true, 0 false
;
; The answer is also 1 whenever a full screen plug-in is active, whichever
; player owns it.
;-------------------------------------------------------------------------------

 ALIGN
swi_PlayerHasPlugIn _FNAME
 _DEFPROCV "R4,R9-R11"
 MOV     R4,R0                  ; keep the handle; R0 is scratch below

 ; check player validity
 BL      Player_SlotFromHandle
 _ENDPROC VS

 ; an active full screen plug-in counts for every player
 LDR     R1,[R12,#Glb_FullScreenPlugIn]
 CMP     R1,#-1
 MOVNE   R1,#1
 BNE     PlayerHasPlugIn_End

 ; otherwise look for a registered plug-in attached to this player
 ADD     R11,R12,#Glb_Plugins
 MOV     R10,#size_plugin
 MOV     R9,#max_plugins
PlayerHasPlugIn_Loop
 LDR     R0,[R11,#plugin_flags]
 TST     R0,#plugin_flag_loaded
 BEQ     PlayerHasPlugIn_Next
 LDR     R0,[R11,#plugin_player]
 CMP     R0,R4
 BNE     PlayerHasPlugIn_Next
 MOV     R1,#1
 B       PlayerHasPlugIn_End
PlayerHasPlugIn_Next
 ADD     R11,R11,R10
 SUBS    R9,R9,#1
 BGT     PlayerHasPlugIn_Loop
 MOV     R1,#0

PlayerHasPlugIn_End
 MOV     R0,R4                  ; hand the player handle back in R0
 _ENDPROC

;-------------------------------------------------------------------------------
; SWI PlayerFindPlugIn
;
; Finds the plug-in of a player whose task name equals the given name.
;
; In  - R0  player handle
;       R1  plugin name
;       R12 Global Header
;
; Out - R0  preserved (or error block for a bad player handle)
;       R2  plugin handle or -1 if not found
;-------------------------------------------------------------------------------

 ALIGN
swi_PlayerFindPlugIn _FNAME
 _DEFPROCV "R4,R6-R11"
 MOV     R4,R0                  ; keep the handle; R0 is scratch below

 ; check player validity
 BL      Player_SlotFromHandle
 _ENDPROC VS

 ; R2 is both the loop index and the candidate plug-in handle
 ADD     R11,R12,#Glb_Plugins
 MOV     R10,#size_plugin
 MOV     R2,#0
PlayerFindPlugIn_Loop
 LDR     R0,[R11,#plugin_flags]
 TST     R0,#plugin_flag_loaded
 BEQ     PlayerFindPlugIn_Next
 ; check player
 LDR     R0,[R11,#plugin_player]
 CMP     R0,R4
 BNE     PlayerFindPlugIn_Next
 ; check task name
 ; (R0 = task handle in, R0 = name pointer used by the loop below)
 LDR     R0,[R11,#plugin_task]
 SWI     XTaskManager_TaskNameFromHandle
 BVS     PlayerFindPlugIn_Next
 MOV     R6,R1
PlayerFindPlugIn_NameLoop
 ; Compare byte by byte; a match needs both strings to end with the same
 ; byte below 32.
 ; NOTE(review): a name ending in 0 never matches a task name ending in
 ; another control character - confirm which terminator the Task Manager
 ; returns.
 LDRB    R7,[R0],#1
 LDRB    R8,[R6],#1
 CMP     R7,R8
 BNE     PlayerFindPlugIn_Next
 CMP     R7,#32
 BGE     PlayerFindPlugIn_NameLoop

 ; it's this one
 B       PlayerFindPlugIn_End

PlayerFindPlugIn_Next
 ADD     R11,R11,R10
 ADD     R2,R2,#1
 CMP     R2,#max_plugins
 BLT     PlayerFindPlugIn_Loop

 ; not found
 MOV     R2,#-1

PlayerFindPlugIn_End
 MOV     R0,R4                  ; hand the player handle back in R0
 _ENDPROC

;-------------------------------------------------------------------------------
; SWI StopPlugIns
;
; Sends the MsgQuit block to the task of every plug-in attached to a player.
; The plug-in slots themselves are left registered.
;
; In  - R0  player handle
;       R12 Global Header
;
; Out - R0  preserved (or error block for a bad player handle)
;-------------------------------------------------------------------------------

 ALIGN
; 20-byte Wimp message block with action code 0.
; NOTE(review): this block lives in the code area and is passed directly to
; Wimp_SendMessage, which presumably fills in header fields - confirm that
; writing here is acceptable (swi_RemoteControl builds its block in Glb_Temp).
MsgQuit
 DCD     20
 DCD     0
 DCD     0
 DCD     0
 DCD     0

 ALIGN
swi_StopPlugIns _FNAME
 _DEFPROCV "R1-R4,R9-R11"
 MOV     R4,R0                  ; keep the handle; R0 is scratch below

 ; check player validity
 BL      Player_SlotFromHandle
 _ENDPROC VS

 ; walk the plug-in table looking for plug-ins of this player
 ADD     R11,R12,#Glb_Plugins
 MOV     R10,#size_plugin
 MOV     R9,#max_plugins
StopPlugIns_Loop
 LDR     R0,[R11,#plugin_flags]
 TST     R0,#plugin_flag_loaded
 BEQ     StopPlugIns_Next
 LDR     R0,[R11,#plugin_player]
 CMP     R0,R4
 BNE     StopPlugIns_Next
 ; Send Message_Quit
 ; (event code 17 to the plug-in task; the result is not checked)
 MOV     R3,#-1
 LDR     R2,[R11,#plugin_task]
 ADR     R1,MsgQuit
 MOV     R0,#17
 SWI     XWimp_SendMessage
StopPlugIns_Next
 ADD     R11,R11,R10
 SUBS    R9,R9,#1
 BGT     StopPlugIns_Loop
StopPlugIns_LoopEnd

 MOV     R0,R4                  ; hand the player handle back in R0
 _ENDPROC

;-------------------------------------------------------------------------------
; SWI RemoteControl
;
; Sends a 36-byte message, built in Glb_Temp, to the task of a player.
;
; In  - R0  player handle
;       R1  command code
;       R2  command value
;       R12 Global Header
;
; Out - R0  preserved (or error block for a bad player handle)
;-------------------------------------------------------------------------------

 ALIGN
; Action code stored in the message built below.
MsgDcdMisc
 DCD     &5327E

 ALIGN
swi_RemoteControl _FNAME
 _DEFPROCV "R1-R4,R9-R11"
 MOV     R4,R0                  ; keep the handle; R0 is scratch below

 ; check player validity
 BL      Player_SlotFromHandle
 _ENDPROC VS

 ; the temporary area must be able to hold the whole message
 ASSERT  (Glb_TempEnd -Glb_Temp) >= 36
 ; Send Message_DCDMisc
 ; Layout: +0 size, +4/+8/+12 zero, +16 action, +20 reason code (4),
 ; +24 player handle, +28 command code, +32 command value.
 ADD     R10,R12,#Glb_Temp
 MOV     R0,#36
 STR     R0,[R10,#0]
 MOV     R0,#0
 STR     R0,[R10,#4]
 STR     R0,[R10,#8]
 STR     R0,[R10,#12]
 LDR     R0,MsgDcdMisc
 STR     R0,[R10,#16] ; action
 MOV     R0,#4
 STR     R0,[R10,#20] ; Reason code
 STR     R4,[R10,#24] ; controller nr
 STR     R1,[R10,#28] ; cmd
 STR     R2,[R10,#32] ; value
 ; event code 17 to the player task; the result is not checked
 MOV     R3,#-1
 LDR     R2,[R11,#player_task]
 MOV     R1,R10
 MOV     R0,#17
 SWI     XWimp_SendMessage

 MOV     R0,R4                  ; hand the player handle back in R0
 _ENDPROC

;-------------------------------------------------------------------------------
; SWI UpdatePlugInsPosition
;
; Sends the MsgUpdatePos block to the task of every plug-in attached to a
; player.
;
; In  - R0  player handle
;       R12 Global Header
;
; Out - R0  preserved (or error block for a bad player handle)
;       R1  is not in the saved register list and is overwritten when a
;           message is sent
;-------------------------------------------------------------------------------

 ALIGN
; 20-byte Wimp message block with action code &52780.
; NOTE(review): this block lives in the code area and is passed directly to
; Wimp_SendMessage, which presumably fills in header fields - confirm that
; writing here is acceptable (swi_RemoteControl builds its block in Glb_Temp).
MsgUpdatePos
 DCD     20
 DCD     0
 DCD     0
 DCD     0
 DCD     &52780

 ALIGN
swi_UpdatePlugInsPosition _FNAME
 _DEFPROCV "R2-R4,R9-R11"
 MOV     R4,R0                  ; keep the handle; R0 is scratch below

 ; check player validity
 BL      Player_SlotFromHandle
 _ENDPROC VS

 ; walk the plug-in table looking for plug-ins of this player
 ADD     R11,R12,#Glb_Plugins
 MOV     R10,#size_plugin
 MOV     R9,#max_plugins
UpdatePlugInsPosition_Loop
 LDR     R0,[R11,#plugin_flags]
 TST     R0,#plugin_flag_loaded
 BEQ     UpdatePlugInsPosition_Next
 LDR     R0,[R11,#plugin_player]
 CMP     R0,R4
 BNE     UpdatePlugInsPosition_Next
 ; Send Message
 ; (event code 17 to the plug-in task; the result is not checked)
 MOV     R3,#-1
 LDR     R2,[R11,#plugin_task]
 ADR     R1,MsgUpdatePos
 MOV     R0,#17
 SWI     XWimp_SendMessage
UpdatePlugInsPosition_Next
 ADD     R11,R11,R10
 SUBS    R9,R9,#1
 BGT     UpdatePlugInsPosition_Loop
UpdatePlugInsPosition_LoopEnd

 MOV     R0,R4                  ; hand the player handle back in R0
 _ENDPROC

;-------------------------------------------------------------------------------
; SWI SetSaverTask
;
; Records the screen saver task in Glb_SaverTask. The service call handler
; zeroes the field again when that task closes down.
;
; In  - R0  screen saver task id
;       R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------

 ALIGN
swi_SetSaverTask _FNAME
 _DEFPROCV

 STR     R0,[R12,#Glb_SaverTask]

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI GetBufferRate
;
; Returns the sample rate of the sound source used by a player, or of an
; explicitly given source.
;
; In  - R0  player handle
;           or input source if bit 31 set
;       R12 Global Header
;
; Out - R0  preserved, or error block (bad player handle,
;           Err_Bad_Sound_Origin)
;       R1  rate (Hz); 1<<16 as a dummy value when the source is -1
;-------------------------------------------------------------------------------

 ALIGN
swi_GetBufferRate _FNAME
 _DEFPROCV "R2-R4,R9-R11"
 MOV     R10,R0                 ; keep the original R0 for the exit path

 ; bit 31 set: R0 is a source number; otherwise take the source from the
 ; player slot
 TST     R0,#1<<31
 BICNE   R0,R0,#1<<31
 BNE     swi_GetBufferRate_FromSource
 BL      Player_SlotFromHandle
 _ENDPROC VS
 LDR     R0,[R11,#player_soundorigin]

swi_GetBufferRate_FromSource
 CMP     R0,#-1
 BEQ     swi_GetBufferRate_Dummy
 LDR     R9,[R12,#Glb_SoundDrivers]
 CMP     R0,R9
 _ERROR  HI,Err_Bad_Sound_Origin

 ; R11 = record of source R0 (Glb_Buffers + R0 * Size_Driver), so that the
 ; rate is refreshed in the record belonging to this source and not in the
 ; record of the highest numbered driver
 ADD     R11,R12,#Glb_Buffers
 MOV     R2,#Size_Driver
 MLA     R11,R0,R2,R11

 ; hardware_bufferrate stores the rate in the record; read it back from
 ; there
 BL      hardware_bufferrate
 LDR     R1,[R11,#Driver_fmtSampleRate]
 B       swi_GetBufferRate_End

swi_GetBufferRate_Dummy
 MOV     R1,#1<<16 ; dummy value

swi_GetBufferRate_End
 MOV     R0,R10
 _ENDPROC

;-------------------------------------------------------------------------------
; Refreshes the sample rate stored in a driver record.
;
; In  - R0  source type (0 = DMA, otherwise a sound driver number)
;       R11 Driver ptr
;       R12 Global Header
;
; Out - [R11,#Driver_fmtSampleRate] updated
;       The frame lists R0-R12, so the result is returned only through the
;       driver record.
;-------------------------------------------------------------------------------
hardware_bufferrate
 _DEFPROC "R0-R12"
 CMP     R0,#0
 BEQ     hardware_bufferrate_DMA

hardware_bufferrate_SoundDriver
 ; Ask the driver: rate = R2 >> 10, or the dummy value 1<<16 on error.
 MOV     R1,R0
 MOV     R0,#0
 SWI     &6DBC6 ; XSoundDriver_SampleRate
 MOVVS   R1,#1<<16 ; dummy value
 MOVVC   R1,R2,LSR #10
 STR     R1,[R11,#Driver_fmtSampleRate]
 _ENDPROC

hardware_bufferrate_DMA
 ; First choice: Sound_SampleRate. R1 becomes R2 >> 10, or 0 on error; the
 ; S suffix sets Z so that a zero result falls through to the fallback.
 MOV     R0,#1
 SWI     XSound_SampleRate
 MOVVSS  R1,#0
 MOVVCS  R1,R2,LSR #10
 STRNE   R1,[R11,#Driver_fmtSampleRate]
 _ENDPROC NE

hardware_bufferrate_DMA8
 ; Fallback: call Sound_Configure with all parameters zero and derive the
 ; rate by dividing 1000000 by the returned R2.
 ; NOTE(review): presumably R2 is the sample period in microseconds and is
 ; never zero here - confirm; also confirm the operand order of _DIVIDE.
 MOV     R0,#0
 MOV     R1,#0
 MOV     R2,#0
 MOV     R3,#0
 MOV     R4,#0
 SWI     XSound_Configure
 _LMOV   R0,1000000
 _DIVIDE R1,R0,R2,R3
 STR     R1,[R11,#Driver_fmtSampleRate]
 _ENDPROC

;-------------------------------------------------------------------------------
; SWI FillBuffer
;
; Copies the most recent samples of a sound source into the caller's buffer
; as one word per channel value (8 bytes per stereo sample), scaled by the
; player's song volume (or unscaled for an explicit source), then lets
; CheckSound attenuate the block if it exceeds the 16-bit range.
;
; In  - R0  player handle
;           or input source if bit 31 set
;       R1  buffer
;       R2  buffer end
;       R12 Global Header
;
; Out - R0  as on entry
;       R2  filled buffer end (= R1 when nothing was filled)
;       Error Err_Bad_Sound_Origin if the source is above Glb_SoundDrivers
;
; swi_FillRawBuffer branches to swi_FillBuffer_FromSource with R9/R10 set up.
;-------------------------------------------------------------------------------

 ALIGN
swi_FillBuffer _FNAME
 _DEFPROCV "R1,R3-R11"
 MOV     R10,R0                         ; keep entry R0 for the exit path

 TST     R0,#1<<31
 BICNE   R0,R0,#1<<31                   ; explicit source: no player volume
 MOVNE   R9,#MaxVolume
 BNE     swi_FillBuffer_FromSource
 BL      Player_SlotFromHandle          ; player slot is then read through R11
 _ENDPROC VS
 LDR     R0,[R11,#player_soundorigin]
 LDR     R9,[R11,#player_songscalevolume]

swi_FillBuffer_FromSource
 ; R0 = source, R9 = scale, R10 = entry R0
 CMP     R0,#-1
 MOVEQ   R2,R1                          ; no source: nothing filled
 BEQ     swi_FillBuffer_End
 LDR     R8,[R12,#Glb_SoundDrivers]
 CMP     R0,R8
 _ERROR  HI,Err_Bad_Sound_Origin

swi_FillBuffer_FromSource_Go
 ; R3 = number of stereo samples that fit (8 bytes each).
 ; The shift is arithmetic so that a reversed buffer (end < start) stays
 ; negative instead of turning into a huge unsigned sample count.
 SUB     R3,R2,R1
 CMP     R3,#SmpSize
 MOVGT   R3,#SmpSize
 MOV     R3,R3,ASR #3
 ; Nothing to fill: return an empty result without touching the driver
 ; (R11 is not a driver pointer yet) and without letting CheckSound read
 ; or rescale words beyond an empty buffer.
 CMP     R3,#0
 MOVLE   R2,R1
 BLE     swi_FillBuffer_End
 BL      Buffer_Find
 ; R2, R11 altered

 MOV     R0,R9
 BL      FillSound32
 ADD     R2,R1,R3,LSL #3                ; R2 = end of the filled part
 BL      CheckSound

swi_FillBuffer_End
 MOV     R0,R10
 _ENDPROC

;-------------------------------------------------------------------------------
; SWI FillRawBuffer
;
; Same as SWI FillBuffer but always uses MaxVolume as the scale, i.e. the
; player's song volume is never applied. The routine only resolves the sound
; source and then continues in swi_FillBuffer_FromSource, which also performs
; the procedure exit.
;
; In  - R0  player handle
;           or input source if bit 31 set
;       R1  buffer
;       R2  buffer end
;       R12 Global Header
;
; Out - R2  filled buffer end
;-------------------------------------------------------------------------------

 ALIGN
swi_FillRawBuffer _FNAME
 ; the saved register list must match swi_FillBuffer, whose _ENDPROC is used
 _DEFPROCV "R1,R3-R11"
 MOV     R10,R0                         ; entry R0, restored by swi_FillBuffer_End

 MOV     R9,#MaxVolume                  ; scale expected in R9 by the shared code

 TST     R0,#1<<31
 BICNE   R0,R0,#1<<31                   ; explicit source
 BNE     swi_FillBuffer_FromSource
 BL      Player_SlotFromHandle          ; player slot is then read through R11
 _ENDPROC VS
 LDR     R0,[R11,#player_soundorigin]
 B       swi_FillBuffer_FromSource

;-------------------------------------------------------------------------------
; SWI FillABuffer
;
; Copies the most recent samples of a sound source into the caller's buffer
; using 1, 2 or 4 bytes per channel value (FillSound16 does the conversion).
;
; In  - R0  player handle
;           or input source if bit 31 set
;       R1  buffer
;       R2  buffer end
;       R3  flags
;             bits 0-1: size in (number-1) of bytes used to store a value
;             bit 2: rescale using player volume
;       R12 Global Header
;
; Out - R0  as on entry
;       R2  filled buffer end (= R1 when nothing was filled)
;       Error Err_Bad_Sound_Origin if the source is above Glb_SoundDrivers
;       Error Err_Bad_24bit_Not_Supported if bits 0-1 of the flags are 2
;-------------------------------------------------------------------------------

Err_Bad_24bit_Not_Supported
 DCD     0
 = "24-bit sample buffers are not supported",0

 ALIGN
swi_FillABuffer _FNAME
 _DEFPROCV "R1,R3-R11"
 MOV     R10,R0                         ; keep entry R0 for the exit path

 TST     R0,#1<<31
 BICNE   R0,R0,#1<<31                   ; explicit source: no player volume
 MOVNE   R9,#MaxVolume
 BNE     swi_FillABuffer_FromSource
 BL      Player_SlotFromHandle          ; player slot is then read through R11
 _ENDPROC VS
 LDR     R0,[R11,#player_soundorigin]
 TST     R3,#4                          ; flag bit 2: apply player volume
 LDRNE   R9,[R11,#player_songscalevolume]
 MOVEQ   R9,#MaxVolume

swi_FillABuffer_FromSource
 ; R0 = source, R3 = flags, R9 = scale, R10 = entry R0
 CMP     R0,#-1
 MOVEQ   R2,R1                          ; no source: nothing filled
 BEQ     swi_FillABuffer_End
 LDR     R8,[R12,#Glb_SoundDrivers]
 CMP     R0,R8
 _ERROR  HI,Err_Bad_Sound_Origin

swi_FillABuffer_FromSource_Go
 ; number of shifts required to get number of samples
 ; (bytes per value - 1) 0,1,3 -> LOG2(stereo sample size) 1,2,3
 AND     R5,R3,#3
 CMP     R5,#2
 _ERROR  EQ,Err_Bad_24bit_Not_Supported
 ADDLO   R5,R5,#1

 ; R3 = number of stereo samples that fit.
 ; The shift is arithmetic so that a reversed buffer (end < start) stays
 ; negative instead of turning into a huge unsigned sample count.
 SUB     R3,R2,R1
 CMP     R3,#SmpSize
 MOVGT   R3,#SmpSize
 MOV     R3,R3,ASR R5
 ; Nothing to fill: return an empty result without calling FillSound16,
 ; which would read Driver_DataOffset through an R11 that is not a driver
 ; pointer yet.
 CMP     R3,#0
 MOVLE   R2,R1
 BLE     swi_FillABuffer_End
 BL      Buffer_Find
 ; R2, R11 altered

 MOV     R0,R9
 BL      FillSound16

swi_FillABuffer_Check
 ADD     R2,R1,R3,LSL R5                ; R2 = end of the filled part

swi_FillABuffer_End
 MOV     R0,R10
 _ENDPROC

;-------------------------------------------------------------------------------
; GetSoundDMA
;
; Finds the addresses of the two sound DMA buffers. A small handler that only
; records the buffer pointer it is called with is installed temporarily
; (Handler8bit copied to RAM for the 8-bit system, Handler16 as linear handler
; otherwise). The routine waits until a second buffer address has been
; recorded or 100 ticks of OS_ReadMonotonicTime have elapsed, restores the
; previous handler, and stores the two addresses in ascending order.
;
; In  - R12 Global Header
;
; Out - Glb_DMAMode     0 = 8-bit system, 1 = 16-bit system
;       Glb_DMABuffer1  lower buffer address (0 if none was seen)
;       Glb_DMABuffer2  higher buffer address
;       Nothing is changed if Sound_Enable fails or does not return 2.
;-------------------------------------------------------------------------------
 ALIGN

GetSoundDMA _FNAME
 _DEFPROC "R0-R5"

 ; NOTE(review): R0 = 0 presumably reads the state and 2 means "enabled"
 MOV     R0,#0
 SWI     XSound_Enable
 _ENDPROC VS
 CMP     R0,#2
 _ENDPROC NE
 ; normalise the sound mode to 0/1 (an error counts as 8-bit)
 MOV     R0,#0
 SWI     XSound_Mode
 MOVVS   R0,#0
 CMP     R0,#0
 MOVNE   R0,#1
 STR     R0,[R12,#Glb_DMAMode]
 BNE     GetSoundDMA16

GetSoundDMA8
 ; create little routine dynamically
 ; The whole block Handler8bit_R12..Handler8bit_End is copied below, so the
 ; temporary area must be able to hold all of it, not just the leading word.
 ASSERT  (Glb_TempEnd -Glb_Temp) >= (Handler8bit_End - Handler8bit_R12)
 ADD     R0,R12,#Glb_Temp
 ADR     R1,Handler8bit_R12
 MOV     R2,#(Handler8bit_End - Handler8bit_R12)
 BL      Mem_Move
 ; Save R12 for routine
 STR     R12,[R12,#Glb_Temp]
 ADD     R4,R12,#Glb_Temp+(Handler8bit - Handler8bit_R12)
 STR     R4,[R12,#Glb_DMAHandler8]
 ; Synchronise areas
 ; NOTE(review): the range starts at Glb_DMAHandler8 and assumes the copied
 ; code lies within 32 bytes after it - confirm against the globals layout
 MOV     R0,#1
 ADD     R1,R12,#Glb_DMAHandler8
 ADD     R2,R1,#32 + (Handler8bit_End - Handler8bit_R12)
 SWI     XOS_SynchroniseCodeAreas
 MOV     R0,#0
 STR     R0,[R12,#Glb_DMABuffer1]
 STR     R0,[R12,#Glb_DMABuffer2]
 ; read and remember the current configuration
 MOV     R1,#0
 MOV     R2,#0
 MOV     R3,#0
 MOV     R4,#0
 SWI     XSound_Configure
 ADD     R5,R12,#Glb_DMAConfig
 STMIA   R5,{R0-R4}
 ; install our handler block
 ADD     R3,R12,#Glb_DMAHandler8
 MOV     R4,#0
 SWI     XSound_Configure
 ; wait for the second buffer address, at most 100 ticks
 SWI     XOS_ReadMonotonicTime
 ADD     R1,R0,#100
GetSoundDMA8_Loop
 SWI     XOS_ReadMonotonicTime
 SUBS    R2,R1,R0
 BLT     GetSoundDMA8_Loop_End
 LDR     R4,[R12,#Glb_DMABuffer2]
 CMP     R4,#0
 BEQ     GetSoundDMA8_Loop
GetSoundDMA8_Loop_End
 ; put the saved configuration back
 ADD     R5,R12,#Glb_DMAConfig
 LDMIA   R5,{R0-R4}
 SWI     XSound_Configure
 B       GetSoundDMA_End

GetSoundDMA16
 ; install Handler16 with R12 as its parameter, remember the previous one
 MOV     R0,#1
 ADR     R1,Handler16
 MOV     R2,R12
 SWI     XSound_LinearHandler
 ADD     R5,R12,#Glb_DMAConfig
 STMIA   R5,{R1-R2}
 ; wait for the second buffer address, at most 100 ticks
 SWI     XOS_ReadMonotonicTime
 ADD     R1,R0,#100
GetSoundDMA16_Loop
 SWI     XOS_ReadMonotonicTime
 SUBS    R2,R1,R0
 BLT     GetSoundDMA16_Loop_End
 LDR     R4,[R12,#Glb_DMABuffer2]
 CMP     R4,#0
 BEQ     GetSoundDMA16_Loop
GetSoundDMA16_Loop_End
 ; put the previous linear handler back
 ADD     R5,R12,#Glb_DMAConfig
 LDMIA   R5,{R1-R2}
 MOV     R0,#1
 SWI     XSound_LinearHandler

GetSoundDMA_End
 ; keep the two addresses in ascending order
 LDR     R0,[R12,#Glb_DMABuffer1]
 LDR     R1,[R12,#Glb_DMABuffer2]
 CMP     R0,R1
 MOVHI   R2,R0
 MOVHI   R0,R1
 MOVHI   R1,R2
 STR     R0,[R12,#Glb_DMABuffer1]
 STR     R1,[R12,#Glb_DMABuffer2]

 _ENDPROC

; Temporary sound handlers used by GetSoundDMA to learn the DMA buffer
; addresses. Both store the buffer pointer they receive in Glb_DMABuffer1 if
; that word is still zero, otherwise in Glb_DMABuffer2.
;
; Handler8bit_R12..Handler8bit_End is copied to Glb_Temp by GetSoundDMA and
; runs from there: the first word is overwritten with the module's global
; pointer and is reached PC-relative, so the word and the code must stay
; adjacent and in this order.
Handler8bit_R12
 DCD     0
Handler8bit
 ; NOTE(review): presumably entered as the channel handler fill routine with
 ; R12 = DMA buffer address - confirm against the Sound_Configure handler spec
 MOV     R0,R12
 LDR     R12,Handler8bit_R12
 LDR     R1,[R12,#Glb_DMABuffer1]
 CMP     R1,#0
 STREQ   R0,[R12,#Glb_DMABuffer1]
 STRNE   R0,[R12,#Glb_DMABuffer2]
 ; TEQ PC,PC gives EQ only in a 32-bit mode: return without / with flag restore
 TEQ     PC,PC
 MOVEQ   PC,R14
 MOVNES  PC,R14
Handler8bit_End
; Linear handler: R0 = parameter given at installation (the global pointer,
; see GetSoundDMA16), R1 = buffer address as used by the stores below
Handler16
 LDR     R2,[R0,#Glb_DMABuffer1]
 CMP     R2,#0
 STREQ   R1,[R0,#Glb_DMABuffer1]
 STRNE   R1,[R0,#Glb_DMABuffer2]
 _RETURNFlags

; SmpLogToLin
;
; 256-entry lookup table converting an 8-bit logarithmic sample (as read from
; the 8-bit DMA buffer by Buffers_FillDMA8) to a signed 8-bit linear value.
; Entries come in pairs: even index = +v, odd index = -v (two's complement).
SmpLogToLin
 DCB &00, &00, &00, &00, &00, &00, &00, &00
 DCB &00, &00, &00, &00, &00, &00, &00, &00
 DCB &00, &00, &00, &00, &00, &00, &00, &00
 DCB &00, &00, &00, &00, &00, &00, &00, &00
 DCB &01, &FF, &01, &FF, &01, &FF, &01, &FF
 DCB &01, &FF, &01, &FF, &01, &FF, &01, &FF
 DCB &01, &FF, &01, &FF, &01, &FF, &01, &FF
 DCB &01, &FF, &01, &FF, &01, &FF, &01, &FF
 DCB &02, &FE, &02, &FE, &02, &FE, &02, &FE
 DCB &02, &FE, &02, &FE, &02, &FE, &02, &FE
 DCB &02, &FE, &03, &FD, &03, &FD, &03, &FD
 DCB &03, &FD, &03, &FD, &03, &FD, &03, &FD
 DCB &04, &FC, &04, &FC, &04, &FC, &04, &FC
 DCB &04, &FC, &05, &FB, &05, &FB, &05, &FB
 DCB &05, &FB, &06, &FA, &06, &FA, &06, &FA
 DCB &06, &FA, &07, &F9, &07, &F9, &07, &F9
 DCB &08, &F8, &08, &F8, &09, &F7, &09, &F7
 DCB &09, &F7, &0A, &F6, &0A, &F6, &0B, &F5
 DCB &0B, &F5, &0C, &F4, &0C, &F4, &0D, &F3
 DCB &0D, &F3, &0E, &F2, &0F, &F1, &0F, &F1
 DCB &10, &F0, &11, &EF, &12, &EE, &12, &EE
 DCB &13, &ED, &14, &EC, &15, &EB, &16, &EA
 DCB &17, &E9, &18, &E8, &19, &E7, &1A, &E6
 DCB &1B, &E5, &1D, &E3, &1E, &E2, &1F, &E1
 DCB &21, &DF, &22, &DE, &24, &DC, &25, &DB
 DCB &27, &D9, &29, &D7, &2A, &D6, &2C, &D4
 DCB &2E, &D2, &30, &D0, &33, &CD, &35, &CB
 DCB &37, &C9, &3A, &C6, &3C, &C4, &3F, &C1
 DCB &42, &BE, &45, &BB, &48, &B8, &4B, &B5
 DCB &4E, &B2, &52, &AE, &55, &AB, &59, &A7
 ; -&66 is &9A (was &9F, a duplicate of the -&61 entry)
 DCB &5D, &A3, &61, &9F, &66, &9A, &6A, &96
 ; NOTE(review): the last pair is +&7F / &80 (-128) rather than &81 (-127);
 ; left unchanged as it may be deliberate full-scale clamping
 DCB &6F, &91, &74, &8C, &79, &87, &7F, &80

;-------------------------------------------------------------------------------
; FillSound16
;
; Dispatches to the conversion routine matching the output sample size.
; The selected routine copies the latest R3 samples from the driver's ring
; buffer to the output buffer, applying the volume scale.
;
; In  - R0  Scale [0-256*100]
;       R1  out buffer
;       R2  in buffer /* 16-bit stereo linear signed */
;       R3  nr of samples
;       R5  LOG2(stereo sample size)
;       R11 Driver ptr
;       R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------
 ALIGN

FillSound16 _FNAME
 ; Computed jump: PC reads as the address of the ADD plus 8, so R5 selects
 ; one of the words starting at the second MOV below. The order and number
 ; of the following instructions must not change.
 ;   R5 = 0 -> second MOV, falls into B FillSound16_8
 ;   R5 = 1 -> B FillSound16_8
 ;   R5 = 2 -> B FillSound16_16
 ;   R5 = 3 -> last MOV, falls into B FillSound16_32
 ADD     PC,PC,R5,LSL #2
 MOV     R0,R0
 MOV     R0,R0
 B       FillSound16_8
 B       FillSound16_16
 MOV     R0,R0
 B       FillSound16_32

; FillSound16_8
;
; Output format: 8 bits per channel value, two stereo samples per output word.
; Registers as described for FillSound16. Each input word holds two signed
; 16-bit values; each is multiplied by the scale factor, clamped to 28 bits
; and reduced to its top 8 bits (bits 20-27).
;
; NOTE(review): samples are consumed in pairs. If the part taken from the end
; of the ring buffer (R7) is odd, the last Loop1 iteration appears to read one
; sample beyond the ring buffer and one extra word is written in total -
; confirm that Driver_DataOffset is always even on this path.
FillSound16_8 _FNAME
 _DEFPROC "R0-R10"
 ; Round R3
 BIC     R3,R3,#1

 ; Get scaling factor
 ; R4 = (MaxVolume<<12) / scale, i.e. 1<<12 at full scale; 0 when scale <= 0
 CMP     R0,#0
 MOVLE   R4,#0
 BLE     FillSound16_8_EndVolume
 MOV     R5,#MaxVolume<<12
 _DIVIDE R4,R5,R0,R6
FillSound16_8_EndVolume
 ; clamp limits: R8 = &07FFFFFF, R9 = -&08000000
 MOV     R8,#&08000000
 RSB     R9,R8,#0
 SUB     R8,R8,#1

 ; Rewind from end of filled part
 ; R7 = index of the first sample to copy (wraps inside the ring buffer)
 LDR     R7,[R11,#Driver_DataOffset]
 SUBS    R7,R7,R3
 ADDLT   R7,R7,#SmpSize
 ADD     R0,R2,R7,LSL #2

 ; Get number of values to fill from end and start of buffer
 ; R7 = samples up to the ring buffer end, R3 = samples after the wrap
 RSB     R7,R7,#SmpSize
 CMP     R7,R3
 MOVGT   R7,R3
 SUB     R3,R3,R7

 ; Fill from end of filled buffer
 CMP     R7,#0
 BLE     FillSound16_8_Loop1_End
FillSound16_8_Loop1
 LDR     R5,[R0],#4
 LDR     R10,[R0],#4
 ; first sample: R6 = high halfword, R5 = low halfword (sign extended)
 MOV     R6,R5,ASR #16
 MOV     R5,R5,LSL #16
 MOV     R5,R5,ASR #16
 MUL     R6,R4,R6
 MUL     R5,R4,R5
 CMP     R6,R8
 MOVGT   R6,R8
 CMP     R6,R9
 MOVLT   R6,R9
 CMP     R5,R8
 MOVGT   R5,R8
 CMP     R5,R9
 MOVLT   R5,R9
 ; low value -> output bits 0-7, high value -> bits 8-15
 AND     R5,R5,#&FF00000
 AND     R6,R6,#&FF00000
 MOV     R5,R5,LSR #20
 ORR     R5,R5,R6,LSR #12

 ; second sample: R6 = high halfword, R10 = low halfword (sign extended)
 MOV     R6,R10,ASR #16
 MOV     R10,R10,LSL #16
 MOV     R10,R10,ASR #16
 MUL     R6,R4,R6
 MUL     R10,R4,R10
 CMP     R6,R8
 MOVGT   R6,R8
 CMP     R6,R9
 MOVLT   R6,R9
 CMP     R10,R8
 MOVGT   R10,R8
 CMP     R10,R9
 MOVLT   R10,R9
 ; low value -> output bits 16-23, high value -> bits 24-31
 AND     R10,R10,#&FF00000
 AND     R6,R6,#&FF00000
 ORR     R5,R5,R10,LSR #4
 ORR     R5,R5,R6,LSL #4
 STR     R5,[R1],#4
 SUBS    R7,R7,#2
 BGT     FillSound16_8_Loop1
FillSound16_8_Loop1_End

 ; Fill from start of buffer (same conversion, reading from R2)
 CMP     R3,#0
 BLE     FillSound16_8_Loop2_End
FillSound16_8_Loop2
 LDR     R5,[R2],#4
 LDR     R10,[R2],#4
 MOV     R6,R5,ASR #16
 MOV     R5,R5,LSL #16
 MOV     R5,R5,ASR #16
 MUL     R6,R4,R6
 MUL     R5,R4,R5
 CMP     R6,R8
 MOVGT   R6,R8
 CMP     R6,R9
 MOVLT   R6,R9
 CMP     R5,R8
 MOVGT   R5,R8
 CMP     R5,R9
 MOVLT   R5,R9
 AND     R5,R5,#&FF00000
 AND     R6,R6,#&FF00000
 MOV     R5,R5,LSR #20
 ORR     R5,R5,R6,LSR #12

 MOV     R6,R10,ASR #16
 MOV     R10,R10,LSL #16
 MOV     R10,R10,ASR #16
 MUL     R6,R4,R6
 MUL     R10,R4,R10
 CMP     R6,R8
 MOVGT   R6,R8
 CMP     R6,R9
 MOVLT   R6,R9
 CMP     R10,R8
 MOVGT   R10,R8
 CMP     R10,R9
 MOVLT   R10,R9
 AND     R10,R10,#&FF00000
 AND     R6,R6,#&FF00000
 ORR     R5,R5,R10,LSR #4
 ORR     R5,R5,R6,LSL #4
 STR     R5,[R1],#4
 SUBS    R3,R3,#2
 BGT     FillSound16_8_Loop2
FillSound16_8_Loop2_End

 _ENDPROC

; FillSound16_16
;
; Output format: 16 bits per channel value, one stereo sample per output word
; with the halfwords in the same positions as in the input word.
; Registers as described for FillSound16.
FillSound16_16 _FNAME
 _DEFPROC "R0-R9"

 ; Get scaling factor
 ; R4 = (MaxVolume<<12) / scale, i.e. 1<<12 at full scale; 0 when scale <= 0
 CMP     R0,#0
 MOVLE   R4,#0
 BLE     FillSound16_16_EndVolume
 MOV     R5,#MaxVolume<<12
 _DIVIDE R4,R5,R0,R6
FillSound16_16_EndVolume
 ; clamp limits: R8 = &07FFFFFF, R9 = -&08000000
 MOV     R8,#&08000000
 RSB     R9,R8,#0
 SUB     R8,R8,#1

 ; Rewind from end of filled part
 ; R7 = index of the first sample to copy (wraps inside the ring buffer)
 LDR     R7,[R11,#Driver_DataOffset]
 SUBS    R7,R7,R3
 ADDLT   R7,R7,#SmpSize
 ADD     R0,R2,R7,LSL #2

 ; Get number of values to fill from end and start of buffer
 ; R7 = samples up to the ring buffer end, R3 = samples after the wrap
 RSB     R7,R7,#SmpSize
 CMP     R7,R3
 MOVGT   R7,R3
 SUB     R3,R3,R7

 ; Fill from end of filled buffer
 CMP     R7,#0
 BLE     FillSound16_16_Loop1_End
FillSound16_16_Loop1
 LDR     R5,[R0],#4
 ; R6 = high halfword, R5 = low halfword (sign extended)
 MOV     R6,R5,ASR #16
 MOV     R5,R5,LSL #16
 MOV     R5,R5,ASR #16
 MUL     R6,R4,R6
 MUL     R5,R4,R5
 CMP     R6,R8
 MOVGT   R6,R8
 CMP     R6,R9
 MOVLT   R6,R9
 CMP     R5,R8
 MOVGT   R5,R8
 CMP     R5,R9
 MOVLT   R5,R9
 ; drop the 12 fraction bits and repack as high:low halfwords
 MOV     R6,R6,LSR #12
 MOV     R5,R5,LSL #4
 MOV     R5,R5,LSR #16
 ORR     R6,R5,R6,LSL #16
 STR     R6,[R1],#4
 SUBS    R7,R7,#1
 BGT     FillSound16_16_Loop1
FillSound16_16_Loop1_End

 ; Fill from start of buffer
 CMP     R3,#0
 BLE     FillSound16_16_Loop2_End
FillSound16_16_Loop2
 LDR     R5,[R2],#4
 MOV     R6,R5,ASR #16
 MOV     R5,R5,LSL #16
 MOV     R5,R5,ASR #16
 MUL     R6,R4,R6
 MUL     R5,R4,R5
 CMP     R6,R8
 MOVGT   R6,R8
 CMP     R6,R9
 MOVLT   R6,R9
 CMP     R5,R8
 MOVGT   R5,R8
 CMP     R5,R9
 MOVLT   R5,R9
 MOV     R6,R6,LSR #12
 MOV     R5,R5,LSL #4
 MOV     R5,R5,LSR #16
 ORR     R6,R5,R6,LSL #16
 STR     R6,[R1],#4
 SUBS    R3,R3,#1
 BGT     FillSound16_16_Loop2
FillSound16_16_Loop2_End

 _ENDPROC

; FillSound16_32
;
; Output format: 32 bits per channel value, two output words per stereo
; sample. The value from the low input halfword is stored first, then the
; value from the high halfword.
; NOTE(review): FillSound32 stores the high-halfword value first - confirm the
; different channel order is intended.
; Registers as described for FillSound16.
FillSound16_32 _FNAME
 _DEFPROC "R0-R9"

 ; Get scaling factor
 ; R4 = (MaxVolume<<12) / scale, i.e. 1<<12 at full scale; 0 when scale <= 0
 CMP     R0,#0
 MOVLE   R4,#0
 BLE     FillSound16_32_EndVolume
 MOV     R5,#MaxVolume<<12
 _DIVIDE R4,R5,R0,R6
FillSound16_32_EndVolume
 ; clamp limits: R8 = &07FFFFFF, R9 = -&08000000
 MOV     R8,#&08000000
 RSB     R9,R8,#0
 SUB     R8,R8,#1

 ; Rewind from end of filled part
 ; R7 = index of the first sample to copy (wraps inside the ring buffer)
 LDR     R7,[R11,#Driver_DataOffset]
 SUBS    R7,R7,R3
 ADDLT   R7,R7,#SmpSize
 ADD     R0,R2,R7,LSL #2

 ; Get number of values to fill from end and start of buffer
 ; R7 = samples up to the ring buffer end, R3 = samples after the wrap
 RSB     R7,R7,#SmpSize
 CMP     R7,R3
 MOVGT   R7,R3
 SUB     R3,R3,R7

 ; Fill from end of filled buffer
 CMP     R7,#0
 BLE     FillSound16_32_Loop1_End
FillSound16_32_Loop1
 LDR     R5,[R0],#4
 ; R6 = high halfword, R5 = low halfword (sign extended)
 MOV     R6,R5,ASR #16
 MOV     R5,R5,LSL #16
 MOV     R5,R5,ASR #16
 MUL     R6,R4,R6
 MUL     R5,R4,R5
 CMP     R6,R8
 MOVGT   R6,R8
 CMP     R6,R9
 MOVLT   R6,R9
 CMP     R5,R8
 MOVGT   R5,R8
 CMP     R5,R9
 MOVLT   R5,R9
 ; 28-bit clamped value -> full 32-bit range
 MOV     R5,R5,LSL #4
 MOV     R6,R6,LSL #4
 STR     R5,[R1],#4
 STR     R6,[R1],#4
 SUBS    R7,R7,#1
 BGT     FillSound16_32_Loop1
FillSound16_32_Loop1_End

 ; Fill from start of buffer
 CMP     R3,#0
 BLE     FillSound16_32_Loop2_End
FillSound16_32_Loop2
 LDR     R5,[R2],#4
 MOV     R6,R5,ASR #16
 MOV     R5,R5,LSL #16
 MOV     R5,R5,ASR #16
 MUL     R6,R4,R6
 MUL     R5,R4,R5
 CMP     R6,R8
 MOVGT   R6,R8
 CMP     R6,R9
 MOVLT   R6,R9
 CMP     R5,R8
 MOVGT   R5,R8
 CMP     R5,R9
 MOVLT   R5,R9
 MOV     R5,R5,LSL #4
 MOV     R6,R6,LSL #4
 STR     R5,[R1],#4
 STR     R6,[R1],#4
 SUBS    R3,R3,#1
 BGT     FillSound16_32_Loop2
FillSound16_32_Loop2_End

 _ENDPROC

;-------------------------------------------------------------------------------
; FillSound32
;
; Copies the latest R3 samples from the driver's ring buffer to the output
; buffer, one word per channel value (two words per stereo sample). Values
; are scaled but not clamped here; swi_FillBuffer runs CheckSound afterwards.
; For each sample the value from the high input halfword is stored first.
;
; In  - R0  Scale [0-256*100]
;       R1  buffer /* 16-bit stereo linear signed but stored as words */
;       R2  buffer /* 16-bit stereo linear signed */
;       R3  nr of samples
;       R11 Driver ptr
;       R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------
 ALIGN

FillSound32 _FNAME
 _DEFPROC "R0-R7"

 ; Get scaling factor
 ; R4 = (MaxVolume<<16) / scale, i.e. 1<<16 at full scale; 0 when scale <= 0
 CMP     R0,#0
 MOVLE   R4,#0
 BLE     FillSound32_EndVolume
 MOV     R5,#MaxVolume<<16
 _DIVIDE R4,R5,R0,R6
FillSound32_EndVolume

 ; Rewind from end of filled part
 ; R7 = index of the first sample to copy (wraps inside the ring buffer)
 LDR     R7,[R11,#Driver_DataOffset]
 SUBS    R7,R7,R3
 ADDLT   R7,R7,#SmpSize
 ADD     R0,R2,R7,LSL #2

 ; Get number of values to fill from end and start of buffer
 ; R7 = samples up to the ring buffer end, R3 = samples after the wrap
 RSB     R7,R7,#SmpSize
 CMP     R7,R3
 MOVGT   R7,R3
 SUB     R3,R3,R7

 ; Fill from end of filled buffer
 CMP     R7,#0
 BLE     FillSound32_Loop1_End
FillSound32_Loop1
 LDR     R5,[R0],#4
 ; R6 = high halfword, R5 = low halfword (sign extended)
 MOV     R6,R5,ASR #16
 MOV     R5,R5,LSL #16
 MOV     R5,R5,ASR #16
 MUL     R6,R4,R6
 MUL     R5,R4,R5
 ; drop the 16 fraction bits of the scale factor
 MOV     R6,R6,ASR #16
 MOV     R5,R5,ASR #16
 STR     R6,[R1],#4
 STR     R5,[R1],#4
 SUBS    R7,R7,#1
 BGT     FillSound32_Loop1
FillSound32_Loop1_End

 ; Fill from start of buffer
 CMP     R3,#0
 BLE     FillSound32_Loop2_End
FillSound32_Loop2
 LDR     R5,[R2],#4
 MOV     R6,R5,ASR #16
 MOV     R5,R5,LSL #16
 MOV     R5,R5,ASR #16
 MUL     R6,R4,R6
 MUL     R5,R4,R5
 MOV     R6,R6,ASR #16
 MOV     R5,R5,ASR #16
 STR     R6,[R1],#4
 STR     R5,[R1],#4
 SUBS    R3,R3,#1
 BGT     FillSound32_Loop2
FillSound32_Loop2_End

 _ENDPROC

;-------------------------------------------------------------------------------
; CheckSound
;
; Finds the largest absolute value in the buffer and, if it exceeds &7FFF,
; rescales every word so that the peak fits the signed 16-bit range.
;
; Both loops test the end condition after the body, so at least two words are
; read (and, when rescaling, at least one is rewritten) even if R1 >= R2:
; callers must not pass an empty buffer.
;
; In  - R1  buffer /* 16-bit stereo linear signed but stored as words */
;       R2  buffer end
;       R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------
 ALIGN

CheckSound _FNAME
 _DEFPROC "R0-R7"

 ; Get max wave value
 ; R7 = &7FFF (limit), R3 = running maximum, starting at the limit
 MOV     R4,R1
 MOV     R7,#&7F00
 ORR     R7,R7,#&FF
 MOV     R3,R7
CheckSound_Read_Loop
 ; two words (one stereo sample) per iteration
 LDR     R0,[R4],#4
 CMP     R0,#0
 RSBLT   R0,R0,#0
 CMP     R3,R0
 MOVLT   R3,R0
 LDR     R0,[R4],#4
 CMP     R0,#0
 RSBLT   R0,R0,#0
 CMP     R3,R0
 MOVLT   R3,R0
 CMP     R4,R2
 BLO     CheckSound_Read_Loop

 ; nothing above the limit: leave the buffer untouched
 CMP     R3,R7
 _ENDPROC LE

 ; Invert value
 ; R5 = (&7FFF<<15) / (max>>1), i.e. about (&7FFF/max) as 16.16 fixed point
 MOV     R4,R7,LSL #15
 MOV     R3,R3,LSR #1
 _DIVIDE R5,R4,R3,R6
 ; store new attenuation

CheckSound_Write_Loop
 LDR     R4,[R1]
 MUL     R4,R5,R4
 MOV     R4,R4,ASR #16
 STR     R4,[R1],#4
 CMP     R1,R2
 BLO     CheckSound_Write_Loop

 _ENDPROC

;-------------------------------------------------------------------------------
; SaveDesktop
;
; Saves the desktop state that RestoreDesktop puts back later: screen memory
; size, palette (modes of up to 256 colours only), screen mode, Delirium and
; ScreenBlanker settings (skipped when a screen saver task is registered) and
; the iconbar auto-fronting configuration. The Delirium saver, the screen
; blanker and iconbar auto-fronting are switched off as part of the save.
;
; In  - R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------
 ALIGN
DeliriumDisable
 = "Delirium_Disable",0
DeliriumEnable
 = "Delirium_Enable",0
DeliriumVar
 = "Delirium$$Enabled",0
 ALIGN
; OS_ReadVduVariables lists, each terminated by -1
VDU_SizeList
 DCD     150
 DCD     -1
VDU_List
 DCD     9
 DCD     -1
WimpAutoFronting_Disable
 = "Configure WimpAutoFrontIconbar Off",0
WimpAutoFronting_Enable
 = "Configure WimpAutoFrontIconbar On",0

 ALIGN
SaveDesktop _FNAME
 _DEFPROC "R0-R11"

 ; save screen memory
 ADR     R0,VDU_SizeList
 ADD     R1,R12,#Glb_DesktopScreenMemory
 SWI     XOS_ReadVduVariables
; MOV     R0,#2
; SWI     XOS_ReadDynamicArea
; STR     R1,[R12,#Glb_DesktopScreenMemory]

 ; save palette
 ADR     R0,VDU_List
 ADD     R1,R12,#Glb_DesktopLog2BPP
 SWI     XOS_ReadVduVariables
 LDR     R0,[R12,#Glb_DesktopLog2BPP]
 CMP     R0,#3
 BHI     SaveDesktop_EndPalette         ; more than 256 colours: no palette
 MOV     R0,#-1                         ; current mode
 MOV     R1,#-1                         ; current palette
 ADD     R2,R12,#Glb_DesktopPalette
 MOV     R3,#1024                       ; buffer size: 256 entries of 4 bytes
 MOV     R4,#0                          ; flags (was a second write to R3,
                                        ; leaving size 0 and R4 undefined)
 SWI     XColourTrans_ReadPalette
SaveDesktop_EndPalette

 ; save screen mode
 MOV     R0,#1
 SWI     XWimp_ReadSysInfo
 STR     R0,[R12,#Glb_DesktopMode]

 ; don't do much if screen saver
 LDR     R0,[R12,#Glb_SaverTask]
 CMP     R0,#0
 BNE     SaveDesktop_SaverEnd

 ; save Delirium saver status and disable
 ; the first byte of the variable's value is kept in Glb_DeliriumValue
 ; (preset to 0 so that a missing variable reads as "not enabled")
 ADR     R0,DeliriumVar
 ADD     R1,R12,#Glb_DeliriumValue
 MOV     R2,#0
 STRB    R2,[R1]
 MOV     R2,#4
 MOV     R3,#0
 MOV     R4,#0
 SWI     XOS_ReadVarVal
 LDRB    R2,[R1]
 CMP     R2,#&31 ; 1
 ADREQ   R0,DeliriumDisable
 SWIEQ   XOS_CLI

 ; save screenblanker delay and disable
 MOV     R0,#4
 SWI     XScreenBlanker_Control
 ; bug seems to return R1 in sec not centisec, R0 seem to be in 1/5 sec
 ; I will test if R0 > R1 just in case it's corrected in the future
 CMP     R0,R1
 MOVGT   R0,#100
 MULGT   R1,R0,R1
 STR     R1,[R12,#Glb_BlankerDelay]
 MOV     R0,#3
 MOV     R1,#0
 SWI     XScreenBlanker_Control

 ; mark info as saved
 LDR     R11,[R12,#Glb_Flags]
 ORR     R11,R11,#glb_flag_RestoreSaver
 STR     R11,[R12,#Glb_Flags]
SaveDesktop_SaverEnd

 ; save iconbar autofronting
 ; bit 7 of CMOS byte 22 is stored; when it is clear the Off command is run
 MOV     R0,#161
 MOV     R1,#22
 SWI     XOS_Byte
 ANDS    R2,R2,#&80
 STR     R2,[R12,#Glb_WimpAutoFronting]
 ADREQ   R0,WimpAutoFronting_Disable
 SWIEQ   XOS_CLI

 _ENDPROC

;-------------------------------------------------------------------------------
; RestoreDesktop
;
; Puts back the state saved by SaveDesktop: iconbar auto-fronting, screen
; blanker delay and Delirium saver (only if glb_flag_RestoreSaver is set),
; screen memory size, screen mode and palette.
;
; In  - R12 Global Header
;
; Out - returns early (V set) if resizing the screen memory or setting the
;       mode fails
;-------------------------------------------------------------------------------

 ALIGN
RestoreDesktop _FNAME
 _DEFPROC "R0-R11"

 ; restore iconbar autofronting
 LDR     R2,[R12,#Glb_WimpAutoFronting]
 CMP     R2,#0
 ADREQL  R0,WimpAutoFronting_Enable
 SWIEQ   XOS_CLI

 ; reset possible screen saver info
 ; (BIC/STR leave the flags of the TST untouched for the BEQ)
 LDR     R11,[R12,#Glb_Flags]
 TST     R11,#glb_flag_RestoreSaver
 BIC     R11,R11,#glb_flag_RestoreSaver
 STR     R11,[R12,#Glb_Flags]
 BEQ     RestoreDesktop_SaverEnd

RestoreDesktop_Saver
 ; restore screenblanker delay
 MOV     R0,#3
 LDR     R1,[R12,#Glb_BlankerDelay]
 SWI     XScreenBlanker_Control

 ; restore Delirium saver
 ADD     R1,R12,#Glb_DeliriumValue
 LDRB    R2,[R1]
 CMP     R2,#&31 ; 1
 ADREQL  R0,DeliriumEnable
 SWIEQ   XOS_CLI
RestoreDesktop_SaverEnd

 ; ensure screen memory
 ; OS_ReadDynamicArea returns the area's base address in R0, so the area
 ; number has to be reloaded before OS_ChangeDynamicArea.
 MOV     R0,#2
 SWI     XOS_ReadDynamicArea
 LDR     R2,[R12,#Glb_DesktopScreenMemory]
 SUBS    R1,R2,R1                       ; R1 = bytes missing (grow only here)
 MOVGT   R0,#2
 SWIGT   XOS_ChangeDynamicArea
 _ENDPROC VS

 ; reset mode
 LDR     R0,[R12,#Glb_DesktopMode]
 SWI     XWimp_SetMode
 _ENDPROC VS

 ; reset screen memory
 MOV     R0,#2
 SWI     XOS_ReadDynamicArea
 LDR     R2,[R12,#Glb_DesktopScreenMemory]
 SUBS    R1,R2,R1                       ; R1 = signed change back to saved size
 MOV     R0,#2                          ; area number again, see above
 SWI     XOS_ChangeDynamicArea
 _ENDPROC VS

 ; restore palette
 LDR     R0,[R12,#Glb_DesktopLog2BPP]
 CMP     R0,#3
 BHI     RestoreDesktop_EndPalette      ; more than 256 colours: no palette
 MOV     R0,#-1                         ; current mode
 MOV     R1,#-1                         ; current palette
 ADD     R2,R12,#Glb_DesktopPalette
 MOV     R3,#0                          ; reserved
 MOV     R4,#0                          ; flags (was a second write to R3,
                                        ; leaving R4 undefined)
 SWI     XColourTrans_WritePalette
RestoreDesktop_EndPalette

 _ENDPROC

;-------------------------------------------------------------------------------
; Buffers_Fill
;
; Periodic entry point: it re-registers itself with OS_CallAfter (delay 1,
; R2 = global pointer) on every call. The actual work is deferred to
; Buffers_CallBack through OS_AddCallBack; glb_flag_CallBack ensures only one
; callback is pending at a time.
;
; In  - R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------
 ALIGN

Buffers_Fill _FNAME
 _DEFPROC "R0-R12"

 ; We may be called from within a long buffer filling interrupt routine
 ; which would have switched to SVC and enabled interrupts
 ; so we must wait till the system is about to give the control back
 ; to the user before doing the job
 LDR     R0,[R12,#Glb_Flags]
 TST     R0,#glb_flag_CallBack
 BNE     Buffers_Fill_Next              ; a callback is already pending
 ORR     R0,R0,#glb_flag_CallBack
 STR     R0,[R12,#Glb_Flags]

 ADRL    R0,Buffers_CallBack
 MOV     R1,R12
 SWI     XOS_AddCallBack

Buffers_Fill_Next
 ; Request next buffer filling
 MOV     R0,#1
 ADRL    R1,Buffers_Fill
 MOV     R2,R12
 SWI     XOS_CallAfter

 _ENDPROC

; Buffers_CallBack
;
; Callback registered by Buffers_Fill (R12 = Global Header, as passed in R1 to
; OS_AddCallBack). Runs Buffers_CheckDMA and then clears glb_flag_CallBack so
; that Buffers_Fill may register the next callback.
Buffers_CallBack _FNAME
 _DEFPROC "R0-R12"

 BL      Buffers_CheckDMA

 ; allow the next callback to be queued
 LDR     R0,[R12,#Glb_Flags]
 BIC     R0,R0,#glb_flag_CallBack
 STR     R0,[R12,#Glb_Flags]

 _ENDPROC

;-------------------------------------------------------------------------------
; Buffers_CheckDMA
;
; Copies new data from the two sound DMA buffers into the ring buffer of the
; first driver block (Glb_Buffers) and records the time span covered.
; A DMA buffer is considered new when the words at offsets 4 and 60 differ
; from the values remembered at the previous copy; a buffer whose two probe
; words are both zero is additionally copied once the recorded end time has
; passed.
;
; Runs with interrupts disabled (OS_IntOff .. OS_IntOn; interrupts are enabled
; on exit regardless of the entry state) and is guarded against re-entry by
; glb_flag_DMAFilling.
;
; In  - R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------
 ALIGN
Buffers_CheckDMA _FNAME
 _DEFPROC "R0-R11"

 SWI     XOS_IntOff

 ; re-entrancy: set the flag, leave at once if it was already set
 ; (End2 does not clear the flag, it belongs to the other invocation)
 LDR     R0,[R12,#Glb_Flags]
 ORR     R1,R0,#glb_flag_DMAFilling
 STR     R1,[R12,#Glb_Flags]
 TST     R0,#glb_flag_DMAFilling
 BNE     Buffers_CheckDMA_End2

 ; DMA buffer to monitor?
 LDR     R0,[R12,#Glb_DMABuffer1]
 CMP     R0,#0
 BEQ     Buffers_CheckDMA_End
 LDR     R0,[R12,#Glb_DMABuffer2]
 CMP     R0,#0
 BEQ     Buffers_CheckDMA_End

 ADD     R11,R12,#Glb_Buffers

 ; Do nothing if buffer was not read since at least 1 second
 ; (Driver_Time is a countdown, reloaded with 100 by Buffer_Find_DMA)
 LDR     R0,[R11,#Driver_Time]
 SUBS    R0,R0,#1
 BLT     Buffers_CheckDMA_End
 STR     R0,[R11,#Driver_Time]

 ; update samplerate
 MOV     R0,#0
 BL      hardware_bufferrate

 ; remember the current time (R0 = cs, R1 = ticks, see the stores at Done)
 ; and the ring buffer position before any copy
 BL      Timer_GetTime
 LDR     R2,[R11,#Driver_DataOffset]
 STR     R2,[R11,#Driver_OldDataOffset]
 _PUSH   "R0,R1"

 ; counter: at most two buffers are examined, starting with the one named
 ; by Glb_DMACurrentBuf. Every exit below goes through Buffers_CheckDMA_Done,
 ; which pulls the two words pushed above.
 MOV     R10,#2
 LDR     R1,[R12,#Glb_DMACurrentBuf]
 CMP     R1,#2
 BEQ     Buffers_CheckDMA_2
Buffers_CheckDMA_1
 SUBS    R10,R10,#1
 BLT     Buffers_CheckDMA_Done
 ; changed ?
 LDR     R0,[R12,#Glb_DMABuffer1]
 LDR     R1,[R12,#Glb_DMABufVal11]
 LDR     R2,[R12,#Glb_DMABufVal12]
 LDR     R3,[R0,#4]
 LDR     R4,[R0,#60]
 ; yes
 CMP     R1,R3
 CMPEQ   R2,R4
 BNE     Buffers_CheckDMA_1_Fill
 ; sound, so probably no
 CMP     R3,#0
 CMPEQ   R4,#0
 BNE     Buffers_CheckDMA_Done
 ; probably no sound, must fill in from time to time
 SWI     XOS_ReadMonotonicTime
 LDR     R1,[R11,#Driver_RecordTimeEnd+0] ; cs
 SUBS    R0,R0,R1
 BLE     Buffers_CheckDMA_Done
Buffers_CheckDMA_1_Fill
 ; remember the probe words, copy buffer 1, then expect buffer 2 next
 STR     R3,[R12,#Glb_DMABufVal11]
 STR     R4,[R12,#Glb_DMABufVal12]
 LDR     R0,[R12,#Glb_DMABuffer1]
 BL      Buffers_FillDMA
 MOV     R1,#2
 STR     R1,[R12,#Glb_DMACurrentBuf]

Buffers_CheckDMA_2
 SUBS    R10,R10,#1
 BLT     Buffers_CheckDMA_Done
 ; changed ?
 LDR     R0,[R12,#Glb_DMABuffer2]
 LDR     R1,[R12,#Glb_DMABufVal21]
 LDR     R2,[R12,#Glb_DMABufVal22]
 LDR     R3,[R0,#4]
 LDR     R4,[R0,#60]
 ; yes
 CMP     R1,R3
 CMPEQ   R2,R4
 BNE     Buffers_CheckDMA_2_Fill
 ; sound, so probably no
 CMP     R3,#0
 CMPEQ   R4,#0
 BNE     Buffers_CheckDMA_Done
 ; probably no sound, must fill in from time to time
 SWI     XOS_ReadMonotonicTime
 LDR     R1,[R11,#Driver_RecordTimeEnd+0] ; cs
 SUBS    R0,R0,R1
 BLE     Buffers_CheckDMA_Done
Buffers_CheckDMA_2_Fill
 ; remember the probe words, copy buffer 2, then expect buffer 1 next
 STR     R3,[R12,#Glb_DMABufVal21]
 STR     R4,[R12,#Glb_DMABufVal22]
 LDR     R0,[R12,#Glb_DMABuffer2]
 BL      Buffers_FillDMA
 MOV     R1,#1
 STR     R1,[R12,#Glb_DMACurrentBuf]
 B       Buffers_CheckDMA_1

Buffers_CheckDMA_Done
 ; R0 = cs, R1 = ticks as returned by Timer_GetTime above
 _PULL    "R0,R1"
 ; unchanged ?
 LDR     R3,[R11,#Driver_DataOffset]
 LDR     R4,[R11,#Driver_OldDataOffset]
 CMP     R3,R4
 BEQ     Buffers_CheckDMA_End
 ; compute time to end of buffer
 STR     R0,[R11,#Driver_RecordTimeStart+0] ; cs
 STR     R1,[R11,#Driver_RecordTimeStart+4] ; ticks
 LDR     R2,[R11,#Driver_fmtSampleRate]
 MOV     R5,#100
 LDR     R6,[R12,#Glb_TimerTicksPerCS]
 ; ticks per seconds
 MUL     R6,R5,R6
 ; nr samples added (32 bits)
 SUBS    R3,R3,R4
 ADDLT   R3,R3,#SmpSize
 ; * ticks per seconds (-> 64 bits)
 _UMUL64 R4,R5,R6,R3,R7
 ; / sample rate = nr ticks (-> 32 bits)
 ; NOTE(review): a zero Driver_fmtSampleRate is not checked before dividing
 _UDIV64 R6,R4,R5,R2,R7
 ; ticks end += nr ticks
 ADD     R1,R1,R6
 ; nr ticks / nr of ticks per cs = nr cs
 LDR     R4,[R12,#Glb_TimerTicksPerCS]
 MOV     R5,R1
 MOV     R6,R4
 _DIVIDE R7,R5,R6,R8
 ; cs end += nr cs
 ADD     R0,R0,R7
 STR     R0,[R11,#Driver_RecordTimeEnd+0] ; cs
; ticks end -= (nr of ticks per cs * nr cs)
 MUL     R7,R4,R7
 SUB     R1,R1,R7
 STR     R1,[R11,#Driver_RecordTimeEnd+4] ; ticks

Buffers_CheckDMA_End
 ; re-entrancy: release the flag taken on entry
 LDR     R0,[R12,#Glb_Flags]
 BIC     R0,R0,#glb_flag_DMAFilling
 STR     R0,[R12,#Glb_Flags]
Buffers_CheckDMA_End2
 SWI     XOS_IntOn

 _ENDPROC

;-------------------------------------------------------------------------------
; Buffers_FillDMA
;
; Appends the contents of one sound DMA buffer to the ring buffer of the first
; driver block (Glb_Buffers) and advances Driver_DataOffset.
;   16-bit system: the words are copied unchanged, four at a time.
;   8-bit system : the channels of every sample are converted from log to
;                  linear (SmpLogToLin) and mixed into one stereo word using
;                  per-channel left/right weights derived from Sound_Stereo.
;
; In  - R0  DMA buffer ptr
;       R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------
 ALIGN

Buffers_FillDMA _FNAME
 _DEFPROC "R0-R11"

 ADD     R11,R12,#Glb_Buffers

 MOV     R10,R0                         ; R10 = DMA buffer ptr
 ; Fill how much of these?
 ; R0 nr of channels
 ; R1 nr of samples
 MOV     R0,#0
 MOV     R1,#0
 MOV     R2,#0
 MOV     R3,#0
 MOV     R4,#0
 SWI     XSound_Configure
 ; Restrict buffer size?
 MOV     R5,#SmpSize
 CMP     R1,R5
 MOVGT   R1,R5
 ; Select filling method
 LDR     R5,[R12,#Glb_DMAMode]
 CMP     R5,#0
 BNE     Buffers_FillDMA16

Buffers_FillDMA8
 ; R1 nr of samples
 ; R4 nr of channels
 ; DMA buffer ptr is in R10
 MOVS    R4,R0
 BEQ     Buffers_FillDMA_End
 _PUSH   "R1"
 ; get scale factor given the nr of channels
 ; R5 = &200 / channels
 MOV     R0,R4
 MOV     R6,#&200
 _DIVIDE R5,R6,R0,R7
 ; build left and right scale factors for each channel
 ; The table is written backwards from its last word, channel R4 first:
 ; per channel [left weight][right weight], left = R5 - right.
 MOV     R0,R4
 ADD     R7,R12,#Glb_ScaleTable
 ADD     R7,R7,R0,LSL #3
 SUB     R7,R7,#4
Buffers_FillDMA8_StereoLoop
 MOV     R1,#-128                       ; read the position without changing it
 SWI     XSound_Stereo
 ; map the position to 0..255 (larger = further right)
 CMP     R1,#0
 ADD     R1,R1,#127
 ADDGE   R1,R1,#1
 MUL     R6,R1,R5
 MOV     R6,R6,LSR #8
 STR     R6,[R7],#-4                    ; right weight
 RSB     R6,R6,R5
 STR     R6,[R7],#-4                    ; left weight
 SUBS    R0,R0,#1
 BGT     Buffers_FillDMA8_StereoLoop

Buffers_FillDMA8_Fill
 ; R1 nr of samples
 ; R4 nr of channels
 _PULL   "R1"

 ; Move to start of filled part
 ADD     R2,R11,#Driver_Data
 LDR     R0,[R11,#Driver_DataOffset]
 ADD     R2,R2,R0,LSL #2
 ; Set final position
 ADD     R3,R0,R1
 CMP     R3,#SmpSize
 SUBGE   R3,R3,#SmpSize
 STR     R3,[R11,#Driver_DataOffset]
 ; See how many samples to fill to end of buffer and from start of buffer
 ; R0 nr before to end of buffer
 ; R1 nr from start of buffer
 RSB     R0,R0,#SmpSize
 CMP     R0,R1
 MOVGT   R0,R1
 SUB     R1,R1,R0
 MOV     R3,R10                         ; R3 = read pointer in the DMA buffer
 ADRL    R5,SmpLogToLin

 ; Fill end of buffer
 CMP     R0,#0
 BLE     Buffers_FillDMA8_Loop1_End
Buffers_FillDMA8_Loop1
 ADD     R6,R12,#Glb_ScaleTable
 MOV     R7,R4
 MOV     R8,#0                          ; R8 = sum of value * left weight
 MOV     R9,#0                          ; R9 = sum of value * right weight
 ; mix channels together in left and right data
Buffers_FillDMA8_Loop12
 LDRB    R10,[R3],#1
 LDRB    R10,[R5,R10]
 MOV     R10,R10,LSL #24                ; sign extend the 8-bit linear value
 MOV     R10,R10,ASR #24
 LDR     R14,[R6],#4
 MLA     R8,R10,R14,R8
 LDR     R14,[R6],#4
 MLA     R9,R10,R14,R9
 SUBS    R7,R7,#1
 BGT     Buffers_FillDMA8_Loop12

 ; Pack as R9 in bits 16-31 and R8 in bits 0-15, exactly as Loop2 does.
 ; (This loop used to pack the two halves the other way round, so the
 ; channels swapped at the ring buffer wrap.)
 ; NOTE(review): the direction was chosen to match Loop2 and the presumed
 ; "left in the low halfword" layout of the words copied verbatim by
 ; Buffers_FillDMA16 - confirm against the sound system documentation.
 MOV     R8,R8,LSL #16
 MOV     R9,R9,LSL #16
 ORR     R14,R9,R8,LSR #16
 STR     R14,[R2],#4
 SUBS    R0,R0,#1
 BGT     Buffers_FillDMA8_Loop1
Buffers_FillDMA8_Loop1_End

 ; Fill start of buffer
 CMP     R1,#0
 BLE     Buffers_FillDMA8_Loop2_End
 ADD     R2,R11,#Driver_Data
Buffers_FillDMA8_Loop2
 ADD     R6,R12,#Glb_ScaleTable
 MOV     R7,R4
 MOV     R8,#0
 MOV     R9,#0
 ; mix channels together in left and right data
Buffers_FillDMA8_Loop22
 LDRB    R10,[R3],#1
 LDRB    R10,[R5,R10]
 MOV     R10,R10,LSL #24
 MOV     R10,R10,ASR #24
 LDR     R14,[R6],#4
 MLA     R8,R10,R14,R8
 LDR     R14,[R6],#4
 MLA     R9,R10,R14,R9
 SUBS    R7,R7,#1
 BGT     Buffers_FillDMA8_Loop22

 MOV     R8,R8,LSL #16
 MOV     R9,R9,LSL #16
 ORR     R14,R9,R8,LSR #16
 STR     R14,[R2],#4
 SUBS    R1,R1,#1
 BGT     Buffers_FillDMA8_Loop2
Buffers_FillDMA8_Loop2_End

 B       Buffers_FillDMA_End

Buffers_FillDMA16
 ; R1 nr of samples
 ; DMA buffer ptr is in R10
 ; Move to start of filled part
 ADD     R2,R11,#Driver_Data
 LDR     R0,[R11,#Driver_DataOffset]
 ADD     R2,R2,R0,LSL #2
 ; Set final position
 ADD     R3,R0,R1
 CMP     R3,#SmpSize
 SUBGE   R3,R3,#SmpSize
 STR     R3,[R11,#Driver_DataOffset]
 ; See how many samples to fill to end of buffer and from start of buffer
 ; R0 nr before to end of buffer
 ; R1 nr from start of buffer
 RSB     R0,R0,#SmpSize
 CMP     R0,R1
 MOVGT   R0,R1
 SUB     R1,R1,R0
 MOV     R3,R10

 ; Fill end of buffer
 ; (four samples per iteration: counts are assumed to be multiples of 4)
 CMP     R0,#0
 BLE     Buffers_FillDMA16_Loop1_End
Buffers_FillDMA16_Loop1
 LDMIA   R3!,{R4-R7}
 STMIA   R2!,{R4-R7}
 SUBS    R0,R0,#4
 BGT     Buffers_FillDMA16_Loop1
Buffers_FillDMA16_Loop1_End

 ; Fill start of buffer
 CMP     R1,#0
 BLE     Buffers_FillDMA16_Loop2_End
 ADD     R2,R11,#Driver_Data
Buffers_FillDMA16_Loop2
 LDMIA   R3!,{R4-R7}
 STMIA   R2!,{R4-R7}
 SUBS    R1,R1,#4
 BGT     Buffers_FillDMA16_Loop2
Buffers_FillDMA16_Loop2_End

Buffers_FillDMA_End
 _ENDPROC

;-------------------------------------------------------------------------------
; Buffer_Lock
;
; Sets or clears glb_flag_DMAFilling, the flag Buffers_CheckDMA uses as its
; re-entry guard. Only acts when R11 is the first driver block (the DMA one).
;
; In  - R0  1 Lock, 0 Unlock
;       R11 Driver ptr
;       R12 Global Header
;
; Out -
;-------------------------------------------------------------------------------

 ALIGN
Buffer_Lock _FNAME
 _DEFPROC "R0-R1"

 ; Any driver block other than the one at Glb_Buffers needs no lock
 ADD     R1,R12,#Glb_Buffers
 CMP     R1,R11
 _ENDPROC NE

 ; Start from the unlocked state, then set the bit again if a lock was asked
 LDR     R1,[R12,#Glb_Flags]
 BIC     R1,R1,#glb_flag_DMAFilling
 CMP     R0,#0
 ORRNE   R1,R1,#glb_flag_DMAFilling
 STR     R1,[R12,#Glb_Flags]

 _ENDPROC

;-------------------------------------------------------------------------------
; Buffer_Find
;
; Locates the driver block of a sound source and makes sure its ring buffer
; holds current data:
;   source 0 (DMA)   : re-arms the Driver_Time countdown and runs
;                      Buffers_CheckDMA, or clears the buffer if no DMA
;                      buffer address is known.
;   other sources    : derives the fill position from the SoundDriver counter;
;                      if that lies outside the buffer a new recording into
;                      the driver block is started. Any SoundDriver error
;                      clears the buffer.
;
; In  - R0  source type
;       R12 Global Header
;
; Out - R2  buffer
;       R11 Driver ptr
;-------------------------------------------------------------------------------
 ALIGN
Buffer_Find _FNAME
 _DEFPROC "R0-R1,R3-R10"
 ; R11 = Glb_Buffers + source * Size_Driver
 ADD     R11,R12,#Glb_Buffers
 MOV     R4,#Size_Driver
 MLA     R11,R0,R4,R11
 CMP     R0,#0
 BEQ     Buffer_Find_DMA

Buffer_Find_SoundDriver
 ; Save handler
 MOV     R9,R0
 ; Check if not still recording a previous buffer
 MOV     R0,#0 ; read
 MOV     R1,R9
 SWI     &6DBC3 ; XSoundDriver_Control
                ; set counter in R3
 BVS     Buffer_Find_Clear
 ; position = |saved counter - current counter| / 4; usable only if it is
 ; still inside the ring buffer
 LDR     R0,[R11,#Driver_Time]
 SUBS    R0,R0,R3
 RSBLT   R0,R0,#0
 MOV     R0,R0,LSR #2
 CMP     R0,#SmpSize
 STRLO   R0,[R11,#Driver_DataOffset]
 BLO     Buffer_Find_Recorded
Buffer_Find_SoundDriver_Record
 ; See new filling position
 MOV     R0,#0
 STR     R0,[R11,#Driver_DataOffset]
 ; Request SoundDriver to fill the buffer
 ; R3 = start of the block (RIFF header), R4 = header size + data size
 MOV     R0,#1 ; write
 MOV     R1,R9
 MOV     R2,#2 ; external buffer
 ADD     R3,R11,#Driver_RIFF
 MOV     R4,#(Driver_DataEnd - Driver_Data)
 ADD     R4,R4,#(Driver_Data - Driver_RIFF)
 SWI     &6DBC9 ; XSoundDriver_Record
 BVS     Buffer_Find_Clear
 MOV     R0,#1 ; write
 MOV     R1,R9
 MOV     R2,#1 ; play
 MOV     R3,#0
 MOV     R4,#0
 MOV     R5,#0
 SWI     &6DBC3 ; XSoundDriver_Control
 BVS     Buffer_Find_Clear
 ; Save current count
 STR     R3,[R11,#Driver_Time]
 B       Buffer_Find_Recorded

Buffer_Find_DMA
 ; Ask callback to work for a few buffers
 ; (Buffers_CheckDMA decrements this and stops copying once it runs out)
 MOV     R0,#100
 STR     R0,[R11,#Driver_Time]

Buffer_Find_DMA_Record
 ; Select filling method
 LDR     R0,[R12,#Glb_DMABuffer1]
 CMP     R0,#0
 BEQ     Buffer_Find_Clear              ; no DMA buffer known: return silence
 BL      Buffers_CheckDMA
 B       Buffer_Find_Recorded

Buffer_Find_Clear
 ; reset the position and zero the whole ring buffer, four words at a time
 MOV     R0,#0
 STR     R0,[R11,#Driver_DataOffset]
 MOV     R1,#SmpSize
 ADD     R2,R11,#Driver_Data
 ADD     R0,R2,R1,LSL #2
 MOV     R3,#0
 MOV     R4,#0
 MOV     R5,#0
 MOV     R6,#0
Buffer_Find_Clear_Loop
 STMIA   R2!,{R3-R6}
 CMP     R2,R0
 BLO     Buffer_Find_Clear_Loop

Buffer_Find_Recorded
 ; Returns its address
 ADD     R2,R11,#Driver_Data
 _ENDPROC

;-------------------------------------------------------------------------------
; isqrt
;
; Integer square root of an unsigned 32-bit value, computed bit by bit
; with 16 unrolled steps (one per result bit, highest bit first).
;
; In  - R0 value
;
; Out - R0 square root (integer part, 16 bits)
;
; R1 and R2 are used as work registers and are listed in _DEFPROC.
;-------------------------------------------------------------------------------
 ALIGN

isqrt _FNAME
 _DEFPROC "R1-R2"
 ; R1 = constant added at every step
 ; R2 = trial value: a "01" marker in the top two bits, result bits
 ;      collected so far in the low bits
 MOV     R1,#3<<30
 MOV     R2,#1<<30
 ; loop (unrolled), step i does:
 ;   CMP    - does the trial value (R2 rotated right by 2*i) still fit
 ;            in the remainder R0 ?
 ;   SUBHS  - if so, take it out of the remainder
 ;   ADC    - shift R2 left by one, bring in the carry of the CMP as the
 ;            new result bit; adding R1 puts the "01" marker back on top
 ; The SUBHS does not set flags, so the ADC sees the carry of the CMP.
 CMP     R0,R2
 SUBHS   R0,R0,R2
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*1
 SUBHS   R0,R0,R2,ROR #2*1
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*2
 SUBHS   R0,R0,R2,ROR #2*2
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*3
 SUBHS   R0,R0,R2,ROR #2*3
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*4
 SUBHS   R0,R0,R2,ROR #2*4
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*5
 SUBHS   R0,R0,R2,ROR #2*5
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*6
 SUBHS   R0,R0,R2,ROR #2*6
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*7
 SUBHS   R0,R0,R2,ROR #2*7
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*8
 SUBHS   R0,R0,R2,ROR #2*8
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*9
 SUBHS   R0,R0,R2,ROR #2*9
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*10
 SUBHS   R0,R0,R2,ROR #2*10
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*11
 SUBHS   R0,R0,R2,ROR #2*11
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*12
 SUBHS   R0,R0,R2,ROR #2*12
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*13
 SUBHS   R0,R0,R2,ROR #2*13
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*14
 SUBHS   R0,R0,R2,ROR #2*14
 ADC     R2,R1,R2,LSL #1
 CMP     R0,R2,ROR #2*15
 SUBHS   R0,R0,R2,ROR #2*15
 ADC     R2,R1,R2,LSL #1
 ; Strip the marker from the top two bits, what is left is the root;
 ; the remainder that was in R0 is dropped
 BIC     R0,R2,#3<<30

 _ENDPROC

;-------------------------------------------------------------------------------
; rewind
;
; Work out at which sample of the (circular) sample buffer the FFT has to
; start reading: go back from Driver_DataOffset by the samples not yet
; played (derived from the time left until Driver_RecordTimeEnd) plus the
; samples the FFT itself needs (R6 << R4), limited to SmpSize.
;
; In  - R11 Driver data
;       R12 Global Header
;       R4  shift count, R6 value shifted by it (swi_FillFFT passes the
;           FFT log2 size and the stepping)
;
; Out - [R11,#Driver_FFTLastInBuf]
;       All registers are listed in _DEFPROC
;-------------------------------------------------------------------------------
rewind _FNAME
 _DEFPROC "R0-R12"
 ; R4 and R6 get overwritten below but are needed again at rewind_end
 _PUSH   "R4,R6"
 BL      Timer_GetTime ; R0-R1
 ; diff with record end time, R4 = end cs - current cs
 LDR     R2,[R11,#Driver_RecordTimeEnd+0] ; cs
 LDR     R3,[R11,#Driver_RecordTimeEnd+4] ; ticks
 SUBS    R4,R2,R0
 ; after end ? then nothing to rewind for time
 MOVLT   R0,#0
 BLT     rewind_end
 ; more than 20 cs before the end ? ignore the time as well
 CMP     R4,#20
 MOVGT   R0,#0
 BGT     rewind_end
 ; R5 = end ticks - current ticks
 SUB     R5,R3,R1
 ; total nr ticks = nr ticks + nr of cs * nr ticks per cs
 LDR     R9,[R12,#Glb_TimerTicksPerCS]
 MLA     R5,R4,R9,R5
 ; after end ?
 CMP     R5,#0
 MOVLT   R0,#0
 BLT     rewind_end
 ; nr ticks per second = 100 * nr ticks per cs
 MOV     R2,#100
 MUL     R9,R2,R9
 ; total nr ticks * sample rate
 ; NOTE(review): presumably a 64-bit product of R5 and R3 in R4/R1 with
 ; R6 as work register - confirm against the _UMUL64 macro
 LDR     R3,[R11,#Driver_fmtSampleRate]
 _UMUL64 R4,R1,R5,R3,R6
 ; / nr ticks per second = nr samples to rewind
 ; NOTE(review): presumably R0 = R4/R1 pair divided by R9, R6 as work
 ; register - confirm against the _UDIV64 macro
 _UDIV64 R0,R4,R1,R9,R6

rewind_end
 ; Back to the caller's R4 and R6; R0 = nr samples to rewind for time
 _PULL   "R4,R6"
 ; never rewind a negative amount
 CMP     R0,#0
 MOVLT   R0,#0
 ; add what is required by fft
 ADD     R0,R0,R6,LSL R4
 ; rewind more than buffer size? limit it
 CMP     R0,#SmpSize
 MOVGE   R0,#SmpSize
 ; step back from the current data offset, wrapping around the start
 ; of the buffer when the result becomes negative
 LDR     R5,[R11,#Driver_DataOffset]
 SUBS    R5,R5,R0
 ADDLT   R5,R5,#SmpSize
 STR     R5,[R11,#Driver_FFTLastInBuf]

 _ENDPROC

;-------------------------------------------------------------------------------
; SWI FillFFT
;
; In  - R0  player handle
;       R1  buffer
;       R2  buffer end
;       R3  flags
;             bit 2: rescale using player volume
;             bit 3: 0 polar output (arg, andlge), 1 cartesian (x + i y)
;             bit 4-7: log2 nr of samples or 0 for default
;             bit 8-11: steps (0 = default = 1)
;       R12 Global Header
;
; Out - R2  filled buffer end
;-------------------------------------------------------------------------------

; Error block raised by swi_FillFFT when the output buffer (R2 - R1) cannot
; hold the FFT results: error number word followed by a zero-terminated
; message
Err_BufferTooSmall
 DCD     0
 = "Output buffer is too small to contain FFT results",0

 ALIGN

swi_FillFFT _FNAME
 _DEFPROCV "R1,R3-R12"
 MOV     R10,R0

 ; First Read sample buffer

 TST     R0,#1<<31
 BICNE   R0,R0,#1<<31
 MOVNE   R9,#MaxVolume
 BNE     swi_FillFFT_FromSource
 BL      Player_SlotFromHandle
 _ENDPROC VS
 LDR     R0,[R11,#player_soundorigin]
 TST     R3,#4
 LDRNE   R9,[R11,#player_songscalevolume]
 MOVEQ   R9,#MaxVolume
 ; R1,R2 and R9 should be preserved

swi_FillFFT_FromSource
 LDR     R8,[R12,#Glb_SoundDrivers]
 CMP     R0,#-1
 CMPNE   R0,R8
 _ERROR  HI,Err_Bad_Sound_Origin

swi_FillFFT_FromSource_Go
 ; Extract FFT Log2 size in R4
 AND     R4,R3,#&F0
 MOVS    R4,R4,LSR #4
 LDREQ   R4,[R12,#Glb_DefaultFFT]
 CMP     R4,#7
 MOVLT   R4,#7
 CMP     R4,#FFTLog2Size
 MOVGT   R4,#FFTLog2Size
 SUB     R5,R2,R1
 MOV     R6,#8
 CMP     R5,R6,LSL R4
 _ERROR  LO,Err_BufferTooSmall

 _PUSH   "R10" ; R0 to restore
 CMP     R0,#-1
 BEQ     swi_FillFFT_Clear
 _PUSH   "R1,R3-R4"
 ; Extract stepping in R6
 AND     R6,R3,#&F00
 MOVS    R6,R6,LSR #8
 MOVLE   R6,#1
 CMP     R6,#SmpMaxStep
 MOVGT   R6,#SmpMaxStep

 ; Ensure sample buffer is up to date, R0 = source
 BL      Buffer_Find
 ; Returns buffer address in R2, driver ptr in R11

 ; Lock buffer
 MOV     R0,#1
 BL      Buffer_Lock

 ; Compute new FFT on buffer
 ; Rewind from end of buffer filled part
 BL      rewind

 ; Determine power shift
 STR     R4,[R11,#Driver_FFTShift]

 ; 1) Hamming filter + build butterfly + combine (2 reals -> 1 complex)
swi_FillFFT_Butterfly

 ; Extract FFT size
 MOV     R0,#1
 MOV     R3,R0,LSL R4
 _PUSH   "R3,R11-R12"
 ; scale by volume
 MOV     R0,#MaxVolume<<8
 _DIVIDE R12,R0,R9,R1

 SUB     R3,R3,#1 ; cf bittable range [0, size -1]
 ADD     R9,R11,#Driver_Data
 ADD     R10,R9,#(SmpSize<<2) ; end of buffer
 LDR     R1,[R11,#Driver_FFTLastInBuf]
 ADD     R1,R9,R1,LSL #2
 ADD     R2,R11,#Driver_FFT
 MOV     R11,R6,LSL #2 ; steps
 ADRL    R6,FillFFT_BitTable
 ADRL    R7,FillFFT_Hamming
 ; Determine amount of stepping/shifting in tables according to FFT used
 _PUSH   "R9"
 RSB     R9,R4,#FFTLog2Size
 MOV     R0,#1
 MOV     R9,R0,LSL R9
 ; Shift needed for bit reverse (cf. 2 times with 8-bit table)
 RSB     R14,R4,#16

swi_FillFFT_Butterfly_Loop
 ; bit reverse
 AND     R4,R3,#&FF
 LDRB    R5,[R6,R3,LSR #8]
 LDRB    R4,[R6,R4]
 ORR     R5,R5,R4,LSL#8
 LDR     R0,[R1],+R11        ; stereo sample
 LDR     R8,[R7],+R9,LSL #2  ; hamming
 ; bit reverse on x bit
 MOV     R5,R5,LSR R14
 ADD     R5,R2,R5,LSL #3
 ; back to start of buffer ?
 CMP     R1,R10
 SUBHS   R1,R1,#(SmpSize<<2)
 ; volume scale
 MUL     R8,R12,R8
 MOV     R8,R8,ASR #10 ; 2 more to cope with vol > 100%
 MOV     R4,R0,ASL #16
 MOV     R0,R0,ASR #16
 MOV     R4,R4,ASR #16
 MUL     R0,R8,R0
 MUL     R4,R8,R4
 MOV     R0,R0,ASR #14 ; 2 less
 MOV     R4,R4,ASR #14 ; 2 less
 STMIA   R5,{R0,R4}
 SUBS    R3,R3,#1
 BGE     swi_FillFFT_Butterfly_Loop
 _PULL   "R9"

 _PULL   "R3,R11-R12"

 ; Unlock buffer
 MOV     R0,#0
 BL      Buffer_Lock

 ; 2) Danielson-Lanczos section, FFT of complex signal

 ; R2 still FFT data start, R3 FFTSize
 _PUSH   "R11"

 ; Use CPU specialised routines for multiplications,
 ; not really faster but SMULL version is more precise
 LDR     R0,[R12,#Glb_Flags]
 TST     R0,#glb_flag_SMLASupport
 BEQ     swi_FillFFT_DanNormal

LowStages * 7 ; from tests on RiscPC

swi_FillFFT_DanS

 ;------------------
 ; StrongARM version

 _PUSH   "R2,R3"
 ADD     R5,R2,R3,LSL #3

 ; To try to use processor cache a max, perform Danielson-Lanczos
 ; stages [0,LowStages[ on (stages - LosStages) sub-blocks
 ; then stages [LowStages, stages[ on whole block

 ; Danielson-Lanczos stages [0,LowStages[
 CMP     R3,#(1<<LowStages)
 MOVGT   R3,#(1<<LowStages)

swi_FillFFT_DanS_Loop0
 _PUSH   "R5"

 ; Danielson-Lanczos optimised stage 0 and 1 combined
 MOV     R4,R3
 MOV     R1,R2

swi_FillFFT_DanS01_Loop
 LDMIA   R1,{R5,R6,R7,R8,R9,R10,R11,R14} ; k = i + 1, j = i + 2
 ; stage 0
 ADD     R5,R5,R7           ; dr[i]' = dr[i] + dr[k]
 ADD     R6,R6,R8           ; di[i]' = di[i] + di[k]
 SUB     R7,R5,R7,ASL#1     ; dr[k]' = dr[i] - dr[k]
 SUB     R8,R6,R8,ASL#1     ; di[k]' = di[i] - di[k]
 ADD     R9 ,R9 ,R11        ; dr[i+2]' = dr[i+2] + dr[k+2]
 ADD     R10,R10,R14        ; di[i+2]' = di[i+2] + di[k+2]
 SUB     R11,R9 ,R11,ASL#1  ; dr[k+2]' = dr[i+2] - dr[k+2]
 SUB     R14,R10,R14,ASL#1  ; di[k+2]' = di[i+2] - di[k+2]
 ; stage 1
 ADD     R5 ,R5 ,R9         ; dr[i]' = dr[i] + dr[j]
 ADD     R6 ,R6 ,R10        ; di[i]' = di[i] + di[j]
 SUB     R9 ,R5 ,R9 ,ASL#1  ; dr[j]' = dr[i] - dr[j]
 SUB     R10,R6 ,R10,ASL#1  ; di[j]' = di[i] - di[j]
 SUB     R7 ,R7 ,R14        ; dr[i+1]' = dr[i+1] - di[j+1]
 ADD     R8 ,R8 ,R11        ; di[i+1]' = di[i+1] + dr[j+1]
 MOV     R0,R11
 ADD     R11,R7 ,R14,ASL#1  ; dr[j+1]' = dr[i+1] + di[j+1]
 SUB     R14,R8 ,R0 ,ASL#1  ; di[j+1]' = di[i+1] - dr[j+1]
 STMIA   R1!,{R5,R6,R7,R8,R9,R10,R11,R14}

 SUBS    R4,R4,#4
 BGT     swi_FillFFT_DanS01_Loop
swi_FillFFT_DanS01_Loop_End

 ; Danielson-Lanczos stage 2

 ADRL    R1,FillFFT_SinTable30
 MOV     R4,#4

 _PUSH   "R1,R4,R12"
 LDR     R1,[R1,#8]             ; w = wr = wi
 MOV     R6,R2                  ; i0 = 0
 ADD     R5,R2,R3,LSL #3

swi_FillFFT_DanS2_Sections_Loop
 MOV     R4,#4
 ADD     R9,R6,R4,LSL #3        ; j = i0 + mmax

 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
                                ; tr = 1*dr[j] - 0*di[j]
                                ; ti = 1*di[j] + 0*dr[j]
 MOV     R12,R12,LSL #2
 MOV     R14,R14,LSL #2
 SUB     R4,R12,R14
 ADD     R14,R12,R14
 SMULL   R0,R12,R1,R4           ; tr = w*dr[j] - w*di[j]
 SMULL   R0,R14,R1,R14          ; ti = w*di[j] + w*dr[j]

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
 RSB     R4,R11,#0              ; tr = 0*dr[j] - 1*di[j]
 MOV     R11,R10                ; ti = 0*di[j] + 1*dr[j]
 MOV     R10,R4
 MOV     R12,R12,LSL #2
 MOV     R14,R14,LSL #2
 ADD     R4,R12,R14
 RSB     R4,R4,#0
 SUB     R14,R12,R14
 SMULL   R0,R12,R1,R4           ; tr = -w*dr[j] - w*di[j]
 SMULL   R0,R14,R1,R14          ; ti = -w*di[j] + w*dr[j]

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

 MOV     R6,R9                  ; i0 += 2max
 CMP     R6,R5                  ; i < imax
 BLO     swi_FillFFT_DanS2_Sections_Loop
swi_FillFFT_DanS2_Sections_Loop_End
 _PULL   "R1,R4,R12"

 ; Danielson-Lanczos stage 3 and above

 ADD     R1,R1,#2*8
 MOV     R4,R4,LSL #1

 CMP     R4,#(1<<LowStages)
 BGE     swi_FillFFT_DanS_Stages_Loop_End

swi_FillFFT_DanS_Stages_Loop
 _PUSH   "R1,R4,R12"
 MOV     R6,R2                  ; i0 = 0

swi_FillFFT_DanS_Sections_Loop
 LDMIA   R13,{R1,R4}            ; restore r1, r4
 ADD     R9,R6,R4,LSL #3        ; j = i0 + mmax
 ADD     R5,R6,R4,LSL #2
 _PUSH   "R4"

 ; Block 0: normal order
 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
 ADD     R1,R1,#8               ; wr = 1.0, wi = 0.0
                                ; tr =  1*dr[j] -  0*di[j]
                                ; ti =  1*di[j] +  0*dr[j]
 LDMIA   R1!,{R7,R8}            ; wr, wi
 MOV     R12,R12,LSL #2
 MOV     R14,R14,LSL #2
 RSB     R0,R8,#0
 SMULL   R0,R4 ,R14,R0          ; tr = wr*dr[j] - wi*di[j]
 SMLAL   R0,R4 ,R12,R7
 SMULL   R0,R14,R7,R14          ; ti = wr*di[j] + wi*dr[j]
 SMLAL   R0,R14,R8,R12
 MOV     R12,R4

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

swi_FillFFT_DanS_Block0_Loop
 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
 LDMIA   R1!,{R7,R8}            ; wr, wi
 MOV     R10,R10,LSL #2
 MOV     R11,R11,LSL #2
 RSB     R0,R8,#0
 SMULL   R0,R4 ,R11,R0          ; tr = wr*dr[j] - wi*di[j]
 SMLAL   R0,R4 ,R10,R7
 SMULL   R0,R11,R7,R11          ; ti = wr*di[j] + wi*dr[j]
 SMLAL   R0,R11,R8,R10
 MOV     R10,R4

 LDMIA   R1!,{R7,R8}            ; wr, wi
 MOV     R12,R12,LSL #2
 MOV     R14,R14,LSL #2
 RSB     R0,R8,#0
 SMULL   R0,R4 ,R14,R0          ; tr = wr*dr[j] - wi*di[j]
 SMLAL   R0,R4 ,R12,R7
 SMULL   R0,R14,R7,R14          ; ti = wr*di[j] + wi*dr[j]
 SMLAL   R0,R14,R8,R12
 MOV     R12,R4

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

 CMP     R6,R5                  ; i < i0 + mmax ?
 BLO     swi_FillFFT_DanS_Block0_Loop
swi_FillFFT_DanS_Block0_Loop_End

 _PULL   "R4"
 ADD     R5,R5,R4,LSL #2
 ; Block 2: reverse order and -wr, wi
 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
 RSB     R4,R11,#0              ; tr =  0*dr[j] -  1*di[j]
 MOV     R11,R10                ; ti =  0*di[j] +  1*dr[j]
 MOV     R10,R4
 LDMDB   R1!,{R7,R8}            ; -wr, wi
 RSB     R7,R7,#0
 MOV     R12,R12,LSL #2
 MOV     R14,R14,LSL #2
 RSB     R0,R8,#0
 SMULL   R0,R4 ,R14,R0          ; tr = wr*dr[j] - wi*di[j]
 SMLAL   R0,R4 ,R12,R7
 SMULL   R0,R14,R7,R14          ; ti = wr*di[j] + wi*dr[j]
 SMLAL   R0,R14,R8,R12
 MOV     R12,R4

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

swi_FillFFT_DanS_Block2_Loop
 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
 LDMDB   R1!,{R7,R8}            ; -wr, wi
 RSB     R7,R7,#0
 MOV     R10,R10,LSL #2
 MOV     R11,R11,LSL #2
 RSB     R0,R8,#0
 SMULL   R0,R4 ,R11,R0          ; tr = wr*dr[j] - wi*di[j]
 SMLAL   R0,R4 ,R10,R7
 SMULL   R0,R11,R7,R11          ; ti = wr*di[j] + wi*dr[j]
 SMLAL   R0,R11,R8,R10
 MOV     R10,R4

 LDMDB   R1!,{R7,R8}            ; -wr, wi
 MOV     R12,R12,LSL #2
 MOV     R14,R14,LSL #2
 RSB     R7,R7,#0
 RSB     R0,R8,#0
 SMULL   R0,R4 ,R14,R0          ; tr = wr*dr[j] - wi*di[j]
 SMLAL   R0,R4 ,R12,R7
 SMULL   R0,R14,R7,R14          ; ti = wr*di[j] + wi*dr[j]
 SMLAL   R0,R14,R8,R12
 MOV     R12,R4

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

 CMP     R6,R5                  ; i < i0 + mmax ?
 BLO     swi_FillFFT_DanS_Block2_Loop
swi_FillFFT_DanS_Block2_Loop_End

 MOV     R6,R9                  ; i0 += 2max
 ADD     R5,R2,R3,LSL #3
 CMP     R6,R5                  ; i < imax
 BLO     swi_FillFFT_DanS_Sections_Loop
swi_FillFFT_DanS_Sections_Loop_End

 _PULL   "R1,R4,R12"
 ADD     R1,R1,R4,LSL #2

 MOV     R4,R4,LSL #1
 CMP     R4,R3
 BLT     swi_FillFFT_DanS_Stages_Loop
swi_FillFFT_DanS_Stages_Loop_End

 ; try to keep buffer uptodate despite time spend here
 BL      Buffers_CheckDMA

 _PULL   "R5"
 ADD     R2,R2,R3,LSL #3
 CMP     R2,R5
 BLO     swi_FillFFT_DanS_Loop0
swi_FillFFT_DanS_Loop0_End
 _PULL   "R2,R3"

 CMP     R4,R3
 BEQ     swi_FillFFT_Dan_End

 ; Danielson-Lanczos stage LowStages and above
 ; change loop order for faster results

swi_FillFFT_DanSF_Stages_Loop
 _PUSH   "R1,R12"

 ADD     R9,R2,R3,LSL #3        ; j = imax
 SUB     R9,R9,R4,LSL #3        ; j -= mmax
 ADD     R1,R1,#8               ; wr = 1.0, wi = 0.0

swi_FillFFT_DanSF_Loop31
 SUB     R6,R9,R4,LSL #3        ; i = j - mmax
 LDMIA   R9,{R12,R14}           ; dr[j], di[j] as tr, ti

 LDMIA   R6,{R10,R11}           ; dr[i], di[i]
 ADD     R10,R10,R12            ; dr[i]' = dr[i] + tr
 ADD     R11,R11,R14            ; di[i]' = di[i] + ti
 STMIA   R6,{R10,R11}
 SUB     R12,R10,R12,ASL #1     ; dr[j]' = dr[i] - tr
 SUB     R14,R11,R14,ASL #1     ; di[j]' = di[i] - ti
 STMIA   R9,{R12,R14}

 SUB     R9,R9,R4,LSL #4        ; j -= 2*mmax
 CMP     R9,R2                  ; j >= 0 ?
 BHS     swi_FillFFT_DanSF_Loop31
swi_FillFFT_DanSF_Loop31_End

 ADD     R9,R9,#8               ; j += 1

swi_FillFFT_DanSF_Loop2
 ADD     R9,R9,R3,LSL #3        ; j += imax
 LDMIA   R1!,{R7,R8}            ; wr, wi

swi_FillFFT_DanSF_Loop3
 LDMIA   R9,{R10,R11}           ; dr[j], di[j]
 RSB     R6,R8,#0
 MOV     R11,R11,LSL #2
 MOV     R10,R10,LSL #2
 SMULL   R6,R12,R11,R6          ; tr = wr*dr[j] - wi*di[j]
 SMULL   R0,R14,R10,R8          ; ti = wr*di[j] + wi*dr[j]
 SMLAL   R6,R12,R10,R7
 SMLAL   R0,R14,R11,R7

 SUB     R6,R9,R4,LSL #3        ; i = j - mmax
 LDMIA   R6,{R10,R11}           ; dr[i], di[i]
 ADD     R10,R10,R12            ; dr[i]' = dr[i] + tr
 ADD     R11,R11,R14            ; di[i]' = di[i] + ti
 STMIA   R6,{R10,R11}
 SUB     R12,R10,R12,ASL #1     ; dr[j]' = dr[i] - tr
 SUB     R14,R11,R14,ASL #1     ; di[j]' = di[i] - ti
 STMIA   R9,{R12,R14}

 SUB     R9,R9,R4,LSL #4        ; j -= 2*mmax
 CMP     R9,R2                  ; j >= 0 ?
 BHS     swi_FillFFT_DanSF_Loop3
swi_FillFFT_DanSF_Loop3_End

 SUB     R6,R2,R4,LSL #2        ;
 ADD     R9,R9,#8               ; i += 1
 CMP     R9,R6                  ; i < mmax/2 ?
 BLO     swi_FillFFT_DanSF_Loop2
swi_FillFFT_DanSF_Loop2_End

 ADD     R9,R9,R3,LSL #3        ; j += imax

swi_FillFFT_DanSF_Loop61        ; wr =0; wi = 1
 SUB     R6,R9,R4,LSL #3        ; i = j - mmax
 LDMIA   R9,{R10,R11}           ; dr[j], di[j] as -ti, tr
 RSB     R12,R11,#0
 MOV     R14,R10

 LDMIA   R6,{R10,R11}           ; dr[i], di[i]
 ADD     R10,R10,R12            ; dr[i]' = dr[i] + tr
 ADD     R11,R11,R14            ; di[i]' = di[i] + ti
 STMIA   R6,{R10,R11}
 SUB     R12,R10,R12,ASL #1     ; dr[j]' = dr[i] - tr
 SUB     R14,R11,R14,ASL #1     ; di[j]' = di[i] - ti
 STMIA   R9,{R12,R14}

 SUB     R9,R9,R4,LSL #4        ; j -= 2*mmax
 CMP     R9,R2                  ; j >= 0 ?
 BHS     swi_FillFFT_DanSF_Loop61
swi_FillFFT_DanSF_Loop61_End

 ADD     R9,R9,#8               ; j += 1

swi_FillFFT_DanSF_Loop4
 ADD     R9,R9,R3,LSL #3        ; j += imax
 LDMDB   R1!,{R7,R8}            ; -wr, wi
 RSB     R7,R7,#0

swi_FillFFT_DanSF_Loop6
 LDMIA   R9,{R10,R11}           ; dr[j], di[j]
 RSB     R6,R8,#0
 MOV     R11,R11,LSL #2
 MOV     R10,R10,LSL #2
 SMULL   R6,R12,R11,R6          ; tr = wr*dr[j] - wi*di[j]
 SMULL   R0,R14,R10,R8          ; ti = wr*di[j] + wi*dr[j]
 SMLAL   R6,R12,R10,R7
 SMLAL   R0,R14,R11,R7

 SUB     R6,R9,R4,LSL #3        ; i = j - mmax
 LDMIA   R6,{R10,R11}           ; dr[i], di[i]
 ADD     R10,R10,R12            ; dr[i]' = dr[i] + tr
 ADD     R11,R11,R14            ; di[i]' = di[i] + ti
 STMIA   R6,{R10,R11}
 SUB     R12,R10,R12,ASL #1     ; dr[j]' = dr[i] - tr
 SUB     R14,R11,R14,ASL #1     ; di[j]' = di[i] - ti
 STMIA   R9,{R12,R14}

 SUB     R9,R9,R4,LSL #4        ; j -= 2*mmax
 CMP     R9,R2                  ; j >= 0 ?
 BHS     swi_FillFFT_DanSF_Loop6
swi_FillFFT_DanSF_Loop6_End

 ADD     R9,R9,#8               ; i += 1
 CMP     R9,R2                  ; i < 0 ?
 BLO     swi_FillFFT_DanSF_Loop4
swi_FillFFT_DanSF_Loop4_End

 ; try to keep buffer uptodate despite time spend here
 _PULL   "R1,R12"
 BL      Buffers_CheckDMA

 ADD     R1,R1,R4,LSL #2
 MOV     R4,R4,LSL #1
 CMP     R4,R3
 BLT     swi_FillFFT_DanSF_Stages_Loop
swi_FillFFT_DanSF_Stages_Loop_End

 B       swi_FillFFT_Dan_End

swi_FillFFT_DanNormal

 ;----------------------
 ; Pre-StrongARM version

 _PUSH   "R2,R3"
 ADD     R5,R2,R3,LSL #3

 ; To try to use processor cache a max, perform Danielson-Lanczos
 ; stages [0,LowStages[ on (stages - LosStages) sub-blocks
 ; then stages [LowStages, stages[ on whole block

 ; Danielson-Lanczos stages [0,LowStages[
 CMP     R3,#(1<<LowStages)
 MOVGT   R3,#(1<<LowStages)

swi_FillFFT_Dan_Loop0
 _PUSH   "R5"

 ; Danielson-Lanczos optimised stage 0 and 1 combined
 MOV     R4,R3
 MOV     R1,R2

swi_FillFFT_Dan01_Loop
 LDMIA   R1,{R5,R6,R7,R8,R9,R10,R11,R14} ; k = i + 1, j = i + 2
 ; stage 0
 ADD     R5,R5,R7           ; dr[i]' = dr[i] + dr[k]
 ADD     R6,R6,R8           ; di[i]' = di[i] + di[k]
 SUB     R7,R5,R7,ASL#1     ; dr[k]' = dr[i] - dr[k]
 SUB     R8,R6,R8,ASL#1     ; di[k]' = di[i] - di[k]
 ADD     R9 ,R9 ,R11        ; dr[i+2]' = dr[i+2] + dr[k+2]
 ADD     R10,R10,R14        ; di[i+2]' = di[i+2] + di[k+2]
 SUB     R11,R9 ,R11,ASL#1  ; dr[k+2]' = dr[i+2] - dr[k+2]
 SUB     R14,R10,R14,ASL#1  ; di[k+2]' = di[i+2] - di[k+2]
 ; stage 1
 ADD     R5 ,R5 ,R9         ; dr[i]' = dr[i] + dr[j]
 ADD     R6 ,R6 ,R10        ; di[i]' = di[i] + di[j]
 SUB     R9 ,R5 ,R9 ,ASL#1  ; dr[j]' = dr[i] - dr[j]
 SUB     R10,R6 ,R10,ASL#1  ; di[j]' = di[i] - di[j]
 SUB     R7 ,R7 ,R14        ; dr[i+1]' = dr[i+1] - di[j+1]
 ADD     R8 ,R8 ,R11        ; di[i+1]' = di[i+1] + dr[j+1]
 MOV     R0,R11
 ADD     R11,R7 ,R14,ASL#1  ; dr[j+1]' = dr[i+1] + di[j+1]
 SUB     R14,R8 ,R0 ,ASL#1  ; di[j+1]' = di[i+1] - dr[j+1]
 STMIA   R1!,{R5,R6,R7,R8,R9,R10,R11,R14}

 SUBS    R4,R4,#4
 BGT     swi_FillFFT_Dan01_Loop
swi_FillFFT_Dan01_Loop_End

 ; Danielson-Lanczos stage 2

 ADRL    R1,FillFFT_SinTable30
 MOV     R4,#4

 _PUSH   "R1,R4,R12"
 LDR     R1,[R1,#8]             ; w = wr = wi
 MOV     R1,R1,ASR #22
 MOV     R6,R2                  ; i0 = 0
 ADD     R5,R2,R3,LSL #3

swi_FillFFT_Dan2_Sections_Loop
 MOV     R4,#4
 ADD     R9,R6,R4,LSL #3        ; j = i0 + mmax

 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
                                ; tr = 1*dr[j] - 0*di[j]
                                ; ti = 1*di[j] + 0*dr[j]
 SUB     R4,R12,R14
 ADD     R14,R12,R14
 MUL     R12,R1,R4              ; tr = w*dr[j] - w*di[j]
 MUL     R14,R1,R14             ; ti = w*di[j] + w*dr[j]
 MOV     R12,R12,ASR #8
 MOV     R14,R14,ASR #8

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
 RSB     R4,R11,#0              ; tr = 0*dr[j] - 1*di[j]
 MOV     R11,R10                ; ti = 0*di[j] + 1*dr[j]
 MOV     R10,R4
 ADD     R4,R12,R14
 RSB     R4,R4,#0
 SUB     R14,R12,R14
 MUL     R12,R1,R4              ; tr = -w*dr[j] - w*di[j]
 MUL     R14,R1,R14             ; ti = -w*di[j] + w*dr[j]
 MOV     R12,R12,ASR #8
 MOV     R14,R14,ASR #8

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

 MOV     R6,R9                  ; i0 += 2max
 CMP     R6,R5                  ; i < imax
 BLO     swi_FillFFT_Dan2_Sections_Loop
swi_FillFFT_Dan2_Sections_Loop_End
 _PULL   "R1,R4,R12"

 ; Danielson-Lanczos stage 3 and above

 ADD     R1,R1,#2*8
 MOV     R4,R4,LSL #1

 CMP     R4,#(1<<LowStages)
 BGE     swi_FillFFT_Dan_Stages_Loop_End

swi_FillFFT_Dan_Stages_Loop
 _PUSH   "R1,R4,R12"
 MOV     R6,R2                  ; i0 = 0

swi_FillFFT_Dan_Sections_Loop
 LDMIA   R13,{R1,R4}            ; restore r1, r4
 ADD     R9,R6,R4,LSL #3        ; j = i0 + mmax
 ADD     R5,R6,R4,LSL #2
 _PUSH   "R4"

 ; Block 0: normal order
 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
 ADD     R1,R1,#8               ; wr = 1.0, wi = 0.0
                                ; tr =  1*dr[j] -  0*di[j]
                                ; ti =  1*di[j] +  0*dr[j]
 LDMIA   R1!,{R7,R8}            ; wr, wi
 MOV     R7,R7,ASR #22
 MOV     R8,R8,ASR #22

 MUL     R0,R8,R14              ; tr = wr*dr[j] - wi*di[j]
 MUL     R4,R7,R12
 SUB     R0,R4,R0
 MUL     R14,R7,R14             ; ti = wr*di[j] + wi*dr[j]
 MLA     R14,R8,R12,R14
 MOV     R12,R0 ,ASR #8
 MOV     R14,R14,ASR #8

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

swi_FillFFT_Dan_Block0_Loop
 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
 LDMIA   R1!,{R7,R8}            ; wr, wi
 MOV     R7,R7,ASR #22
 MOV     R8,R8,ASR #22
 MUL     R0,R8,R11              ; tr = wr*dr[j] - wi*di[j]
 MUL     R4,R7,R10
 SUB     R0,R4,R0
 MUL     R11,R7,R11             ; ti = wr*di[j] + wi*dr[j]
 MLA     R11,R8,R10,R11
 MOV     R10,R0 ,ASR #8
 MOV     R11,R11,ASR #8

 LDMIA   R1!,{R7,R8}            ; wr, wi
 MOV     R7,R7,ASR #22
 MOV     R8,R8,ASR #22
 MUL     R0,R8,R14              ; tr = wr*dr[j] - wi*di[j]
 MUL     R4,R7,R12
 SUB     R0,R4,R0
 MUL     R14,R7,R14             ; ti = wr*di[j] + wi*dr[j]
 MLA     R14,R8,R12,R14
 MOV     R12,R0 ,ASR #8
 MOV     R14,R14,ASR #8

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

 CMP     R6,R5                  ; i < i0 + mmax ?
 BLO     swi_FillFFT_Dan_Block0_Loop
swi_FillFFT_Dan_Block0_Loop_End

 _PULL   "R4"
 ADD     R5,R5,R4,LSL #2
 ; Block 2: reverse order and -wr, wi
 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
 RSB     R4,R11,#0              ; tr =  0*dr[j] -  1*di[j]
 MOV     R11,R10                ; ti =  0*di[j] +  1*dr[j]
 MOV     R10,R4
 LDMDB   R1!,{R7,R8}            ; -wr, wi
 RSB     R7,R7,#0
 MOV     R7,R7,ASR #22
 MOV     R8,R8,ASR #22
 MUL     R0,R8,R14              ; tr = wr*dr[j] - wi*di[j]
 MUL     R4,R7,R12
 SUB     R0,R4,R0
 MUL     R14,R7,R14             ; ti = wr*di[j] + wi*dr[j]
 MLA     R14,R8,R12,R14
 MOV     R12,R0 ,ASR #8
 MOV     R14,R14,ASR #8

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

swi_FillFFT_Dan_Block2_Loop
 LDMIA   R9,{R10,R11,R12,R14}   ; dr[j], di[j]
 LDMDB   R1!,{R7,R8}            ; -wr, wi
 RSB     R7,R7,#0
 MOV     R7,R7,ASR #22
 MOV     R8,R8,ASR #22
 MUL     R0,R8,R11              ; tr = wr*dr[j] - wi*di[j]
 MUL     R4,R7,R10
 SUB     R0,R4,R0
 MUL     R11,R7,R11             ; ti = wr*di[j] + wi*dr[j]
 MLA     R11,R8,R10,R11
 MOV     R10,R0 ,ASR #8
 MOV     R11,R11,ASR #8

 LDMDB   R1!,{R7,R8}            ; -wr, wi
 RSB     R7,R7,#0
 MOV     R7,R7,ASR #22
 MOV     R8,R8,ASR #22
 MUL     R0,R8,R14              ; tr = wr*dr[j] - wi*di[j]
 MUL     R4,R7,R12
 SUB     R0,R4,R0
 MUL     R14,R7,R14             ; ti = wr*di[j] + wi*dr[j]
 MLA     R14,R8,R12,R14
 MOV     R12,R0 ,ASR #8
 MOV     R14,R14,ASR #8

 LDMIA   R6,{R0,R4,R7,R8}       ; dr[i], di[i]
 ADD     R0,R0,R10              ; dr[i]' = dr[i] + tr
 ADD     R4,R4,R11              ; di[i]' = di[i] + ti
 ADD     R7,R7,R12              ; dr[i]' = dr[i] + tr
 ADD     R8,R8,R14              ; di[i]' = di[i] + ti
 STMIA   R6!,{R0,R4,R7,R8}
 SUB     R10,R0,R10,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R11,R4,R11,ASL #1      ; di[j]' = di[i] - ti
 SUB     R12,R7,R12,ASL #1      ; dr[j]' = dr[i] - tr
 SUB     R14,R8,R14,ASL #1      ; di[j]' = di[i] - ti
 STMIA   R9!,{R10,R11,R12,R14}

 CMP     R6,R5                  ; i < i0 + mmax ?
 BLO     swi_FillFFT_Dan_Block2_Loop
swi_FillFFT_Dan_Block2_Loop_End

 MOV     R6,R9                  ; i0 += 2max
 ADD     R5,R2,R3,LSL #3
 CMP     R6,R5                  ; i < imax
 BLO     swi_FillFFT_Dan_Sections_Loop
swi_FillFFT_Dan_Sections_Loop_End

 _PULL   "R1,R4,R12"
 ADD     R1,R1,R4,LSL #2

 MOV     R4,R4,LSL #1
 CMP     R4,R3
 BLT     swi_FillFFT_Dan_Stages_Loop
swi_FillFFT_Dan_Stages_Loop_End

 ; try to keep buffer uptodate despite time spend here
 BL      Buffers_CheckDMA

 _PULL   "R5"
 ADD     R2,R2,R3,LSL #3
 CMP     R2,R5
 BLO     swi_FillFFT_Dan_Loop0
swi_FillFFT_Dan_Loop0_End
 _PULL   "R2,R3"

 CMP     R4,R3
 BEQ     swi_FillFFT_Dan_End

 ; Danielson-Lanczos stage LowStages and above
 ; change loop order for faster results

swi_FillFFT_DanF_Stages_Loop
 _PUSH   "R1,R12"

 ADD     R9,R2,R3,LSL #3        ; j = imax
 SUB     R9,R9,R4,LSL #3        ; j -= mmax
 ADD     R1,R1,#8               ; wr = 1.0, wi = 0.0

swi_FillFFT_DanF_Loop31
 SUB     R6,R9,R4,LSL #3        ; i = j - mmax
 LDMIA   R9,{R12,R14}           ; dr[j], di[j] as tr, t1

 LDMIA   R6,{R10,R11}           ; dr[i], di[i]
 ADD     R10,R10,R12            ; dr[i]' = dr[i] + tr
 ADD     R11,R11,R14            ; di[i]' = di[i] + ti
 STMIA   R6,{R10,R11}
 SUB     R10,R10,R12,ASL #1     ; dr[j]' = dr[i] - tr
 SUB     R11,R11,R14,ASL #1     ; di[j]' = di[i] - ti
 STMIA   R9,{R10,R11}

 SUB     R9,R9,R4,LSL #4        ; j -= 2*mmax
 CMP     R9,R2                  ; j >= 0 ?
 BHS     swi_FillFFT_DanF_Loop31
swi_FillFFT_DanF_Loop31_End

 ADD     R9,R9,#8               ; j += 1

swi_FillFFT_DanF_Loop2
 ADD     R9,R9,R3,LSL #3        ; j += imax
 LDMIA   R1!,{R7,R8}            ; wr, wi
 MOV     R7,R7,ASR #22
 MOV     R8,R8,ASR #22

swi_FillFFT_DanF_Loop3
 SUB     R6,R9,R4,LSL #3        ; i = j - mmax
 LDMIA   R9,{R10,R11}           ; dr[j], di[j]
 MUL     R0,R8,R11              ; tr = wr*dr[j] - wi*di[j]
 MUL     R12,R7,R10
 SUB     R12,R12,R0
 MUL     R0,R7,R11              ; ti = wr*di[j] + wi*dr[j]
 MLA     R14,R8,R10,R0
 MOV     R12,R12,ASR #8
 MOV     R14,R14,ASR #8

 LDMIA   R6,{R10,R11}           ; dr[i], di[i]
 ADD     R10,R10,R12            ; dr[i]' = dr[i] + tr
 ADD     R11,R11,R14            ; di[i]' = di[i] + ti
 STMIA   R6,{R10,R11}
 SUB     R10,R10,R12,ASL #1     ; dr[j]' = dr[i] - tr
 SUB     R11,R11,R14,ASL #1     ; di[j]' = di[i] - ti
 STMIA   R9,{R10,R11}

 SUB     R9,R9,R4,LSL #4        ; j -= 2*mmax
 CMP     R9,R2                  ; j >= 0 ?
 BHS     swi_FillFFT_DanF_Loop3
swi_FillFFT_DanF_Loop3_End

 SUB     R6,R2,R4,LSL #2        ;
 ADD     R9,R9,#8               ; i += 1
 CMP     R9,R6                  ; i < mmax/2 ?
 BLO     swi_FillFFT_DanF_Loop2
swi_FillFFT_DanF_Loop2_End

 ADD     R9,R9,R3,LSL #3        ; j += imax

swi_FillFFT_DanF_Loop61         ; wr =0; wi = 1
 SUB     R6,R9,R4,LSL #3        ; i = j - mmax
 LDMIA   R9,{R10,R11}           ; dr[j], di[j] as -ti, tr
 RSB     R12,R11,#0
 MOV     R14,R10

 LDMIA   R6,{R10,R11}           ; dr[i], di[i]
 ADD     R10,R10,R12            ; dr[i]' = dr[i] + tr
 ADD     R11,R11,R14            ; di[i]' = di[i] + ti
 STMIA   R6,{R10,R11}
 SUB     R10,R10,R12,ASL #1     ; dr[j]' = dr[i] - tr
 SUB     R11,R11,R14,ASL #1     ; di[j]' = di[i] - ti
 STMIA   R9,{R10,R11}

 SUB     R9,R9,R4,LSL #4        ; j -= 2*mmax
 CMP     R9,R2                  ; j >= 0 ?
 BHS     swi_FillFFT_DanF_Loop61
swi_FillFFT_DanF_Loop61_End

 ADD     R9,R9,#8               ; j += 1

swi_FillFFT_DanF_Loop4
 ADD     R9,R9,R3,LSL #3        ; j += imax
 LDMDB   R1!,{R7,R8}            ; -wr, wi
 RSB     R7,R7,#0
 MOV     R7,R7,ASR #22
 MOV     R8,R8,ASR #22

swi_FillFFT_DanF_Loop6
 SUB     R6,R9,R4,LSL #3        ; i = j - mmax
 LDMIA   R9,{R10,R11}           ; dr[j], di[j]
 MUL     R0,R8,R11              ; tr = wr*dr[j] - wi*di[j]
 MUL     R12,R7,R10
 SUB     R12,R12,R0
 MUL     R0,R7,R11              ; ti = wr*di[j] + wi*dr[j]
 MLA     R14,R8,R10,R0
 MOV     R12,R12,ASR #8
 MOV     R14,R14,ASR #8

 LDMIA   R6,{R10,R11}           ; dr[i], di[i]
 ADD     R10,R10,R12            ; dr[i]' = dr[i] + tr
 ADD     R11,R11,R14            ; di[i]' = di[i] + ti
 STMIA   R6,{R10,R11}
 SUB     R10,R10,R12,ASL #1     ; dr[j]' = dr[i] - tr
 SUB     R11,R11,R14,ASL #1     ; di[j]' = di[i] - ti
 STMIA   R9,{R10,R11}

 SUB     R9,R9,R4,LSL #4        ; j -= 2*mmax
 CMP     R9,R2                  ; j >= 0 ?
 BHS     swi_FillFFT_DanF_Loop6
swi_FillFFT_DanF_Loop6_End

 ADD     R9,R9,#8               ; i += 1
 CMP     R9,R2                  ; i < 0 ?
 BLO     swi_FillFFT_DanF_Loop4
swi_FillFFT_DanF_Loop4_End

 ; try to keep the buffer up to date despite the time spent here
 _PULL   "R1,R12"
 BL      Buffers_CheckDMA

 ADD     R1,R1,R4,LSL #2
 MOV     R4,R4,LSL #1
 CMP     R4,R3
 BLT     swi_FillFFT_DanF_Stages_Loop
swi_FillFFT_DanF_Stages_Loop_End

swi_FillFFT_Dan_End
 _PULL   "R11"

 ; 3) Extract Left and right FFTs

swi_FillFFT_Extract
 ; R2 still points to FFTleft, R3 FFTSize
 MOV     R1,R2
 _PULL   "R2,R3-R4"
 ; Polar or cartesian
 TST     R3,#8
 BEQ     swi_FillFFT_Extract_Polar

swi_FillFFT_Extract_Cartesian
 MOV     R5,#1
 MOV     R3,R5,LSL R4
 LDR     R9,[R11,#Driver_FFTShift]
 LDR     R5,[R1,#0]     ; lr[0] = dr[0]
 MOV     R5,R5,ASR R9
 MOV     R6,#0          ; li[0] = 0
 LDR     R7,[R1,#4]     ; rr[0] = di[0]
 MOV     R7,R7,ASR R9
 MOV     R8,#0          ; ri[0] = 0
 STMIA   R2!,{R5-R8}

 ADD     R9,R9,#1
 MOV     R4,R3,LSR #1
 SUB     R4,R4,#1
 ADD     R3,R1,R3,LSL #3
 ADD     R1,R1,#8
swi_FillFFT_Extract_Cartesian_Loop
 LDMIA   R1!,{R5,R6}    ; dr[i], di[i]
 LDMDB   R3!,{R7,R8}    ; dr[n-i], di[n-i]

 SUB     R5,R5,R7       ; lr[i] = (dr[i] - dr[n-i])/2
 ADD     R6,R6,R8       ; li[i] = (di[i] + di[n-i])/2
 ADD     R7,R5,R7,ASL #1; rr[i] = (dr[i] + dr[n-i])/2
 SUB     R8,R6,R8,ASL #1; ri[i] = (di[i] - di[n-i])/2
 MOV     R5,R5,ASR R9
 MOV     R6,R6,ASR R9
 MOV     R7,R7,ASR R9
 MOV     R8,R8,ASR R9

 STMIA   R2!,{R5-R8}

 SUBS    R4,R4,#1
 BGT     swi_FillFFT_Extract_Cartesian_Loop

 B       swi_FillFFT_End

swi_FillFFT_Extract_Polar
 MOV     R5,#1
 MOV     R3,R5,LSL R4
 LDR     R9,[R11,#Driver_FFTShift]
 LDR     R5,[R1,#0]     ; lr[0] = dr[0]
 MOV     R6,#0          ; li[0] = 0
 LDR     R7,[R1,#4]     ; rr[0] = di[0]
 MOV     R8,#0          ; ri[0] = 0
 CMP     R5,#0
 RSBLT   R5,R5,#0
 MOV     R5,R5,ASR R9
 CMP     R5,#&10000
 MOVGE   R5,#&0FF00
 CMP     R7,#0
 RSBLT   R7,R7,#0
 MOV     R7,R7,ASR R9
 CMP     R7,#&10000
 MOVGE   R7,#&0FF00
 STMIA   R2!,{R5-R8}

 ADD     R9,R9,#1
 MOV     R4,R3,LSR #1
 SUB     R4,R4,#1
 ADD     R3,R1,R3,LSL #3
 ADD     R1,R1,#8
swi_FillFFT_Extract_Polar_Loop
 LDMIA   R1!,{R5,R6}    ; dr[i], di[i]
 LDMDB   R3!,{R7,R8}    ; dr[n-i], di[n-i]

 SUBS    R10,R5,R7      ; lr[i] = (dr[i] - dr[n-i])/2
 RSBLT   R10,R10,#0     ; better rounding, but beware of phase (uncalculated)
 MOV     R10,R10,ASR R9
 ADDS    R11,R6,R8      ; li[i] = (di[i] + di[n-i])/2
 MUL     R0,R10,R10     ; a = sqrt(lr^2 + li^2)
 RSBLT   R11,R11,#0     ;
 MOV     R11,R11,ASR R9
 MLA     R0,R11,R11,R0
 BL      isqrt
 MOV     R12,R0
 CMP     R12,#&10000
 MOVGE   R12,#&0FF00

 ADDS    R10,R5,R7      ; rr[i] = (dr[i] + dr[n-i])/2
 RSBLT   R10,R10,#0     ;
 MOV     R10,R10,ASR R9
 SUBS    R11,R6,R8      ; ri[i] = (di[i] - di[n-i])/2
 MUL     R0,R10,R10
 RSBLT   R11,R11,#0     ;
 MOV     R11,R11,ASR R9
 MLA     R0,R11,R11,R0
 BL      isqrt
 CMP     R0,#&10000
 MOVGE   R0,#&0FF00

 MOV     R6,#0
 MOV     R7,R12
 MOV     R8,#0
 STMIA   R2!,{R0,R6-R8}

 SUBS    R4,R4,#1
 BGT     swi_FillFFT_Extract_Polar_Loop

 B       swi_FillFFT_End

swi_FillFFT_Clear
 ; R4 is Log2Size
 MOV     R6,#8
 ADD     R2,R1,R6,LSL R4
 MOV     R3,#0
 MOV     R4,#0
 MOV     R5,#0
 MOV     R6,#0
swi_FillFFT_ClearLoop
 STMIA   R1!,{R3-R6}
 CMP     R1,R2
 BLO     swi_FillFFT_ClearLoop

swi_FillFFT_End
 _PULL   "R0" ; R0 to restore
 _ENDPROC

; Data tables assembled in-line at this point from separate header files.
; Their contents are not visible in this file; the descriptions below are
; inferred from the label/header names only - TODO confirm against the headers.

; Presumably the sine/cosine (twiddle factor) table for the FFT stages.
FillFFT_SinTable30
 GET hdr.SinTable30

; Presumably the bit-reversal permutation table for the FFT.
FillFFT_BitTable
 GET hdr.BitTable

; Presumably the Hamming window coefficients applied before the FFT.
FillFFT_Hamming
 GET hdr.Hamming

;-------------------------------------------------------------------------------
; SWI BufferSizes
;
; Reports the assembly-time buffer limits together with the default FFT size
; currently stored in the global workspace.
;
; In  - R12 Global Header (Glb_DefaultFFT is read from it)
;
; Out - R0  max points in sample buffers (SmpSize)
;       R1  default number of points in FFT, i.e. (1 << Glb_DefaultFFT) >> 1
;       R2  Byte 0: Log2 of min FFT size (constant 7)
;           Byte 1: Log2 of default FFT size (Glb_DefaultFFT)
;           Byte 2: Log2 of max FFT size (FFTLog2Size)
;           Byte 3: Maximal sample stepping (SmpMaxStep)
;-------------------------------------------------------------------------------

swi_BufferSizes _FNAME
 _DEFPROCV
 LDR     R1,[R12,#Glb_DefaultFFT]   ; R1 = log2 of default FFT size

 ; Pack the four result bytes into R2, lowest byte first
 MOV     R2,#7                      ; byte 0: log2 of min FFT size
 ORR     R2,R2,R1,LSL #8            ; byte 1: log2 of default FFT size
 ORR     R2,R2,#FFTLog2Size<<16     ; byte 2: log2 of max FFT size
 ORR     R2,R2,#SmpMaxStep<<24      ; byte 3: maximal sample stepping

 ; R1 = (1 << log2 default) >> 1, using R0 as a temporary
 MOV     R0,#1
 MOV     R0,R0,LSL R1
 MOV     R1,R0,LSR #1

 MOV     R0,#SmpSize                ; R0 = max points in sample buffers
 _ENDPROC

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; End of the module                                     ;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 AREA |ZEnd_Of_Module|,READONLY,CODE

Module_end

 END
