
		AREA	|ARM$$code|,CODE,READONLY

		EXPORT	scale_16_16
		EXPORT	scale_16_32
		EXPORT	scale_32_32



; Convert from 16bpp to 16bpp, doubling the number of pixels
;
; Expands 320 source pixels into 640 destination pixels.  Every source
; pixel is copied through unchanged and is followed by the average of
; itself and the next source pixel.  The last source pixel has no
; successor, so it is simply written twice.
;
; Only the three 5-bit fields in bits 0-4, 5-9 and 10-14 of a pixel
; take part in the averaging; bit 15 is kept in copied pixels and is
; always clear in averaged ones.
;
; entry	a1 -> destination (written one halfword at a time)
;	a2 -> source (read one word, i.e. two pixels, at a time; the
;	      low halfword is treated as the earlier pixel)
;
; exit	a1 -> first halfword after the data written
;	a2 -> first word after the data read
;	a3, a4, ip and flags corrupted; v1-v6 preserved
;
; register use inside the loop
;	ip    = source pixels still to process
;	lr    = mask for the bits 5-9 field
;	a3    = mask for the bits 10-14 field
;	a4    = current source word
;	v1-v3 = fields of the low-halfword pixel, in place (bits 0-14)
;	v4-v6 = fields of the high-halfword pixel, in place (bits 16-30)

scale_16_16	PLD	[a2]
		STMFD	sp!,{v1-v6,lr}

		MOV	ip,#320
		MOV	lr,#&1F<<5
		MOV	a3,#&1F<<10

		; prime the loop with the first pair of pixels
		LDR	a4,[a2],#4
scal16to16_lp	PLD	[a2,#60]

		; split the low-halfword pixel into its three fields
		AND	v1,a4,#&1F
		AND	v2,a4,#&1F<<5
		AND	v3,a4,#&1F<<10

		; loop re-entry point: v1-v3 already hold the low pixel's
		; fields, so only the high-halfword pixel is split here
scal16to16b_lp	AND	v4,a4,#&1F<<16
		AND	v5,a4,#&1F<<21
		AND	v6,a4,#&1F<<26

		; output 1: low pixel copied as is
		STRH	a4,[a1],#2

		; output 2: average of low and high pixel.  The fields are
		; summed in place, halved with a shift, and the two upper
		; fields are re-masked to drop the bit shifted in from the
		; field above's position
		ADD	v1,v1,v4,LSR #16
		ADD	v2,v2,v5,LSR #16
		ADD	v3,v3,v6,LSR #16
		MOV	v1,v1,LSR #1
		AND	v2,lr,v2,LSR #1
		AND	v3,a3,v3,LSR #1
		ORR	v1,v1,v2
		ORR	v1,v1,v3
		STRH	v1,[a1],#2

		; output 3: high pixel copied as is (a4 now holds only it)
		MOV	a4,a4,LSR #16
		STRH	a4,[a1],#2

		; two source pixels consumed; fetch the next pair only if
		; there is one, so nothing is read beyond the source
		SUBS	ip,ip,#2
		LDRGT	a4,[a2],#4

		BLE	leave16to16
		PLD	[a2,#56]

		; split the new word's low pixel ...
		AND	v1,a4,#&1F
		AND	v2,a4,#&1F<<5
		AND	v3,a4,#&1F<<10

		; ... and output 4: average of the previous word's high
		; pixel (still in v4-v6) and this new low pixel
		ADD	v4,v1,v4,LSR #16
		ADD	v5,v2,v5,LSR #16
		ADD	v6,v3,v6,LSR #16
		MOV	v4,v4,LSR #1
		AND	v5,lr,v5,LSR #1
		AND	v6,a3,v6,LSR #1
		ORR	v4,v4,v5
		ORR	v4,v4,v6
		STRH	v4,[a1],#2
		B	scal16to16b_lp

leave16to16	;replicate final pixel

		; a4 still holds the last source pixel from output 3 above
		STRH	a4,[a1],#2
		LDMFD	sp!,{v1-v6,pc}


; Convert from 16bpp to 32bpp, doubling the number of pixels
;
; Expands 320 source pixels into 640 destination pixels.  Every source
; pixel is converted and is followed by the average of itself and the
; next source pixel.  The last source pixel has no successor, so it is
; simply written twice.
;
; The three 5-bit fields in bits 0-4, 5-9 and 10-14 of a source pixel
; are each widened to 8 bits through lut5to8; bit 15 is ignored.
; Averaging is done on the widened 8-bit values.  Each output pixel is
; four bytes: the widened bits 0-4 field, the widened bits 5-9 field,
; then a halfword holding the widened bits 10-14 field with a zero
; upper byte.
;
; entry	a1 -> destination (must be halfword aligned for the STRH)
;	a2 -> source (read one word, i.e. two pixels, at a time; the
;	      low halfword is treated as the earlier pixel)
;
; exit	a1 -> first byte after the data written
;	a2 -> first word after the data read
;	a4, ip and flags corrupted; v1-v6 preserved
;
; register use inside the loop
;	ip    = source pixels still to process
;	lr    -> lut5to8
;	a4    = current source word
;	v1-v3 = widened fields of the low-halfword pixel
;	v4-v6 = widened fields of the high-halfword pixel

scale_16_32	PLD	[a2]
		STMFD	sp!,{v1-v6,lr}

		MOV	ip,#320
		ADR	lr,lut5to8

		; prime the loop with the first pair of pixels
		LDR	a4,[a2],#4
scal16to32_lp	PLD	[a2,#60]

		; split the low-halfword pixel into its three fields ...
		AND	v1,a4,#&1F
		AND	v2,a4,#&1F<<5
		AND	v3,a4,#&1F<<10

		; ... and widen each one to 8 bits
		LDRB	v1,[lr,v1]
		LDRB	v2,[lr,v2,LSR #5]
		LDRB	v3,[lr,v3,LSR #10]

		; loop re-entry point: v1-v3 already hold the low pixel,
		; so only the high-halfword pixel is split and widened here
scal16to32b_lp	AND	v4,a4,#&1F<<16
		AND	v5,a4,#&1F<<21
		AND	v6,a4,#&1F<<26

		LDRB	v4,[lr,v4,LSR #16]
		LDRB	v5,[lr,v5,LSR #21]
		LDRB	v6,[lr,v6,LSR #26]

		; output 1: low pixel.  The STRH writes the third field
		; together with a zero fourth byte
		STRB	v1,[a1],#1
		STRB	v2,[a1],#1
		STRH	v3,[a1],#2

		; output 2: average of low and high pixel.  Each sum is at
		; most 510, so the halved value always fits in a byte
		ADD	v1,v1,v4
		ADD	v2,v2,v5
		ADD	v3,v3,v6
		MOV	v1,v1,LSR #1
		MOV	v2,v2,LSR #1
		MOV	v3,v3,LSR #1
		STRB	v1,[a1],#1
		STRB	v2,[a1],#1
		STRH	v3,[a1],#2

		; output 3: high pixel
		STRB	v4,[a1],#1
		STRB	v5,[a1],#1
		STRH	v6,[a1],#2

		; two source pixels consumed; fetch the next pair only if
		; there is one, so nothing is read beyond the source
		SUBS	ip,ip,#2
		LDRGT	a4,[a2],#4
		BLE	leave16to32
		PLD	[a2,#56]

		; split and widen the new word's low pixel ...
		AND	v1,a4,#&1F
		AND	v2,a4,#&1F<<5
		AND	v3,a4,#&1F<<10

		LDRB	v1,[lr,v1]
		LDRB	v2,[lr,v2,LSR #5]
		LDRB	v3,[lr,v3,LSR #10]

		; ... and output 4: average of the previous word's high
		; pixel (still in v4-v6) and this new low pixel
		ADD	v4,v1,v4
		ADD	v5,v2,v5
		ADD	v6,v3,v6
		MOV	v4,v4,LSR #1
		MOV	v5,v5,LSR #1
		MOV	v6,v6,LSR #1

		STRB	v4,[a1],#1
		STRB	v5,[a1],#1
		STRH	v6,[a1],#2
		B	scal16to32b_lp

leave16to32	;replicate final pixel

		; v4-v6 still hold the last source pixel from output 3.
		; The third field is stored with STRH, as for every other
		; pixel, so that the fourth byte of the final pixel is
		; zeroed as well rather than left with stale contents
		STRB	v4,[a1],#1
		STRB	v5,[a1],#1
		STRH	v6,[a1],#2

		LDMFD	sp!,{v1-v6,pc}

		ALIGN	32

; 5-bit to 8-bit expansion table, indexed by the 5-bit value.
; Entry i is (i << 3) OR (i >> 2), i.e. the top three bits are repeated
; in the bottom three so that 0 maps to 0 and 31 maps to &FF.
; Read with LDRB via the ADR in scale_16_32.

lut5to8		DCB	&00,&08,&10,&18,&21,&29,&31,&39	; 0-7
		DCB	&42,&4A,&52,&5A,&63,&6B,&73,&7B	; 8-15
		DCB	&84,&8C,&94,&9C,&A5,&AD,&B5,&BD	; 16-23
		DCB	&C6,&CE,&D6,&DE,&E7,&EF,&F7,&FF	; 24-31


; Double 320 32-bit words by plain replication
;
; Each word read is written out twice in succession, four words in and
; eight words out per pass of the loop.  No blending between
; neighbouring words is done (see the ??? remark in the loop).
;
; entry	a1 -> words to read (320 of them)
;	a2 -> where the 640 words are written
;
; exit	a1 -> first word after the data read
;	a2 -> first word after the data written
;	ip and flags corrupted; v1-v3 preserved
;
; NOTE(review): scale_16_16 and scale_16_32 document a1 as the
; destination and a2 as the source, whereas this routine reads through
; a1 and writes through a2.  Confirm against the callers which order
; is intended.

scale_32_32	STMFD	sp!,{v1-v3,lr}
		MOV	ip,#320			; words still to read

scal32to32_lp	PLD	[a1,#64]

		; fetch a group of four words, then step past it
		LDR	v1,[a1]
		LDR	v2,[a1,#4]
		LDR	v3,[a1,#8]
		LDR	lr,[a1,#12]
		ADD	a1,a1,#16

;??? we're supposed to be interpolating here ;)

		; write each fetched word twice, then step past the
		; eight words written
		STR	v1,[a2]
		STR	v1,[a2,#4]
		STR	v2,[a2,#8]
		STR	v2,[a2,#12]
		STR	v3,[a2,#16]
		STR	v3,[a2,#20]
		STR	lr,[a2,#24]
		STR	lr,[a2,#28]
		ADD	a2,a2,#32

		SUBS	ip,ip,#4
		BGT	scal32to32_lp

		LDMFD	sp!,{v1-v3,pc}

		END
