		TTL	> Idea Assembler bits

;Assembler bits for Idea

		GET	hdr.Regs
		GET	hdr.CMacros

		PREAMBLE

;Number of iterations of the main loop in cipher_idea.  The subkey table is
;laid out as Z[6][ROUNDS+1] words, and LDRZ computes its offsets from this
;value.
ROUNDS		EQU	8				; Don't change this value, should be 8

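;Multiplication modulo (2**16)+1.  A 16-bit value of 0 stands for 2**16,
;which is why the a==0 and b==0 cases below return maxim minus the other operand.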
;static word16 mul(register word16 a, register word16 b)
;{
;        register word32 q;
;        register long int p;
;        if (a==0)
;                p = maxim - (long int) b;
;        else
;                if (b==0)
;                        p = maxim - (long int) a;
;                else
;                {       q = (word32)a * (word32)b;
;                        p = (q & mask16) - (q>>16);
;                        if (p<=0)
;                                p = p+maxim;
;                }
;        return (lower16(p));
;}        /* mul */

;idea_mul: c <- a * b modulo (2^16+1), truncated to 16 bits.
;Assembler form of the C routine mul() quoted above.
;
;  $label  optional label; placed on the first instruction of the expansion
;  $a,$b   registers holding the two operands.  They are compared with 0
;          and multiplied as whole registers, so they must already be
;          reduced to 16 bits.
;  $c      result register.  Must not be the same register as $b (the
;          product in $c is still needed after $b has been overwritten),
;          nor as $a (MUL on older ARMs does not allow Rd = Rm).
;  $mask   register holding 2^16-1
;  $maxim  register holding 2^16+1
;
;Corrupts the flags, and $b when neither operand is zero.
;$a, $mask and $maxim are left unchanged.


		MACRO
$label		idea_mul	$a,$b,$c,$mask,$maxim

$label		CMP	$a,#0				;if a = 0 then
		SUBEQ	$c,$maxim,$b			; c <- maxim - b
		BEQ	%FT50

		CMP	$b,#0				;if b = 0 then
		SUBEQ	$c,$maxim,$a			; c <- maxim - a
		BEQ	%FT50

		MUL	$c,$a,$b			;c <- a * b
		AND	$b,$c,$mask			;b <- c AND mask
		SUBS	$c,$b,$c,LSR #16		;c <- (c AND mask) - (c >> 16)
;The C tests p <= 0, but p = 0 cannot occur here: low half - high half is
;congruent to a*b modulo 2^16+1, and with both operands in 1..2^16-1 the
;product is never a multiple of 2^16+1 (a prime).  So only a negative
;difference needs correcting.
		ADDMI	$c,$c,$maxim			;if c < 0 then c+=maxim
50
		AND	$c,$c,$mask			;c <- c AND mask (the C lower16(p))
		MEND

;C reference prototype:
;  static word16 mul(register word16 a, register word16 b)
;
;Callable wrapper around the idea_mul macro.
;  In:   a1 = a, a2 = b   (used as passed; they are not masked here)
;  Out:  a1 = idea_mul result
;  a3 and a4 are overwritten with mask and maxim; a2 and the flags may be
;  changed by idea_mul.  v1 is the work register and is preserved on the
;  stack.
;STARTCODE is defined outside this file (presumably in hdr.CMacros).

		STARTCODE	mul

		STMFD	sp !,{v1,lr}
		MOV	a4,#&10000		;a4 = 2^16
		SUB	a3,a4,#1		;a3 = mask  = 2^16-1
		ADD	a4,a4,#1		;a4 = maxim = 2^16+1
		idea_mul	a1, a2, v1, a3, a4
		MOV	a1,v1			;result goes back in a1
		LDMFD	sp !,{v1,pc}		;return; no ^ since v0.08


;/*      IDEA encryption/decryption algorithm */
;extern void cipher_idea(word16 inblock[4], word16 outblock[4],
;                 word32 Z[6][ROUNDS+1])

;Register allocation for cipher_idea.
;The names on the right (a1-a4, v1-v6, sl, fp, ip, lr) are not defined in
;this file; presumably they come from hdr.Regs.
inblock		RN	a1		;argument: pointer to the four input words
outblock	RN	a2		;argument: pointer to the four output words
Z		RN	a3		;argument: subkey table pointer, advanced one word per round
x1		RN	a4		;x1..x4: the four 16-bit data words being transformed
x2		RN	v1
x3		RN	v2
x4		RN	v3
kk		RN	v4		;kk, t1, t2: temporaries named after the C locals
t1		RN	v5
t2		RN	v6
a		RN	sl		;holds x2^t2 while x2 is being overwritten
r		RN	fp		;round counter, 0..ROUNDS-1
; changed s1-s2 to add Z prefix to avoid multiple declarations  v0.08
Zs1		RN	ip		;scratch: input pointer copy, then idea_mul/add operand
Zs2		RN	lr		;scratch: second idea_mul operand (corrupted by idea_mul)

;mask and maxim share registers with the two pointer arguments, so
;cipher_idea copies inblock elsewhere and stacks outblock before setting
;them up.
mask		RN	inblock		;2^16-1
maxim		RN	outblock	;2^16+1


		MACRO
$label		lower16	$r,$mask
;Reduce $r in place to the bits selected by $mask.  Every caller in this
;file passes a register holding 2^16-1, giving the C lower16() operation.
;Flags are not affected (plain AND, no S suffix).
$label		AND	$r,$r,$mask		;r <- r AND mask
		MEND


		MACRO
$label		LDRW	$r,$from,$offset,$mask
;Load 16-bit element number $offset (0..3) of the array at $from into $r,
;zero-extended.  Only whole-word loads are used: the word containing the
;element is fetched and the wanted half is isolated afterwards.
;Assumes $from is word aligned and that the lower-numbered element of each
;pair sits in the low half of the word (little-endian layout).
;$mask must hold 2^16-1; it is only needed for the even elements.
	ASSERT	$offset >= 0 :LAND: $offset <= 3	; only elements 0..3 are supported
	IF	$offset = 1 :LOR: $offset = 3		; odd element: upper half of its word
$label		LDR	$r,[$from,#($offset-1) << 1 ]	; word holding elements offset-1 and offset
		MOV	$r,$r,LSR #16			; bring the upper half down, zero filled
	ELSE						; even element: lower half of its word
$label		LDR	$r,[$from,#$offset << 1]	; word holding elements offset and offset+1
		AND	$r,$r,$mask			; drop the upper half
	ENDIF
		MEND

;LDRZ: load one 32-bit subkey word, $r <- Z[$offset][current round].
;$from must point at row 0 of the wanted round's column.  Each row of the
;table is (ROUNDS+1) words long, so row $offset lies (ROUNDS+1)*$offset
;words (times 4 for bytes) further on.
;$mask is accepted but not used in this macro                         +v0.08
		MACRO
$label		LDRZ	$r,$from,$offset,$mask
$label		LDR	$r,[$from,#((ROUNDS + 1) * $offset)<<2]
		MEND

		MACRO
$label		STRW	$r,$from,$offset
;Store the low 16 bits of $r as 16-bit element number $offset of the array
;at $from, as two byte stores: bits 0-7 at the lower address, bits 8-15 at
;the next one.
;$r is NOT preserved: it is left rotated right by 8 bits, because the
;restoring rotate below is commented out.
$label		STRB	$r,[$from,#($offset << 1)+0]	;low byte
		MOV	$r,$r,ROR #8		;get next byte
		STRB	$r,[$from,#($offset << 1)+1]	;high byte
;		MOV	$r,$r,ROR #24		;reverse injury to parameter
		MEND

		STARTCODE	cipher_idea

;Process one block: four 16-bit words are read from inblock, passed through
;ROUNDS iterations of the round function plus a final output transform
;using the subkey table Z, and four 16-bit words are written to outblock.
;The C statement being implemented is quoted before each group of
;instructions.
;
;On entry (per the C prototype and the register definitions above):
;  a1 = inblock, a2 = outblock, a3 = Z
;inblock is read with word loads (LDRW) and outblock is written with byte
;stores (STRW).  All input words are loaded before anything is stored.
;a1 and a2 double as mask and maxim during the computation, and a3 (Z) is
;advanced by one word per round.

;The later "LDR outblock,[sp]" relies on outblock being stored at the lowest
;address, i.e. on it being the lowest-numbered register in this list
;(a2, presumably r1 - hdr.Regs is not visible here).
		STMFD	sp !,{outblock,v1-v6,sl,fp,ip,lr}

;        x1=inblock[0];
;        x2=inblock[1];
;        x3=inblock[2];
;        x4=inblock[3];
;a1 is about to become mask, so the input pointer is moved to Zs1 first.
		MOV	Zs1,inblock		;NB inblock = mask
		MOV	mask,#&10000
		ADD	maxim,mask,#1
		SUB	mask,mask,#1		;set up mask and maxim for idea_mul
		LDRW	x1,Zs1,0,mask
		LDRW	x2,Zs1,1,mask
		LDRW	x3,Zs1,2,mask
		LDRW	x4,Zs1,3,mask


;        for (r=0; r<ROUNDS; r++)
;        {
		MOV	r,#0
idea_cipher_loop

;                x1 = mul(x1, Z[0][r]);
;x1 is copied to Zs1 so that the product can be written straight back to
;x1 (idea_mul needs its result register to differ from both operands).
		MOV	Zs1,x1
		LDRZ	Zs2,Z,0,mask
		idea_mul	Zs1,Zs2,x1,mask,maxim

;                x4 = mul(x4, Z[3][r]);
		MOV	Zs1,x4
		LDRZ	Zs2,Z,3,mask
		idea_mul	Zs1,Zs2,x4,mask,maxim

;                x2 = lower16(x2 + Z[1][r]);
		LDRZ	Zs1,Z,1,mask
		ADD	x2,x2,Zs1
		lower16	x2,mask

;                x3 = lower16(x3 + Z[2][r]);
		LDRZ	Zs1,Z,2,mask
		ADD	x3,x3,Zs1
		lower16	x3,mask

;                kk = mul(Z[4][r], (x1^x3));
		LDRZ	Zs1,Z,4,mask
		EOR	Zs2,x1,x3
		idea_mul	Zs1,Zs2,kk,mask,maxim

;                t1 = mul(Z[5][r], lower16(kk + (x2^x4)) );
		LDRZ	Zs1,Z,5,mask
		EOR	Zs2,x2,x4
		ADD	Zs2,Zs2,kk
		lower16	Zs2,mask
		idea_mul	Zs1,Zs2,t1,mask,maxim

;                t2 = lower16(kk + t1);
		ADD	t2,kk,t1
		lower16	t2,mask

;                x1 = x1^t1;
		EOR	x1,x1,t1

;                x4 = x4^t2;
		EOR	x4,x4,t2

;                a  = x2^t2;
		EOR	a,x2,t2

;                x2 = x3^t1;
		EOR	x2,x3,t1

;                x3 = a;
		MOV	x3,a

;        }
		ADD	Z,Z,#1<<2		;step Z on one 32-bit word: next round's column
		ADD	r,r,#1
		CMP	r,#ROUNDS
		BLT	idea_cipher_loop

;Z has been advanced ROUNDS times, so LDRZ now reads column ROUNDS.

;        outblock[0] = mul(x1, Z[0][ROUNDS]);
		LDRZ	Zs2,Z,0,mask
		idea_mul	x1,Zs2,t1,mask,maxim

;        outblock[3] = mul(x4, Z[3][ROUNDS]);
		LDRZ	Zs2,Z,3,mask
		idea_mul	x4,Zs2,t2,mask,maxim

;outblock shares a2 with maxim, so it is reloaded only now that the last
;idea_mul is done.  The stack pointer is not moved; the final LDMFD pops
;the saved copy.
;		LDMFD	sp,{outblock}		;restore outblock    -v0.08
		LDR	outblock,[sp]   ;restore outblock faster     ~v0.08

		STRW	t1,outblock,0
		STRW	t2,outblock,3

;x3 feeds outblock[1] and x2 feeds outblock[2], as in the C below.

;        outblock[1] = lower16(x3 + Z[1][ROUNDS]);
		LDRZ	t1,Z,1,mask
		ADD	t1,t1,x3
		lower16	t1,mask
		STRW	t1,outblock,1

;        outblock[2] = lower16(x2 + Z[2][ROUNDS]);
		LDRZ	t1,Z,2,mask
		ADD	t1,t1,x2
		lower16	t1,mask
		STRW	t1,outblock,2

;Restore the saved registers and return by loading the stacked lr into pc.
		LDMFD	sp !,{outblock,v1-v6,sl,fp,ip,pc}  ; removed ^ v0.08

		END
