/* GnarlPlot sprite plotter C code V1.09 17/9/06
   Copyright 2008 Jeffrey Lee
   This file is part of GnarlPlot.
   GnarlPlot is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   GnarlPlot is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with GnarlPlot.  If not, see <http://www.gnu.org/licenses/>. */

#ifndef _GP_SPRPLOT_C
#define _GP_SPRPLOT_C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "spr.h"
#include "col.h"
#include "WoumInclude:lib/sasm.h"
#include "WoumInclude:lib/error.h"

/* Header of the data block that a sprite's 'd' pointer refers to; the plot
   code generated below reads these fields via offsetof(). */
typedef struct { /* Must be the same as the one in simpspr.c! */
	int w,h,ox,oy; /* Size in pixels, then the origin that gets subtracted from the plot position */
	char d[0]; /* Pixel data follows the header */
} _gp_simpspr;

/*
				REGISTER ALLOCATION
*/

static int freeregs; /* Bitmask containing registers free to use: bit i set means Ri is unallocated */
static char *regnames[16]; /* Description passed to getreg() when Ri was last allocated; only used for diagnostics */

/* Print the register allocation state to stdout.
   f != 0: list every free register.
   f == 0: list every allocated register together with its description. */
static void listregs(int f)
{
	int reg;
	for (reg = 0; reg < 16; reg++)
	{
		int isfree = (freeregs & (1 << reg)) != 0;
		if (f)
		{
			if (isfree)
				printf("R%d\n",reg);
		}
		else if (!isfree)
			printf("R%d = %s\n",reg,regnames[reg]);
	}
}

/*static int countfree()
{
	int i,c;
	c = 0;
	for (i=0;i<16;i++)
		if (freeregs & (1 << i))
			c++;
	return c;
}*/

/* Allocate the lowest-numbered free register and remember 'name' as its
   description. If none is free, report the problem, dump the current
   allocations and abort the program. */
static int getreg(char *name)
{
	int reg = 0;
	while (reg < 16)
	{
		int bit = 1 << reg;
		if ((freeregs & bit) != 0)
		{
			freeregs &= ~bit; /* Mark as in use */
			regnames[reg] = name;
			return reg;
		}
		reg++;
	}
	warning("Ran out of registers when trying to get %s",name);
	printf("list:\n");
	listregs(0);
	abort();
}

/* Return register i to the free pool. Freeing a register that is already
   free is reported through error(). */
static void freereg(int i)
{
	int bit = 1 << i;
	if ((freeregs & bit) != 0)
		error("Tried to free an already free register R%d (%s)",i,regnames[i]);
	freeregs = freeregs | bit;
}

/*
				COLOUR CONVERSION
*/

/* Emit code converting the 8bpp colour in register 'in' to 24bpp.
   A new register is allocated for the result and returned in *out (it is
   not freed here); 'in' is left untouched. Returns the advanced code ptr.
   Bit routing performed by the emitted operands (i = input bit):
     byte 0 low nibble = i4 i2 i1 i0
     byte 1 low nibble = i6 i5 i1 i0
     byte 2 low nibble = i7 i3 i1 i0
   and the final instruction copies each low nibble into its high nibble. */
static int *asm_gp_8_to_24(int *a,int in,int *out)
{
	int bits = getreg("8_to_24 tmp");
	int dst = getreg("8_to_24 out");
	*out = dst;
	/* Shared low two bits, replicated into all three bytes */
	sasm_alu3(a++,AL,AND,dst,in,0x03);
	sasm_alu(a++,AL,ORR,dst,dst,dst,LSL,8);
	sasm_alu(a++,AL,ORR,dst,dst,dst,LSL,8);
	/* Byte 0: input bits 2 and 4 */
	sasm_alu3(a++,AL,AND,bits,in,0x04);
	sasm_alu(a++,AL,ORR,dst,dst,bits,0,0);
	sasm_alu3(a++,AL,AND,bits,in,0x10);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,1);
	/* Byte 2: input bits 3 and 7 */
	sasm_alu3(a++,AL,AND,bits,in,0x08);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSL,15);
	sasm_alu3(a++,AL,AND,bits,in,0x80);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSL,12);
	/* Byte 1: input bits 5 and 6 */
	sasm_alu3(a++,AL,AND,bits,in,0x60);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSL,5);
	/* Expand every 4 bit component to 8 bits */
	sasm_alu(a++,AL,ORR,dst,dst,dst,LSL,4);
	freereg(bits);
	return a;
}

/* Emit code converting the 8bpp colour in register 'in' to 16bpp
   (three 5 bit fields at bits 0-4, 5-9 and 10-14).
   A new register is allocated for the result and returned in *out (it is
   not freed here); 'in' is left untouched. Returns the advanced code ptr.
   Each 5 bit field is built, top bit first, from (i = input bit):
     bits 0-4   = i4 i2 i1 i0 i4
     bits 5-9   = i6 i5 i1 i0 i6
     bits 10-14 = i7 i3 i1 i0 i7 */
static int *asm_gp_8_to_16(int *a,int in,int *out)
{
	int bits = getreg("8_to_16 tmp");
	int dst = getreg("8_to_16 out");
	*out = dst;
	/* Shared low two bits go to bits 1-2 of every field */
	sasm_alu3(a++,AL,AND,dst,in,0x03);
	sasm_alu(a++,AL,MOV,dst,0,dst,LSL,1);
	sasm_alu(a++,AL,ORR,dst,dst,dst,LSL,5);
	sasm_alu(a++,AL,ORR,dst,dst,dst,LSL,5);
	/* Field at bits 0-4 */
	sasm_alu3(a++,AL,AND,bits,in,0x04);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSL,1);
	sasm_alu3(a++,AL,AND,bits,in,0x10);
	sasm_alu(a++,AL,ORR,dst,dst,bits,0,0);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,4);
	/* Field at bits 10-14 */
	sasm_alu3(a++,AL,AND,bits,in,0x08);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSL,10);
	sasm_alu3(a++,AL,AND,bits,in,0x80);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSL,7);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSL,3);
	/* Field at bits 5-9 */
	sasm_alu3(a++,AL,AND,bits,in,0x60);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSL,3);
	sasm_alu3(a++,AL,AND,bits,in,0x40);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,1);
	freereg(bits);
	return a;
}

/* Emit code converting the 16bpp colour in register 'in' (5 bit fields at
   bits 0-4, 5-9, 10-14) to 24bpp (one byte per field).
   Each 5 bit value is widened to 8 bits by repeating its top three bits
   below it. A new register is allocated for the result and returned in
   *out (not freed here); 'in' is left untouched.
   Returns the advanced code ptr. */
static int *asm_gp_16_to_24(int *a,int in,int *out)
{
	int field = getreg("16_to_24 tmp");
	int dst = getreg("16_to_24 out");
	*out = dst;
	/* Bits 0-4 -> byte 0 */
	sasm_alu3(a++,AL,AND,dst,in,0x1F);
	sasm_alu(a++,AL,MOV,dst,0,dst,LSL,3);
	sasm_alu(a++,AL,ORR,dst,dst,dst,LSR,5);
	/* Bits 5-9 -> byte 1 */
	sasm_alu3(a++,AL,AND,field,in,0x3E0);
	sasm_alu(a++,AL,ORR,field,field,field,LSR,5);
	sasm_alu3(a++,AL,AND,field,field,0x3FC); /* Keep the 8 bits at 2-9 */
	sasm_alu(a++,AL,ORR,dst,dst,field,LSL,6);
	/* Bits 10-14 -> byte 2 */
	sasm_alu3(a++,AL,AND,field,in,0x7C00);
	sasm_alu(a++,AL,ORR,field,field,field,LSR,5);
	sasm_alu(a++,AL,MOV,field,0,field,LSR,7);
	sasm_alu(a++,AL,ORR,dst,dst,field,LSL,16);
	freereg(field);
	return a;
}

/* Emit code converting the 16bpp colour in register 'in' (5 bit fields at
   bits 0-4, 5-9, 10-14) to 8bpp.
   Output bits 0-1 come from summing bits 1-2 of all three fields,
   multiplying by 3 and shifting right by 4. The remaining output bits are
   the top two bits of each field:
     out bit 2,4 = in bit 3,4    out bits 5-6 = in bits 8-9
     out bit 3,7 = in bit 13,14
   A new register is allocated for the result and returned in *out (not
   freed here); 'in' is left untouched. Returns the advanced code ptr. */
static int *asm_gp_16_to_8(int *a,int in,int *out)
{
	int bits = getreg("16_to_8 tmp");
	int dst = getreg("16_to_8 out");
	*out = dst;
	/* Sum the middle bits of the three fields */
	sasm_alu3(a++,AL,AND,dst,in,0x06);
	sasm_alu3(a++,AL,AND,bits,in,0xC0);
	sasm_alu(a++,AL,ADD,dst,dst,bits,LSR,5);
	sasm_alu3(a++,AL,AND,bits,in,0x1800);
	sasm_alu(a++,AL,ADD,dst,dst,bits,LSR,10);
	/* (sum * 3) >> 4 leaves a two bit value */
	sasm_alu(a++,AL,ADD,dst,dst,dst,LSL,1);
	sasm_alu(a++,AL,MOV,dst,0,dst,LSR,4);
	/* Top two bits of the field at 0-4 */
	sasm_alu3(a++,AL,AND,bits,in,0x08);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,1);
	sasm_alu3(a++,AL,AND,bits,in,0x10);
	sasm_alu(a++,AL,ORR,dst,dst,bits,0,0);
	/* Top two bits of the field at 5-9 */
	sasm_alu3(a++,AL,AND,bits,in,0x300);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,3);
	/* Top two bits of the field at 10-14 */
	sasm_alu3(a++,AL,AND,bits,in,0x2000);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,10);
	sasm_alu3(a++,AL,AND,bits,in,0x4000);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,7);
	freereg(bits);
	return a;
}

/* Emit code converting the 24bpp colour in register 'in' to 16bpp by
   keeping the top five bits of each byte (packed at bits 0-4, 5-9, 10-14).
   A new register is allocated for the result and returned in *out (not
   freed here); 'in' is left untouched. Returns the advanced code ptr. */
static int *asm_gp_24_to_16(int *a,int in,int *out)
{
	int field = getreg("24_to_16 tmp");
	int dst = getreg("24_to_16 out");
	*out = dst;
	/* Byte 0 */
	sasm_alu3(a++,AL,AND,dst,in,0xF8);
	sasm_alu(a++,AL,MOV,dst,0,dst,LSR,3);
	/* Byte 1 */
	sasm_alu3(a++,AL,AND,field,in,0xF800);
	sasm_alu(a++,AL,ORR,dst,dst,field,LSR,6);
	/* Byte 2 */
	sasm_alu3(a++,AL,AND,field,in,0xF80000);
	sasm_alu(a++,AL,ORR,dst,dst,field,LSR,9);
	freereg(field);
	return a;
}

/* Emit code converting the 24bpp colour in register 'in' to 8bpp.
   Output bits 0-1 come from summing bits 4-5 of all three bytes,
   multiplying by 3 and shifting right by 7. The remaining output bits are
   the top two bits of each byte:
     out bit 2,4 = in bit 6,7    out bits 5-6 = in bits 14-15
     out bit 3,7 = in bit 22,23
   A new register is allocated for the result and returned in *out (not
   freed here); 'in' is left untouched. Returns the advanced code ptr. */
static int *asm_gp_24_to_8(int *a,int in,int *out)
{
	int bits = getreg("24_to_8 tmp");
	int dst = getreg("24_to_8 out");
	*out = dst;
	/* Sum bits 4-5 of each byte */
	sasm_alu3(a++,AL,AND,dst,in,0x30);
	sasm_alu3(a++,AL,AND,bits,in,0x3000);
	sasm_alu(a++,AL,ADD,dst,dst,bits,LSR,8);
	sasm_alu3(a++,AL,AND,bits,in,0x300000);
	sasm_alu(a++,AL,ADD,dst,dst,bits,LSR,16);
	/* (sum * 3) >> 7 leaves a two bit value */
	sasm_alu(a++,AL,ADD,dst,dst,dst,LSL,1);
	sasm_alu(a++,AL,MOV,dst,0,dst,LSR,7);
	/* Top two bits of byte 0 */
	sasm_alu3(a++,AL,AND,bits,in,0x40);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,4);
	sasm_alu3(a++,AL,AND,bits,in,0x80);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,3);
	/* Top two bits of byte 1 */
	sasm_alu3(a++,AL,AND,bits,in,0xC000);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,9);
	/* Top two bits of byte 2 */
	sasm_alu3(a++,AL,AND,bits,in,0x400000);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,19);
	sasm_alu3(a++,AL,AND,bits,in,0x800000);
	sasm_alu(a++,AL,ORR,dst,dst,bits,LSR,16);
	freereg(bits);
	return a;
}

/* Emit code that combines a 24bpp sprite pixel carrying a greyscale mask
   with a screen pixel.
   In:
   a = where to write the generated code
   spr = Register holding the sprite pixel; its mask is taken from bits 24-31
   scr = Register holding the screen pixel
   Out:
   *out = newly allocated register that receives the result (not freed here)
   Returns the advanced code ptr.
   For each of the three low bytes the emitted code forms
   spr + ((scr * m) >> 8), where m is the mask rescaled from 0..255 to
   0..256. A mask of zero copies spr unchanged and skips the arithmetic. */
static int *asm_gp_24_grey(int *a,int spr,int scr,int *out)
{
	int *l1;
	int grey = getreg("24_grey mask");
	int tmp = getreg("24_grey tmp");
	*out = getreg("24_grey out");
	sasm_alu(a++,AL,MOV,grey,0,spr,LSR,24);
	sasm_alu(a++,AL,ADD+S,grey,grey,grey,LSR,7); /* 255 -> 256; also sets the flags tested by the EQ instructions below */
	sasm_alu(a++,EQ,MOV,*out,0,spr,0,0);
	l1 = a++; /* Insert branch forward here */
	/* else calculate result */
	sasm_alu3(a++,AL,AND,*out,spr,0xFF); /* R */
	sasm_alu3(a++,AL,AND,tmp,scr,0xFF);
	sasm_mul(a++,AL,0,tmp,grey,tmp);
	sasm_alu(a++,AL,ADD,*out,*out,tmp,LSR,8);
	/* Shift the component's overflow bit out of the top of the word, then
	   force the component to 0xFF when CS. NOTE(review): this relies on the
	   flag-setting shift leaving the last bit shifted out in carry, as the
	   ARM MOVS does - confirm against sasm. */
	sasm_alu(a++,AL,MOV+S,*out,0,*out,LSL,24);
	sasm_alu(a++,AL,MOV,*out,0,*out,LSR,24);
	sasm_alu3(a++,CS,ORR,*out,*out,0xFF);
	sasm_alu3(a++,AL,AND,tmp,spr,0xFF00); /* G */
	sasm_alu(a++,AL,ORR,*out,*out,tmp,0,0);
	sasm_alu3(a++,AL,AND,tmp,scr,0xFF00);
	sasm_mul(a++,AL,0,tmp,grey,tmp);
	sasm_alu(a++,AL,MOV,tmp,0,tmp,LSR,16); /* Undo the <<8 of the component and the <<8 of the mask scale */
	sasm_alu(a++,AL,ADD,*out,*out,tmp,LSL,8);
	sasm_alu(a++,AL,MOV+S,*out,0,*out,LSL,16);
	sasm_alu(a++,AL,MOV,*out,0,*out,LSR,16);
	sasm_alu3(a++,CS,ORR,*out,*out,0xFF00);
	sasm_alu3(a++,AL,AND,tmp,spr,0xFF0000); /* B */
	sasm_alu(a++,AL,ORR,*out,*out,tmp,0,0);
	sasm_alu3(a++,AL,AND,tmp,scr,0xFF0000);
	sasm_mul(a++,AL,0,tmp,grey,tmp);
	sasm_alu(a++,AL,MOV,tmp,0,tmp,LSR,24);
	sasm_alu(a++,AL,ADD,*out,*out,tmp,LSL,16);
	sasm_alu(a++,AL,MOV+S,*out,0,*out,LSL,8);
	sasm_alu(a++,AL,MOV,*out,0,*out,LSR,8);
	sasm_alu3(a++,CS,ORR,*out,*out,0xFF0000);
	/* Insert branch */ sasm_b(l1,EQ,0,a); /* Fill the slot reserved at l1: skip to here when the mask was zero */
	freereg(grey);
	freereg(tmp);
	return a;
}

static int *asm_gp_24_rgb(int *a,int spr,int rgb,int scr,int *out)
{
	int *l1;
	int tmp = getreg("24_rgb tmp");
	int tmp2 = getreg("24_rgb tmp2");
	*out = getreg("24_rgb out");
	sasm_alu3(a++,AL,AND,tmp,rgb,0xFF); /* R */
	sasm_alu(a++,AL,ADD+S,tmp,tmp,tmp,LSR,7);
	sasm_alu3(a++,AL,AND,*out,spr,0xFF);
	l1 = a++; /* Insert branch forward here */
	sasm_alu3(a++,AL,AND,tmp2,scr,0xFF);
	sasm_mul(a++,AL,0,tmp,tmp2,tmp);
	sasm_alu(a++,AL,ADD,*out,*out,tmp,LSR,8);
	sasm_alu(a++,AL,MOV+S,*out,0,*out,LSL,24);
	sasm_alu(a++,AL,MOV,*out,0,*out,LSR,24);
	sasm_alu3(a++,AL,ORR,*out,*out,0xFF);
	/* Insert branch */ sasm_b(l1,EQ,0,a);
	sasm_alu3(a++,AL,AND,tmp,rgb,0xFF00); /* G */
	sasm_alu(a++,AL,ADD+S,tmp,tmp,tmp,LSR,7);
	sasm_alu3(a++,AL,AND,tmp2,spr,0xFF00);
	sasm_alu(a++,AL,ORR,*out,*out,tmp2,0,0);
	l1 = a++; /* Insert branch forward here */
	sasm_alu3(a++,AL,AND,tmp2,scr,0xFF00);
	sasm_mul(a++,AL,0,tmp,tmp2,tmp);
	sasm_alu(a++,AL,MOV,tmp,0,tmp,LSR,16);
	sasm_alu(a++,AL,ADD,*out,*out,tmp,LSL,8);
	sasm_alu(a++,AL,MOV+S,*out,0,*out,LSL,16);
	sasm_alu(a++,AL,MOV,*out,0,*out,LSR,16);
	sasm_alu3(a++,AL,ORR,*out,*out,0xFF00);
	/* Insert branch */ sasm_b(l1,EQ,0,a);
	sasm_alu3(a++,AL,AND,tmp,rgb,0xFF0000); /* B */
	sasm_alu(a++,AL,ADD+S,tmp,tmp,tmp,LSR,7);
	sasm_alu3(a++,AL,AND,tmp2,spr,0xFF0000);
	sasm_alu(a++,AL,ORR,*out,*out,tmp2,0,0);
	l1 = a++; /* Insert branch forward here */
	sasm_alu3(a++,AL,AND,tmp2,scr,0xFF0000);
	sasm_mul(a++,AL,0,tmp,tmp2,tmp);
	sasm_alu(a++,AL,MOV,tmp,0,tmp,LSR,24);
	sasm_alu(a++,AL,ADD,*out,*out,tmp,LSL,16);
	sasm_alu(a++,AL,MOV+S,*out,0,*out,LSL,8);
	sasm_alu(a++,AL,MOV,*out,0,*out,LSR,8);
	sasm_alu3(a++,AL,ORR,*out,*out,0xFF0000);
	/* Insert branch */ sasm_b(l1,EQ,0,a);
	freereg(tmp);
	freereg(tmp2);
	return a;
}

/* Emit code converting the colour held in register 'pix' from pixel size
   'inps' to pixel size 'outps' (0 = 8bpp, 1 = 16bpp, anything else =
   24bpp). The result is moved back into 'pix'; nothing is emitted when
   the two sizes are equal. Returns the advanced code ptr. */
static int *asm_gp_convert(int *a,int inps,int outps,int pix)
{
	int res;
	if (inps == outps)
		return a;
	switch (inps)
	{
	case 0:
		if (outps == 1)
			a = asm_gp_8_to_16(a,pix,&res);
		else
			a = asm_gp_8_to_24(a,pix,&res);
		break;
	case 1:
		if (outps == 0)
			a = asm_gp_16_to_8(a,pix,&res);
		else
			a = asm_gp_16_to_24(a,pix,&res);
		break;
	default:
		if (outps == 0)
			a = asm_gp_24_to_8(a,pix,&res);
		else
			a = asm_gp_24_to_16(a,pix,&res);
		break;
	}
	/* Converters hand back a fresh register; copy it home and release it */
	sasm_alu(a++,AL,MOV,pix,0,res,0,0);
	freereg(res);
	return a;
}

/* Emit code that marks the pixel in register 'pix' as fully "on" for a
   screen of pixel size scr_ps and mask type scr_mt.
   GP_MT_ONOFF sets the mask bits; the other mask types clear them.
   For 24bpp with a per-component mask the mask lives in its own register,
   which is allocated here, zeroed, and returned in *pix2.
   Returns the advanced code ptr.
   This can probably be optimised because most pixels will already have
   their mask bits clear from the colour conversion function */
static int *asm_gp_setmaskon(int *a,int scr_ps,int scr_mt,int pix,int *pix2)
{
	if (scr_mt == GP_MT_NONE)
		return a;
	if (scr_mt == GP_MT_ONOFF)
	{
		switch (scr_ps)
		{
		case 0:
			sasm_alu3(a++,AL,ORR,pix,pix,0xFF00);
			break;
		case 1:
			sasm_alu3(a++,AL,ORR,pix,pix,0x8000);
			break;
		default:
			sasm_alu3(a++,AL,ORR,pix,pix,0xFF000000);
			break;
		}
		return a;
	}
	if (scr_mt == GP_MT_GREY)
	{
		switch (scr_ps)
		{
		case 0:
			sasm_alu3(a++,AL,BIC,pix,pix,0xFF00);
			break;
		case 1:
			sasm_alu3(a++,AL,BIC,pix,pix,0xFF0000);
			break;
		default:
			sasm_alu3(a++,AL,BIC,pix,pix,0xFF000000);
			break;
		}
		return a;
	}
	/* Remaining mask type */
	switch (scr_ps)
	{
	case 0:
		sasm_alu3(a++,AL,BIC,pix,pix,0xFF00);
		break;
	case 1:
		/* Drop everything above bit 15 */
		sasm_alu(a++,AL,MOV,pix,0,pix,LSL,16);
		sasm_alu(a++,AL,MOV,pix,0,pix,LSR,16);
		break;
	default:
		*pix2 = getreg("24bpp GP_MT_RGB mask");
		sasm_alu3(a++,AL,MOV,*pix2,0,0);
		break;
	}
	return a;
}

/* Emit code that copies the mask part of the pixel in 'pix' into its own
   register, which is allocated here and returned in *pix2.
   Nothing is emitted for GP_MT_NONE, nor for a per-component mask at
   24bpp, where pix2 is already the mask.
   The mask is found by shifting pix right by 8 (8bpp), 15 or 16 (16bpp,
   on/off or other masks) or 24 (24bpp).
   Returns the advanced code ptr. */
static int *asm_gp_seperatemask(int *a,int scr_ps,int scr_mt,int pix,int *pix2)
{
	int shift;
	if (scr_mt == GP_MT_NONE)
		return a;
	if (scr_ps == 0)
		shift = 8;
	else if (scr_ps == 1)
		shift = (scr_mt == GP_MT_ONOFF) ? 15 : 16;
	else
	{
		if ((scr_mt != GP_MT_ONOFF) && (scr_mt != GP_MT_GREY))
			return a; /* pix2 is already the mask */
		shift = 24;
	}
	*pix2 = getreg("pixel mask");
	sasm_alu(a++,AL,MOV,*pix2,0,pix,LSR,shift);
	return a;
}

/* Emit code that scales a screen mask by a greyscale sprite mask.
   In:
   a = where to write the generated code
   scr_ps,scr_mt = Constant pixel size & mask type of the screen
   msk = Register holding the greyscale sprite mask
   msk2 = Register holding the screen mask; receives the result
   Returns the advanced code ptr. Nothing is emitted for GP_MT_NONE. */
static int *asm_gp_multmask(int *a,int scr_ps,int scr_mt,int msk,int msk2)
{
	int tmp;
	/* msk = greyscale sprite mask
	   msk2 = screen mask in scr_ps,scr_mt */
	if (scr_mt == GP_MT_ONOFF)
	{
		/* Just force the pixel on */
		if ((scr_ps == 0) || (scr_ps == 2))
			sasm_alu3(a++,AL,MOV,msk2,0,255);
		else
			sasm_alu3(a++,AL,MOV,msk2,0,1);
	}
	else if (scr_mt == GP_MT_GREY)
	{
		/* Simple multiply */
		sasm_mla(a++,AL,0,msk2,msk,msk2,msk2); /* (msk+1)*msk2 */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,LSR,8); /* /256 */
	}
	else if (scr_mt == GP_MT_RGB)
	{
		a = asm_gp_convert(a,scr_ps,2,msk2); /* Promote screen mask to 24bpp */
		/* Need to multiply each component */
		/* Each round takes the top component out into tmp, rotates the word
		   so that component's slot becomes the (empty) low half, and uses
		   the multiply-accumulate to drop component*msk + component there.
		   After three rounds the bytes are back in their original order. */
		tmp = getreg("multmask tmp");
		sasm_alu(a++,AL,MOV,tmp,0,msk2,LSR,16); /* Get blue */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,ROR,16); /* GGRR00BB */
		sasm_mla(a++,AL,0,msk2,tmp,msk,msk2); /* GGRRBBxx */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,LSR,8); /* 00GGRRBB */
		sasm_alu(a++,AL,MOV,tmp,0,msk2,LSR,16); /* Get green */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,ROR,16); /* RRBB00GG */
		sasm_mla(a++,AL,0,msk2,tmp,msk,msk2); /* RRBBGGxx */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,LSR,8); /* 00RRBBGG */
		sasm_alu(a++,AL,MOV,tmp,0,msk2,LSR,16); /* Get red */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,ROR,16); /* BBGG00RR */
		sasm_mla(a++,AL,0,msk2,tmp,msk,msk2); /* BBGGRRxx */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,LSR,8); /* 00BBGGRR */
		freereg(tmp);
		a = asm_gp_convert(a,2,scr_ps,msk2); /* demote screen mask */
	}
	return a;
}

/* Emit code that scales a screen mask by a per-component (RGB) sprite mask.
   In:
   a = where to write the generated code
   spr_ps = Constant pixel size of the sprite mask (not used by this routine)
   scr_ps,scr_mt = Constant pixel size & mask type of the screen
   msk = Register holding the sprite mask; overwritten in the GP_MT_GREY case
   msk2 = Register holding the screen mask; receives the result
   Returns the advanced code ptr. Nothing is emitted for GP_MT_NONE. */
static int *asm_gp_multmask24(int *a,int spr_ps,int scr_ps,int scr_mt,int msk,int msk2)
{
	int tmp,tmp2;
	/* msk = RGB sprite mask in spr_ps
	   msk2 = screen mask in scr_ps,scr_mt */
	if (scr_mt == GP_MT_ONOFF)
	{
		/* Just force the pixel on */
		if ((scr_ps == 0) || (scr_ps == 2))
			sasm_alu3(a++,AL,MOV,msk2,0,255);
		else
			sasm_alu3(a++,AL,MOV,msk2,0,1);
	}
	else if (scr_mt == GP_MT_GREY)
	{
		/* Need to convert msk to a greyscale mask so we can just multiply the two together */
		tmp = getreg("multmask tmp");
		sasm_alu3(a++,AL,AND,tmp,msk,255); /* R */
		sasm_alu3(a++,AL,BIC,msk,msk,255);
		sasm_alu(a++,AL,MOV,msk,0,msk,ROR,16); /* 00BBGG00 -> GG0000BB */
		sasm_alu(a++,AL,ADD,tmp,tmp,msk,0,0); /* GG000R+B */
		sasm_alu(a++,AL,ADD,tmp,tmp,msk,LSR,24); /* GG0R+G+B */
		sasm_alu3(a++,AL,BIC,tmp,tmp,0xFF000000); /* R+G+B */
		/* The next three adds multiply the sum by roughly 4/3 using the
		   binary fraction 1.0101..., so that the 0..765 sum spans about
		   0..1020 */
		sasm_alu(a++,AL,ADD,msk,tmp,tmp,LSR,2); /* 101000000 */
		sasm_alu(a++,AL,ADD,msk,msk,msk,LSR,4); /* 101010100 */
		sasm_alu(a++,AL,ADD,msk,msk,msk,LSR,8); /* 101010101*3=1023 */
		sasm_alu3(a++,AL,ADD,msk,msk,1); /* 1024 */
		sasm_mla(a++,AL,0,msk2,msk,msk2,msk2); /* (msk+1)*msk2 */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,LSR,10); /* /1024 */
		freereg(tmp);
	}
	else if (scr_mt == GP_MT_RGB)
	{
		/* Need to multiply each component */
		/* Same rotate-and-accumulate scheme as asm_gp_multmask, but the
		   multiplier (tmp2) is the matching byte of msk rather than one
		   shared value. NOTE(review): the blue extraction does not mask
		   tmp2, so it assumes bits 24-31 of msk are clear - confirm. */
		tmp = getreg("multmask tmp");
		tmp2 = getreg("multmask tmp2");
		sasm_alu(a++,AL,MOV,tmp,0,msk2,LSR,16); /* Get blue */
		sasm_alu(a++,AL,MOV,tmp2,0,msk,LSR,16);
		sasm_alu(a++,AL,MOV,msk2,0,msk2,ROR,16); /* GGRR00BB */
		sasm_mla(a++,AL,0,msk2,tmp,tmp2,msk2); /* GGRRBBxx */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,LSR,8); /* 00GGRRBB */
		sasm_alu(a++,AL,MOV,tmp,0,msk2,LSR,16); /* Get green */
		sasm_alu(a++,AL,MOV,tmp2,0,msk,LSR,8);
		sasm_alu3(a++,AL,AND,tmp2,tmp2,255);
		sasm_alu(a++,AL,MOV,msk2,0,msk2,ROR,16); /* RRBB00GG */
		sasm_mla(a++,AL,0,msk2,tmp,tmp2,msk2); /* RRBBGGxx */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,LSR,8); /* 00RRBBGG */
		sasm_alu(a++,AL,MOV,tmp,0,msk2,LSR,16); /* Get red */
		sasm_alu3(a++,AL,AND,tmp2,msk,255);
		sasm_alu(a++,AL,MOV,msk2,0,msk2,ROR,16); /* BBGG00RR */
		sasm_mla(a++,AL,0,msk2,tmp,tmp2,msk2); /* BBGGRRxx */
		sasm_alu(a++,AL,MOV,msk2,0,msk2,LSR,8); /* 00BBGGRR */
		freereg(tmp);
		freereg(tmp2);
	}
	return a;
}

/* Emit code that ORs the mask held in register pix2 back into the pixel
   in register pix, then releases pix2.
   The mask is shifted left by 8 (8bpp), 15 or 16 (16bpp, on/off or other
   masks) or 24 (24bpp) before being merged.
   Nothing is emitted and pix2 is kept for GP_MT_NONE, and for a
   per-component mask at 24bpp where pix2 itself stays the mask.
   Returns the advanced code ptr. */
static int *asm_gp_combinemask(int *a,int scr_ps,int scr_mt,int pix,int pix2)
{
	int shift;
	if (scr_mt == GP_MT_NONE)
		return a;
	if (scr_ps == 0)
		shift = 8;
	else if (scr_ps == 1)
		shift = (scr_mt == GP_MT_ONOFF) ? 15 : 16;
	else if ((scr_mt == GP_MT_ONOFF) || (scr_mt == GP_MT_GREY))
		shift = 24;
	else
		return a; /* pix2 is already the mask */
	sasm_alu(a++,AL,ORR,pix,pix,pix2,LSL,shift);
	freereg(pix2);
	return a;
}

/*
				PIXEL READERS & WRITERS
*/

/* Emit code that loads one pixel of type (ps,mt) from the address in
   register buf. The pixel is not unpacked in any way.
   In:
   a = where to write the generated code
   buf = Register holding the address to read from
   ps,mt = Constant pixel size & mask type; gp_col_trueps() turns them into
           the storage size used here (1 << size bytes)
   inc = Constant: non-zero to step buf past the pixel as well
   Out:
   *reg1 = newly allocated register holding the pixel
   *reg2 = second newly allocated register, only for storage size 3
   Returns the advanced code ptr. */
static int *asm_gp_loadpix(int *a,int buf,int ps,int mt,int *reg1,int *reg2,int inc)
{
	switch (gp_col_trueps(ps,mt))
	{
	case 0: /* Single byte */
		*reg1 = getreg("loadpix reg1");
		if (!inc)
			sasm_mem2(a++,AL,B+L+PRE,*reg1,buf,0);
		else
			sasm_mem2(a++,AL,B+U+L,*reg1,buf,1);
		break;
	case 1: /* Two bytes */
		*reg1 = getreg("loadpix reg1");
		if (sasm_capabilities & SASM_H)
		{
			if (!inc)
				sasm_mem2(a++,AL,H+L+PRE,*reg1,buf,0);
			else
				sasm_mem2(a++,AL,H+U+L,*reg1,buf,2);
			break;
		}
		/* No halfword access: fetch two bytes and join them */
		*reg2 = getreg("loadpix reg2");
		if (!inc)
		{
			sasm_mem2(a++,AL,B+L+PRE,*reg1,buf,0);
			sasm_mem2(a++,AL,B+L+U+PRE,*reg2,buf,1);
		}
		else
		{
			sasm_mem2(a++,AL,B+U+L,*reg1,buf,1);
			sasm_mem2(a++,AL,B+U+L,*reg2,buf,1);
		}
		sasm_alu(a++,AL,ORR,*reg1,*reg1,*reg2,LSL,8);
		freereg(*reg2);
		break;
	case 2: /* One word */
		*reg1 = getreg("loadpix reg1");
		if (!inc)
			sasm_mem2(a++,AL,L+PRE,*reg1,buf,0);
		else
			sasm_mem2(a++,AL,U+L,*reg1,buf,4);
		break;
	default: /* ps 3: two words */
		*reg1 = getreg("loadpix reg1");
		*reg2 = getreg("loadpix reg2");
		/* Because getreg allocates the lowest registers first, the words will still be in the correct order */
		if (!inc)
			sasm_mem4(a++,AL,L+IA,buf,(1 << *reg1)+(1 << *reg2));
		else
			sasm_mem4(a++,AL,L+IA+W,buf,(1 << *reg1)+(1 << *reg2));
		break;
	}
	return a;
}

/* Emit code that stores one pixel of type (ps,mt) to the address in
   register buf.
   In:
   a = where to write the generated code
   buf = Register holding the address to write to
   ps,mt = Constant pixel size & mask type; gp_col_trueps() turns them into
           the storage size used here (1 << size bytes)
   reg1 = Register holding the pixel. It is shifted right by 8 when a two
          byte pixel has to be written as separate bytes.
   reg2 = Pointer to the register holding the second word; required for
          storage size 3, where that register is freed afterwards
   inc = Constant: non-zero to step buf past the pixel as well
   Returns the advanced code ptr. */
static int *asm_gp_savepix(int *a,int buf,int ps,int mt,int reg1,int *reg2,int inc)
{
	switch (gp_col_trueps(ps,mt))
	{
	case 0: /* Single byte */
		if (!inc)
			sasm_mem2(a++,AL,B+PRE,reg1,buf,0);
		else
			sasm_mem2(a++,AL,B+U,reg1,buf,1);
		break;
	case 1: /* Two bytes */
		if (sasm_capabilities & SASM_H)
		{
			if (!inc)
				sasm_mem2(a++,AL,H+PRE,reg1,buf,0);
			else
				sasm_mem2(a++,AL,H+U,reg1,buf,2);
		}
		else if (!inc)
		{
			sasm_mem2(a++,AL,B+PRE,reg1,buf,0);
			sasm_alu(a++,AL,MOV,reg1,0,reg1,LSR,8);
			sasm_mem2(a++,AL,B+U+PRE,reg1,buf,1);
		}
		else
		{
			sasm_mem2(a++,AL,B+U,reg1,buf,1);
			sasm_alu(a++,AL,MOV,reg1,0,reg1,LSR,8);
			sasm_mem2(a++,AL,B+U,reg1,buf,1);
		}
		break;
	case 2: /* One word */
		if (!inc)
			sasm_mem2(a++,AL,PRE,reg1,buf,0);
		else
			sasm_mem2(a++,AL,U,reg1,buf,4);
		break;
	default: /* ps == 3: two words */
		if (reg2 == 0)
			error("asm_gp_savepix called for trueps 3 pixel without a reg2");
		if (reg1 < *reg2)
		{
			/* STM optimisation: registers are already in store order */
			if (!inc)
				sasm_mem4(a++,AL,IA,buf,(1 << reg1)+(1 << *reg2));
			else
				sasm_mem4(a++,AL,IA+W,buf,(1 << reg1)+(1 << *reg2));
		}
		else if (!inc)
		{
			sasm_mem2(a++,AL,PRE,reg1,buf,0);
			sasm_mem2(a++,AL,U+PRE,*reg2,buf,4);
		}
		else
		{
			sasm_mem2(a++,AL,U,reg1,buf,4);
			sasm_mem2(a++,AL,U,*reg2,buf,4);
		}
		freereg(*reg2);
		break;
	}
	return a;
}

/* Emit code that copies 'w' words of pixel data from the sprite to the
   screen, advancing both pointers by w words.
   In:
   a = where to write the generated code
   c = Condition code put on every instruction of the unmasked copy
   c2 = Condition code of the branch that skips the masked copy
   spr,scr = Registers holding the source & destination pointers
   ps = Constant pixel size
   spr_mt = Constant sprite mask type: GP_MT_NONE or GP_MT_ONOFF only
   scr_mt = Constant screen mask type
   w = Constant number of words to move. One register is allocated per
       word and regs[] has 7 slots, the last of which doubles as the
       scratch register of the masked 8/16bpp path, so w must not exceed
       7 (6 for that path). This is not checked here.
   cx = Register to compare against zero again once a masked copy has
        trashed the flags, or negative if no such re-test is wanted
   Returns the advanced code ptr. */
static int *asm_gp_spr_transfer(int *a,int c,int c2,int spr,int scr,int ps,int spr_mt,int scr_mt,int w,int cx)
{
	int regs[7];
	int r,regmask,maskmask,maskop;
	int *l1;
	/* Transfer 'w' words from spr to scr
	   Sprite masks allowed are GP_MT_NONE and GP_MT_ONOFF */
	if (scr_mt == GP_MT_ONOFF)
		maskop = ORR; /* Set bits to enable pixel */
	else
		maskop = BIC; /* Else clear bits to enable pixel */
	regmask = 0;
	if (spr_mt == GP_MT_NONE)
	{
		/* Straight load & store */
		for (r=0;r<w;r++)
		{
			regs[r] = getreg("transfer data");
			regmask |= 1 << regs[r];
		}
		sasm_mem4(a++,c,L+IA+W,spr,regmask);
		/* Set mask bits if needed */
		if (scr_mt)
			for (r=0;r<w;r++)
			{
				if (ps == 0)
					sasm_alu3(a++,c,maskop,regs[r],regs[r],0xFF00);
				else if (ps == 1)
				{
					if (scr_mt == GP_MT_ONOFF)
					{
						/* Two pixels per word, so two mask bits: the same
						   pair (0x8000 and 0x8000 << 16) that the masked
						   path below tests. One instruction each. */
						sasm_alu3(a++,c,ORR,regs[r],regs[r],0x8000);
						sasm_alu3(a++,c,ORR,regs[r],regs[r],0x80000000);
					}
					else
					{
						sasm_alu(a++,c,MOV,regs[r],0,regs[r],LSL,16);
						sasm_alu(a++,c,MOV,regs[r],0,regs[r],LSR,16);
					}
				}
				if ((ps == 0) || (ps == 2))
					sasm_alu3(a++,c,maskop,regs[r],regs[r],0xFF000000);
			}
		sasm_mem4(a++,c,IA+W,scr,regmask);
		for (r=0;r<w;r++)
			freereg(regs[r]);
		return a;
	}
	/* Else we need to load & check as we go */
	/* Do different plotters per ps */
	/* We'll need a branch instruction though to skip the code if were not plotting */
	l1 = a++;
	if (ps == 2)
	{
		/* Load all words, then only store the ones needed */
		for (r=0;r<w;r++)
		{
			regs[r] = getreg("transfer data");
			regmask |= 1 << regs[r];
		}
		sasm_mem4(a++,AL,L+IA+W,spr,regmask);
		for (r=0;r<w;r++)
		{
			sasm_alu3(a++,AL,TST,0,regs[r],0xFF000000);
			if (scr_mt > GP_MT_ONOFF) /* && ps == 2 */
				sasm_alu3(a++,NE,BIC,regs[r],regs[r],0xFF000000);
			sasm_mem2(a++,NE,U+PRE,regs[r],scr,r*4);
		}
		for (r=0;r<w;r++)
			freereg(regs[r]);
		/* The stores above used fixed offsets, so step scr on by hand */
		sasm_alu3(a++,AL,ADD,scr,scr,w*4);
	}
	else
	{
		/* Load all words, then work out which pixels within them to store */
		for (r=0;r<w;r++)
		{
			regs[r] = getreg("transfer data");
			regmask |= 1 << regs[r];
		}
		regs[6] = getreg("transfer temp");
		sasm_mem4(a++,AL,L+IA+W,spr,regmask);
		if (ps == 0)
			maskmask = 0xFF00;
		else
			maskmask = 0x8000;
		for (r=0;r<w;r++)
		{
			/* Load the screen pixel if we need it */
			sasm_alu3(a++,AL,TST,0,regs[r],maskmask);
			sasm_alu3(a++,NE,TST,0,regs[r],maskmask << 16);
			sasm_mem2(a++,EQ,L+PRE+U,regs[6],scr,r*4);
			sasm_alu3(a++,AL,TST,0,regs[r],maskmask);
			sasm_alu(a++,EQ,MOV,regs[r],0,regs[r],LSR,16); /* Update sprite pixels, not the screen one. This'll mean we can STM it all back in one go. */
			sasm_alu(a++,EQ,ORR,regs[r],regs[r],regs[6],LSL,16);
			sasm_alu(a++,EQ,MOV,regs[r],0,regs[r],ROR,16);
			if (scr_mt > GP_MT_ONOFF) /* && ps == 0 */
				sasm_alu3(a++,NE,BIC,regs[r],regs[r],0xFF00);
			sasm_alu3(a++,AL,TST,0,regs[r],maskmask << 16);
			sasm_alu(a++,EQ,MOV,regs[r],0,regs[r],LSL,16);
			sasm_alu(a++,EQ,ORR,regs[r],regs[r],regs[6],LSR,16);
			sasm_alu(a++,EQ,MOV,regs[r],0,regs[r],ROR,16);
			if (scr_mt > GP_MT_ONOFF) /* && ps == 0 */
				sasm_alu3(a++,NE,BIC,regs[r],regs[r],0xFF000000);
		}
		sasm_mem4(a++,AL,IA+W,scr,regmask);
		for (r=0;r<w;r++)
			freereg(regs[r]);
		freereg(regs[6]);
	}
	/* Because we corrupted the flags, we might need to check again if cx == 0 */
	if (cx >= 0)
		sasm_alu3(a++,AL,CMP,0,cx,0);
	/* Insert branch */ sasm_b(l1,c2,0,a);
	return a;
}

/*
				PLOTTER BITS
*/

/* Emit a loop that wraps a fixed point value into the range of a limit.
   Returns the advanced code ptr; no registers are allocated. */
static int *asm_gp_round(int *a,int val,int lim,int shift)
{
	/* Round off a value by addition/subtraction until within range
	   In:
	   val = Register value to round off (fixed point, 'shift' fraction bits)
	   lim = Register upper limit to use
	   shift = Constant fixed point precision value
	   Out:
	   0 <= val < (lim << shift) */
	int *l1,*l2;
	sasm_alu(a++,AL,CMP,0,val,lim,LSL,shift);
	l2 = a++; /* Insert branch here */
	l1 = a; /* Loop point */
	/* The step direction uses the signed conditions (GE/LT) while the
	   "done" tests use the unsigned ones (HS/LO), so a negative val counts
	   as out of range and gets the limit added to it */
	sasm_alu(a++,GE,SUB,val,val,lim,LSL,shift); /* If >= then sub */
	sasm_alu(a++,LT,ADD,val,val,lim,LSL,shift); /* else if < and not lo then must add */
	sasm_alu(a++,AL,CMP,0,val,lim,LSL,shift);
	sasm_b(a++,HS,0,l1); /* Go back to loop if it isn't in range yet */
	/* Insert branch */ sasm_b(l2,LO,0,a); /* Fill the slot at l2: skip the loop when val was already in range */
	return a;
}

/* Emit the common prologue of a plot routine: fetch the sprite and screen
   details into registers. Six registers are allocated and handed back
   through the pointer arguments; none of them is freed here.
   Returns the advanced code ptr. */
static int *asm_plotinit(int *a,int spr,int x,int y,int scr,int *spr_w,int *spr_h,int *spr_g,int *scr_w,int *scr_h,int *scr_g)
{
	int tmp = getreg("plotinit tmp");
	int tmp2 = getreg("plotinit tmp2");
	*spr_w = getreg("spr_w");
	*spr_h = getreg("spr_h"); /* Was misspelt "spw_h", which mislabelled the register in listregs() output */
	/* initialise plotting routine
	   In:
	   spr = gp_spr pointer
	   x,y = plot location
	   scr = gp_screen pointer
	   Out:
	   spr = sprite data pointer
	   x,y = plot location adjusted by sprite origin
	   scr = screen bank 0
	   spr_w,spr_h = sprite width & height
	   spr_g,scr_g = sprite & screen EOL gap
	   scr_w,scr_h = screen size */
	sasm_mem2(a++,AL,L+PRE+U,spr,spr,offsetof(gp_spr,d)); /* Follow gp_spr.d to the _gp_simpspr header */
	sasm_mem2(a++,AL,L+PRE+U,*spr_w,spr,offsetof(_gp_simpspr,w));
	sasm_mem2(a++,AL,L+PRE+U,*spr_h,spr,offsetof(_gp_simpspr,h));
	sasm_mem2(a++,AL,L+PRE+U,tmp,spr,offsetof(_gp_simpspr,ox));
	sasm_mem2(a++,AL,L+PRE+U,tmp2,spr,offsetof(_gp_simpspr,oy));
	sasm_alu(a++,AL,SUB,x,x,tmp,0,0); /* Adjust for sprite center */
	sasm_alu(a++,AL,SUB,y,y,tmp2,0,0); freereg(tmp); freereg(tmp2);
	sasm_alu3(a++,AL,ADD,spr,spr,offsetof(_gp_simpspr,d)); /* Get data ptr */
	/* tmp and tmp2 were released above, so the registers requested next can reuse them */
	*scr_w = getreg("scr_w");
	*scr_h = getreg("scr_h");
	*scr_g = getreg("scr_g");
	*spr_g = getreg("spr_g");
	sasm_alu3(a++,AL,MOV,*spr_g,0,0); /* Sprite EOL gap */
	sasm_mem2(a++,AL,L+PRE+U,*scr_w,scr,offsetof(gp_screen,width));
	sasm_mem2(a++,AL,L+PRE+U,*scr_h,scr,offsetof(gp_screen,height));
	sasm_mem2(a++,AL,L+PRE+U,*scr_g,scr,offsetof(gp_screen,gap));
	sasm_mem2(a++,AL,L+PRE+U,scr,scr,offsetof(gp_screen,bank0)); /* Loaded last because it overwrites the gp_screen pointer */
	return a;
}

/* Emit the prologue of a transformed (one screen row at a time) plotter:
   clip the row to the screen, return early if nothing is left, then set up
   the screen and sprite pointers and wrap the sprite coordinates.
   In:
   a = where to write the generated code
   spr = Register gp_spr pointer
   x,y = Registers holding the screen position of the start of the row
   scr = Register gp_screen pointer
   l = Register number of pixels in the row
   sx,sy = Registers holding the sprite start position (16 fraction bits)
   xd,yd = Registers holding the sprite step per screen pixel (16 fraction bits)
   scr_ps,scr_mt = Constant screen pixel size & mask type
   spr_ps,spr_mt = Constant sprite pixel size & mask type (not used here)
   Returns the advanced code ptr. The two temporaries are freed before
   returning. */
static int *asm_plotinit_t(int *a,int spr,int x,int y,int scr,int l,int sx,int sy,int xd,int yd,int spr_ps,int spr_mt,int scr_ps,int scr_mt)
{
	int tmp = getreg("plotinit_t tmp");
	int tmp2 = getreg("plotinit_t tmp2");
	/* Initialise a transformed plotter
	   Returns with spr as the sprite data pointer, x and y as the sprite
	   size, scr as the screen data pointer, l as the number of pixels to
	   draw, sx/sy as sprite start pos, and xd/yd as sprite delta, all
	   reduced to lie between 0 and the sprite width/height */
	sasm_alu3(a++,AL,CMP,0,x,0); /* Off left edge? */
	sasm_alu3(a++,LT,RSB,x,x,0); /* Yes, so invert length */
	sasm_alu(a++,LT,SUB,l,l,x,0,0); /* Decrease line length */
	sasm_mla(a++,LT,0,sx,x,xd,sx); /* Shift sprite pos */
	sasm_mla(a++,LT,0,sy,x,yd,sy);
	sasm_alu3(a++,LT,MOV,x,0,0); /* Reset x pos */
	sasm_mem2(a++,AL,L+PRE+U,tmp,scr,offsetof(gp_screen,width)); /* Get screen width */
	sasm_alu(a++,AL,ADD,tmp2,x,l,0,0); /* Right edge of line */
	sasm_alu(a++,AL,SUB+S,tmp2,tmp2,tmp,0,0); /* Off right edge of screen? */
	sasm_alu(a++,GT,SUB,l,l,tmp2,0,0); /* Yes, so decrease line length */
	sasm_mem2(a++,AL,L+PRE+U,tmp2,scr,offsetof(gp_screen,height));
	/* Unsigned compare, so a negative y fails the same test as y >= height */
	sasm_alu(a++,AL,CMP,0,y,tmp2,0,0); /* Off top/bottom of screen? */
	/* NOTE(review): 13/0x8FF0 looks like an ARM load-multiple from the stack
	   into R4-R11 and PC, i.e. the generated routine's epilogue - confirm
	   that it matches the entry sequence emitted elsewhere */
	sasm_mem4(a++,HS,LFD+W,13,0x8FF0); /* Return if off screen */
	sasm_alu3(a++,AL,CMP,0,l,0); /* No line? */
	sasm_mem4(a++,LE,LFD+W,13,0x8FF0); /* Return */
	/* Else, calculate screen pos */
	/* tmp is still the screen width */
	sasm_mem2(a++,AL,L+PRE+U,tmp2,scr,offsetof(gp_screen,gap)); /* Get screen gap */
	sasm_alu(a++,AL,ADD,tmp2,tmp2,tmp,LSL,gp_col_trueps(scr_ps,scr_mt)); /* Get byte length of a line */
	sasm_mem2(a++,AL,L+PRE+U,tmp,scr,offsetof(gp_screen,bank0)); /* Get scr ptr */
	sasm_mla(a++,AL,0,scr,tmp2,y,tmp); /* screen + y offset */
	sasm_alu(a++,AL,ADD,scr,scr,x,LSL,gp_col_trueps(scr_ps,scr_mt)); /* Final screen ptr */
	/* x and y are no longer needed, so use them for the sprite size */
	sasm_mem2(a++,AL,L+PRE+U,spr,spr,offsetof(gp_spr,d)); /* Sprite data */
	sasm_mem2(a++,AL,L+PRE+U,x,spr,offsetof(_gp_simpspr,w));
	sasm_mem2(a++,AL,L+PRE+U,y,spr,offsetof(_gp_simpspr,h));
	/* Ignore sprite origin */
	sasm_alu3(a++,AL,ADD,spr,spr,offsetof(_gp_simpspr,d)); /* Get data ptr */
	/* Reduce sx,sy,dx,dy so that they are smaller than the sprite size */
	a = asm_gp_round(a,sx,x,16);
	a = asm_gp_round(a,sy,y,16);
	a = asm_gp_round(a,xd,x,16);
	a = asm_gp_round(a,yd,y,16);
	/* Finished! */
	freereg(tmp); freereg(tmp2);
	return a;
}

/* Emit code that clips a sprite rectangle against the screen and adjusts
   the pointers, sizes and end-of-line gaps to cover only the visible part.
   All work is done in the registers passed in; one temporary is allocated
   and freed again. Returns the advanced code ptr.
   The emitted code does not itself test for a sprite that lies completely
   off screen; spr_w or spr_h simply end up zero or negative in that case. */
static int *asm_gp_spr_calcdraw(int *a,int x,int y,int spr,int spr_w,int spr_h,int spr_g,int spr_c,int scr,int scr_w,int scr_h,int scr_g,int scr_c)
{
	int tmp = getreg("calcdraw tmp");
	/* Calculate the vars needed to draw a sprite on a screen
	   In:
	   x,y = Register coords of where to plot sprite
	   spr = Register sprite pointer
	   spr_w,spr_h = Register size of sprite in pixels
	   spr_g = Register sprite end-of-line gap in bytes
	   spr_c = Constant sprite pixel size: 1 << spr_c = bytes per pixel
	   scr = Register screen pointer
	   scr_w,scr_h = Register screen dimensions in pixels
	   scr_g = Register screen end-of-line gap in bytes
	   scr_c = Constant screen pixel size: 1 << scr_c = bytes per pixel
	   Out:
	   spr,scr = pointers to required data start
	   spr_w,spr_h = size of area to be drawn (pixels)
	   spr_g,scr_g = new end-of-line gaps (bytes)
	   Plan:
	   1. Clip to bottom of screen
	   2. Clip to top of screen
	   3. Clip to right of screen
	   4. Clip to left of screen */
	/* Step 1 */
	sasm_alu(a++,AL,ADD,tmp,y,spr_h,0,0); /* Work out bottom edge of sprite */
	sasm_alu(a++,AL,SUB+S,tmp,tmp,scr_h,0,0); /* Off bottom? */
	sasm_alu(a++,GT,SUB,spr_h,spr_h,tmp,0,0); /* Decrease number of rows */
	/* Step 2 */
	sasm_alu3(a++,AL,CMP,0,y,0); /* Off top edge? */
	/* y is negative on the LT path, so adding it shrinks and subtracting
	   y*linelength moves the pointer forwards */
	sasm_alu(a++,LT,ADD,spr_h,spr_h,y,0,0); /* Yes, so decrease number of rows */
	sasm_alu(a++,LT,ADD,tmp,spr_g,spr_w,LSL,spr_c); /* Work out sprite line length (Allows sprites to have gaps) */
	sasm_mul(a++,LT,0,tmp,y,tmp); /* Shift on spr ptr */
	sasm_alu(a++,LT,SUB,spr,spr,tmp,0,0);
	sasm_alu(a++,GT,ADD,tmp,scr_g,scr_w,LSL,scr_c); /* Work out screen line length */
	sasm_mla(a++,GT,0,scr,y,tmp,scr); /* Shift on screen ptr if on screen */
	/* Step 3 */
	sasm_alu(a++,AL,ADD,tmp,x,spr_w,0,0); /* Work out right edge of sprite */
	sasm_alu(a++,AL,SUB+S,tmp,tmp,scr_w,0,0); /* Off right edge? */
	sasm_alu(a++,GT,ADD,spr_g,spr_g,tmp,LSL,spr_c); /* Yes, so increase sprite gap */
	sasm_alu(a++,GT,SUB,spr_w,spr_w,tmp,0,0); /* And decrease row length */
	/* tmp is negative on the LT path: it is minus the free space to the
	   right of the sprite, so subtracting it widens the gap */
	sasm_alu(a++,LT,SUB,scr_g,scr_g,tmp,LSL,scr_c); /* Increase screen gap */
	/* Step 4 */
	sasm_alu3(a++,AL,CMP,0,x,0); /* Off left edge? */
	sasm_alu(a++,LT,SUB,spr_g,spr_g,x,LSL,spr_c); /* Yes, so increase sprite gap */
	sasm_alu(a++,LT,ADD,spr_w,spr_w,x,0,0); /* And decrease row length */
	sasm_alu(a++,LT,SUB,spr,spr,x,LSL,spr_c); /* And shift on sprite ptr */
	sasm_alu(a++,GT,ADD,scr,scr,x,LSL,scr_c); /* Else shift on screen ptr */
	sasm_alu(a++,GT,ADD,scr_g,scr_g,x,LSL,scr_c); /* And increase screen gap */
	freereg(tmp);
	return a;
}

/* Emit the pointer/size/mask setup code that runs before a preshifted
   sprite plot. Two scratch registers are claimed with getreg() while the
   code is being emitted and are released again before returning.
   a = address at which the next instruction is written
   Returns the address following the last instruction written.
   Several of the emitted instructions are conditional on flags set by an
   earlier emitted instruction, so the emission order matters. */
static int *asm_gp_spr_calcdraw_pre(int *a,int x,int y,int spr,int spr_w,int spr_h,int spr_g,int scr,int scr_w,int scr_h,int scr_g,int ps)
{
	int tmp = getreg("calcdraw_pre tmp");
	int tmp2 = getreg("calcdraw_pre tmp2");
	/* Calculate the vars needed to draw a sprite on a screen
	   In:
	   x,y = Register coords of where to plot sprite
	   spr = Register sprite pointer
	   spr_w,spr_h = Register size of sprite in pixels, including preshift column
	   spr_g = Register sprite end-of-line gap in bytes
	   scr = Register screen pointer
	   scr_w,scr_h = Register screen dimensions in pixels
	   scr_g = Register screen end-of-line gap in bytes
	   ps = Constant spr/scr pixel size: 1 << ps = bytes per pixel
	   Out:
	   spr,scr = pointers to required data start
	   spr_w,spr_h = size of area to be drawn (pixels)
	   spr_g,scr_g = new end-of-line gaps (bytes)
	   x,y = start/end bit masks for the first/last words: scr = (scr & mask) | (spr & ~mask)
	   Plan:
	   1. Advance to correct preshift number
	   2. Clip to bottom of screen
	   3. Clip to top of screen
	   4. Clip to right of screen
	   5. Clip to left of screen
	   6. Decode the bit masks from the data held in tmp2 */
	/* Step 1 */
	sasm_alu3(a++,AL,AND+S,tmp2,x,3); /* tmp2 = x & 3 (the offset #); EQ when the offset is 0 */
	if (ps == 0)
		sasm_alu3(a++,EQ,MOV,tmp,0,0); /* Offset 0: tmp = 0. NOTE(review): presumably so the GE add below adds nothing when the NE instructions are skipped - confirm */
	sasm_alu(a++,NE,ADD,tmp,spr_g,spr_w,LSL,ps); /* Line length */
	sasm_mul(a++,NE,0,tmp,spr_h,tmp); /* Sprite size */
	if (ps == 1)
		sasm_alu(a++,NE,ADD,spr,spr,tmp,0,0); /* Preshift 1 */
	else {
		sasm_alu3(a++,NE,CMP,0,tmp2,2); /* Only executed for a nonzero offset; compares it with 2 */
		sasm_alu(a++,NE,ADD,spr,spr,tmp,0,0); /* shift 1 or 3 */
		sasm_alu(a++,GE,ADD,spr,spr,tmp,LSL,1); /* shift 2 or 3 */
	}
	sasm_alu3(a++,AL,BIC,x,x,3); /* Clear lower bits of adr */
	/* Step 2 */
	sasm_alu(a++,AL,ADD,tmp,y,spr_h,0,0); /* Work out bottom edge of sprite */
	sasm_alu(a++,AL,SUB+S,tmp,tmp,scr_h,0,0); /* Off bottom? */
	sasm_alu(a++,GT,SUB,spr_h,spr_h,tmp,0,0); /* Decrease number of rows */
	/* Step 3 */
	sasm_alu3(a++,AL,CMP,0,y,0); /* Off top edge? */
	sasm_alu(a++,LT,ADD,spr_h,spr_h,y,0,0); /* Yes, so decrease number of rows */
	sasm_alu(a++,LT,ADD,tmp,spr_g,spr_w,LSL,ps); /* Work out sprite line length (Allows sprites to have gaps) */
	sasm_mul(a++,LT,0,tmp,y,tmp); /* Shift on spr ptr */
	sasm_alu(a++,LT,SUB,spr,spr,tmp,0,0);
	sasm_alu(a++,GT,ADD,tmp,scr_g,scr_w,LSL,ps); /* Work out screen line length */
	sasm_mla(a++,GT,0,scr,y,tmp,scr); /* Shift on screen ptr if on screen */
	/* Step 4 */
	sasm_alu(a++,AL,ADD,tmp,x,spr_w,0,0); /* Work out right edge of sprite */
	sasm_alu(a++,AL,SUB+S,tmp,tmp,scr_w,0,0); /* Off right edge? */
	sasm_alu(a++,GT,ADD,spr_g,spr_g,tmp,LSL,ps); /* Yes, so increase sprite gap */
	sasm_alu(a++,GT,SUB,spr_w,spr_w,tmp,0,0); /* And decrease row length */
	sasm_alu3(a++,GT,ORR,tmp2,tmp2,4); /* Set flag to disable right hand masking */
	sasm_alu(a++,LT,SUB,scr_g,scr_g,tmp,LSL,ps); /* Increase screen gap */
	/* Step 5 */
	sasm_alu3(a++,AL,CMP,0,x,0); /* Off left edge? */
	sasm_alu(a++,LT,SUB,spr_g,spr_g,x,LSL,ps); /* Yes, so increase sprite gap */
	sasm_alu(a++,LT,ADD,spr_w,spr_w,x,0,0); /* And decrease row length */
	sasm_alu(a++,LT,SUB,spr,spr,x,LSL,ps); /* And shift on sprite ptr */
	sasm_alu3(a++,LT,ORR,tmp2,tmp2,8); /* Set flag to disable left hand masking */
	sasm_alu(a++,GT,ADD,scr,scr,x,LSL,ps); /* Else shift on screen ptr */
	sasm_alu(a++,GT,ADD,scr_g,scr_g,x,LSL,ps); /* And increase screen gap */
	/* Step 6 */
	/* tmp2 is of the format 'LROO' where:
	      L is the lefthand mask disable flag (i.e. set to 00000000)
	      R is the righthand mask disable flag (i.e. set to 00000000)
	      O is the offset #
	   ofs	left mask	right mask
	   0	00000000	ffffffff
	   1	000000ff	ffffff00
	   2	0000ffff	ffff0000
	   3	00ffffff	ff000000
	*/
	/* calc right mask (built in y, whose coordinate value is no longer needed) */
	sasm_alu3(a++,AL,MVN,y,0,0); /* 0xffffffff */
	if (ps == 0) {
		/* The 8-bit shift for offset bit 0 is only emitted for ps 0 */
		sasm_alu3(a++,AL,TST,0,tmp2,1);
		sasm_alu(a++,NE,MOV,y,0,y,LSL,8);
	}
	sasm_alu3(a++,AL,TST,0,tmp2,2); /* Offset bit 1 set: shift the mask up by 16 bits */
	sasm_alu(a++,NE,MOV,y,0,y,LSL,16);
	/* set left mask (built in x) */
	sasm_alu3(a++,AL,TST,0,tmp2,8);
	sasm_alu3(a++,NE,MOV,x,0,0); /* No left mask */
	sasm_alu(a++,EQ,MVN,x,0,y,0,0); /* Inv. of right mask */
	/* set right mask */
	sasm_alu3(a++,AL,TST,0,tmp2,4);
	sasm_alu3(a++,NE,MOV,y,0,0); /* No right mask */
	/* If only one column is visible and the left edge isn't, inherit the right edge's mask */
	/* This can be simplified to ORRing the two masks together if 1 column is visible, since no more than one mask will be nonzero */
	sasm_alu3(a++,AL,CMP,0,spr_w,4/(ps+1)); /* 4/(ps+1) is 4 for ps 0 and 2 for ps 1 */
	sasm_alu(a++,EQ,ORR,x,x,y,0,0);
	freereg(tmp); freereg(tmp2);
	return a;
}

static int *asm_gp_sprdraw_epilogue(int *a,int cw,int h,int spr,int spr_g,int scr,int scr_g,int pixdrawn,int reg,int *xloop,int *yloop)
{
	/* Emit the tail of the plotting loops: the optional per-row loop test,
	   the step on to the next row, and the row loop test.
	   In:
	   cw = Register holding the pixels still to plot on this row
	   h = Register holding the rows still to plot
	   spr,scr = Register sprite/screen ptrs
	   spr_g,scr_g = Register sprite/screen end-of-line gaps (bytes)
	   pixdrawn = Pixels drawn by one pass of the loop body; a register
	              number if reg is nonzero, otherwise a constant
	   reg = Nonzero if pixdrawn is a register
	   xloop = Location of x loop point, or 0 to emit no x loop test
	   yloop = Location of y loop point
	   Out:
	   Returns the location after the last emitted instruction; the emitted
	   code updates cw,h,spr,scr as necessary */
	int *out = a;
	if (xloop != 0)
	{
		if ((!reg) && (!pixdrawn))
			sasm_alu3(out++,AL,CMP,0,cw,0); /* Nothing to subtract, just test cw against 0 */
		else if (reg)
			sasm_alu(out++,AL,SUB+S,cw,cw,pixdrawn,0,0); /* cw -= register count */
		else
			sasm_alu3(out++,AL,SUB+S,cw,cw,pixdrawn); /* cw -= constant count */
		sasm_b(out++,GT,0,xloop); /* More of this row left, so go round the x loop */
	}
	/* Row finished: step both pointers over their end-of-line gaps */
	sasm_alu(out++,AL,ADD,spr,spr,spr_g,0,0);
	sasm_alu(out++,AL,ADD,scr,scr,scr_g,0,0);
	/* One less row to do; go round the y loop while any remain */
	sasm_alu3(out++,AL,SUB+S,h,h,1);
	sasm_b(out++,GT,0,yloop);
	return out;
}


/* Generate a plotter that moves one pixel per pass of its inner loop,
   converting between the sprite and screen formats where needed.
   a = where to write the code
   spr_ps,spr_mt = Constant sprite pixel size and mask type
   scr_ps,scr_mt = Constant screen pixel size and mask type
   spr,x,y,scr,spr_w,spr_h,spr_g,scr_w,scr_h,scr_g = Registers, as taken by
   asm_gp_spr_calcdraw. x, y, scr_w and scr_h are released with freereg()
   once the clipping code has been emitted.
   Returns the location after the generated code. */
static int *asm_gp_genplot_simple(int *a,int spr_ps,int spr_mt,int scr_ps,int scr_mt,int spr,int x,int y,int scr,int spr_w,int spr_h,int spr_g,int scr_w,int scr_h,int scr_g)
{
	/* Generate code for plotting one pixel at a time */
	int *xloop,*yloop;
	int cx; /* X copy */
	int pix1,pix2,pix3,pix4,pix5; /* pixel data */
	int *skip; /* inner loop skip point thingy */
	a = asm_gp_spr_calcdraw(a,x,y,spr,spr_w,spr_h,spr_g,gp_col_trueps(spr_ps,spr_mt),scr,scr_w,scr_h,scr_g,gp_col_trueps(scr_ps,scr_mt));
	/* Vars are now set up for the main plot loop:
	   spr,scr = pointers
	   spr_w,spr_h = size
	   spr_g,scr_g = end-of-line gaps */
	/* So free all other regs */
	freereg(x); freereg(y); freereg(scr_w); freereg(scr_h);
	sasm_alu3(a++,AL,CMP,0,spr_w,0);
	sasm_alu3(a++,GT,CMP,0,spr_h,0);
	sasm_alu3(a++,LE,MOV,0,0,0); /* R0 = 0 as the return value */
	sasm_mem4(a++,LE,LFD+W,13,0x8FF0); /* Exit if nothing to draw */
	/* Plotter prologue */
	yloop = a;
	cx = getreg("cx");
	sasm_alu(a++,AL,MOV,cx,0,spr_w,0,0); /* Copy sprite width */
	xloop = a;
	/* Plotter main */
	a = asm_gp_loadpix(a,spr,spr_ps,spr_mt,&pix1,&pix2,1); /* Load pixel */
	if (spr_mt == GP_MT_NONE)
	{
		a = asm_gp_convert(a,spr_ps,scr_ps,pix1); /* Convert to screen format */
		a = asm_gp_setmaskon(a,scr_ps,scr_mt,pix1,&pix2); /* Turn mask on */
		a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix1,&pix2,1); /* Store pixel */
		freereg(pix1);
	} else if (spr_mt == GP_MT_ONOFF) {
		/* Check mask state then convert & store if needed */
		if (spr_ps == 0)
			sasm_alu3(a++,AL,TST,0,pix1,0xFF00);
		else if (spr_ps == 1)
			sasm_alu3(a++,AL,TST,0,pix1,0x8000);
		else
			sasm_alu3(a++,AL,TST,0,pix1,0xFF000000);
		sasm_alu3(a++,EQ,ADD,scr,scr,1 << gp_col_trueps(scr_ps,scr_mt)); /* Advance screen ptr if not drawing pixel */
		skip = a++; /* Place the branch forward here */
		/* Else pixel is getting drawn, so convert & store it */
		a = asm_gp_convert(a,spr_ps,scr_ps,pix1);
		a = asm_gp_setmaskon(a,scr_ps,scr_mt,pix1,&pix2); /* Turn mask on */
		a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix1,&pix2,1);
		/* Insert branch */ sasm_b(skip,EQ,0,a);
		freereg(pix1);
	} else if (spr_mt == GP_MT_GREY) {
		/* Need to convert to 24bpp */
		if (spr_ps == 0) {
			pix2 = getreg("pix2"); /* Temp copy of mask */
			sasm_alu(a++,AL,MOV,pix2,0,pix1,LSR,8);
			a = asm_gp_convert(a,0,2,pix1);
			sasm_alu(a++,AL,ORR,pix1,pix1,pix2,LSL,24);
			freereg(pix2);
		} else if (spr_ps == 1) {
			pix2 = getreg("pix2");
			sasm_alu(a++,AL,MOV,pix2,0,pix1,LSR,16);
			a = asm_gp_convert(a,1,2,pix1);
			sasm_alu(a++,AL,ORR,pix1,pix1,pix2,LSL,24);
			freereg(pix2);
		}
		if (scr_mt == 0) /* No mask */
		{
			a = asm_gp_loadpix(a,scr,scr_ps,0,&pix2,&pix3,0);
			a = asm_gp_convert(a,scr_ps,2,pix2); /* Make it ps 2 */
			a = asm_gp_24_grey(a,pix1,pix2,&pix3); /* Apply mask */
			freereg(pix1); freereg(pix2);
			a = asm_gp_convert(a,2,scr_ps,pix3); /* Convert back */
			a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix3,0,1); /* Store */
			freereg(pix3);
		}
		else
		{
			/* some kind of mask */
			a = asm_gp_loadpix(a,scr,scr_ps,scr_mt,&pix2,&pix3,0); /* Load screen pixel */
			a = asm_gp_seperatemask(a,scr_ps,scr_mt,pix2,&pix3); /* seperate screen mask from colour if not already */
			a = asm_gp_convert(a,scr_ps,2,pix2); /* Convert screen colour to 24bpp */
			a = asm_gp_24_grey(a,pix1,pix2,&pix4); /* Apply mask to screen colour */
			freereg(pix2); /* Dispose of screen colour */
			sasm_alu(a++,AL,MOV,pix1,0,pix1,LSR,24); /* Get sprite mask value */
			a = asm_gp_multmask(a,scr_ps,scr_mt,pix1,pix3); /* Now pix3 is the adjusted screen mask */
			freereg(pix1); /* Dispose of sprite mask */
			a = asm_gp_combinemask(a,scr_ps,scr_mt,pix4,pix3); /* Stick mask back onto pixel */
			a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix4,&pix3,1); /* Store pixel */
			freereg(pix4);
		}
	} else {
		/* Similar to above, need pix1 and pix2 as 24bpp */
		if (spr_ps == 0) {
			pix2 = getreg("pix2");
			sasm_alu(a++,AL,MOV,pix2,0,pix1,LSR,8); /* Mask sits above the 8 bit colour */
			a = asm_gp_convert(a,0,2,pix1);
			a = asm_gp_convert(a,0,2,pix2);
		} else if (spr_ps == 1) {
			pix2 = getreg("pix2");
			sasm_alu(a++,AL,MOV,pix2,0,pix1,LSR,16); /* Mask sits above the 16 bit colour */
			/* Source format here is ps 1, matching the GP_MT_GREY case above
			   (this previously passed 0, the source size of the ps 0 arm) */
			a = asm_gp_convert(a,1,2,pix1);
			a = asm_gp_convert(a,1,2,pix2);
		}
		if (scr_mt == 0)
		{
			/* Now load screen & convert to 24bpp */
			a = asm_gp_loadpix(a,scr,scr_ps,0,&pix3,&pix4,0);
			a = asm_gp_convert(a,scr_ps,2,pix3);
			a = asm_gp_24_rgb(a,pix1,pix2,pix3,&pix4);
			/* Convert back & store */
			freereg(pix1); freereg(pix2); freereg(pix3);
			a = asm_gp_convert(a,2,scr_ps,pix4);
			a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix4,0,1);
			freereg(pix4);
		}
		else
		{
			/* some kind of mask */
			a = asm_gp_loadpix(a,scr,scr_ps,scr_mt,&pix3,&pix4,0); /* load screen pixel */
			a = asm_gp_seperatemask(a,scr_ps,scr_mt,pix3,&pix4); /* seperate screen mask if not already */
			a = asm_gp_convert(a,scr_ps,2,pix3); /* convert screen colour to 24bpp */
			a = asm_gp_24_rgb(a,pix1,pix2,pix3,&pix5); /* Apply sprite mask to screen colour */
			freereg(pix1); freereg(pix3); /* Dispose of sprite colour, screen colour */
			a = asm_gp_multmask24(a,spr_ps,scr_ps,scr_mt,pix2,pix4); /* Adjust screen mask */
			freereg(pix2); /* Dispose of sprite mask */
			a = asm_gp_combinemask(a,scr_ps,scr_mt,pix5,pix4); /* Stick mask back onto pixel */
			a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix5,&pix4,1); /* Store pixel */
			freereg(pix5);
		}
	}
	/* Insert loop epilogue */
	a = asm_gp_sprdraw_epilogue(a,cx,spr_h,spr,spr_g,scr,scr_g,1,0,xloop,yloop);
	sasm_alu3(a++,AL,MOV,0,0,0); /* R0 = 0 as the return value */
	sasm_mem4(a++,AL,LFD+W,13,0x8FF0); /* Return from function */
	return a;
}

/* Generate a same-format plotter: a multi-word transfer loop, followed
   (when tps < 2) by a pixel-at-a-time loop that the generated alignment
   tests branch to.
   a = where to write the code
   ps = Constant pixel size shared by sprite and screen
   spr_mt,scr_mt = Constant sprite/screen mask types
   Remaining arguments are registers, as taken by asm_gp_spr_calcdraw.
   Returns the location after the generated code. */
static int *asm_gp_genplot_block(int *a,int ps,int spr_mt,int scr_mt,int spr,int x,int y,int scr,int spr_w,int spr_h,int spr_g,int scr_w,int scr_h,int scr_g)
{
	/* Generate plotter for when sprite & screen have same colour depth,
	   sprite has some kind of simple (GP_MT_NONE or GP_MT_ONOFF) mask,
	   and both have the same true pixel size */
	int tps = gp_col_trueps(ps,spr_mt);
	int *l1,*xloop,*yloop;
	int cx,pix1,pix2; /* pix1 doubles as the words-per-pass count in the word loop */
	l1 = 0;
	a = asm_gp_spr_calcdraw(a,x,y,spr,spr_w,spr_h,spr_g,tps,scr,scr_w,scr_h,scr_g,tps);
	/* Vars are now set up for the main plot loop:
	   spr,scr = pointers
	   spr_w,spr_h = size
	   spr_g,scr_g = end-of-line gaps */
	freereg(x); freereg(y); freereg(scr_w); freereg(scr_h);
	sasm_alu3(a++,AL,CMP,0,spr_w,0);
	sasm_alu3(a++,GT,CMP,0,spr_h,0);
	sasm_alu3(a++,LE,MOV,0,0,0); /* R0 = 0 as the return value */
	sasm_mem4(a++,LE,LFD+W,13,0x8FF0); /* Exit if nothing to draw */
	/* Check if input & output are aligned
	   We want the current pointers to be on word boundaries
	   And the length to be a multiple of a word
	   And the gap to be a multiple of a word */
	if (tps < 2)
	{
		/* Chain of tests: each later TST only runs while everything so far was EQ */
		sasm_alu3(a++,AL,TST,0,spr,3);
		sasm_alu3(a++,EQ,TST,0,scr,3);
		sasm_alu3(a++,EQ,TST,0,spr_w,(tps?1:3));
		sasm_alu3(a++,EQ,TST,0,spr_g,3);
		sasm_alu3(a++,EQ,TST,0,scr_g,3);
		l1 = a++; /* Insert branch to pixel-by-pixel code (slot is filled in further down) */
	}
	/* Plotter prologue */
	yloop = a;
	cx = getreg("cx"); /* cx = count of how many words of data to shift */
	/* There should be 7 registers free in total */
	/* So start with a loop doing 7 words at once
	   Then do the finishing 4, 2 and 1 words
	   (4+2+1=7, so all events are catered for) */
	sasm_alu(a++,AL,MOV,cx,0,spr_w,LSR,2-tps); /* Get count in words */
	xloop = a;
	/* If sprite has mask & not ps 2, we can only do 6 words at once since one is needed as temp storage */
	if ((spr_mt) && (ps != 2))
		pix1 = 6;
	else
		pix1 = 7;
	sasm_alu3(a++,AL,SUB+S,cx,cx,pix1);
	a = asm_gp_spr_transfer(a,GE,LT,spr,scr,ps,spr_mt,scr_mt,pix1,cx);
	sasm_b(a++,GT,0,xloop);
	/* Else <7 words remaining, and cx will be <=0 */
	sasm_alu3(a++,LT,ADD,cx,cx,pix1); /* Make positive again */
	/* Each MOV+S below shifts cx right by one; the transfer that follows is conditional on carry (CS) */
	/* Check for 1 word transfers */
	sasm_alu(a++,AL,MOV+S,cx,0,cx,LSR,1);
	a = asm_gp_spr_transfer(a,CS,CC,spr,scr,ps,spr_mt,scr_mt,1,-1);
	/* Two word */
	sasm_alu(a++,AL,MOV+S,cx,0,cx,LSR,1);
	a = asm_gp_spr_transfer(a,CS,CC,spr,scr,ps,spr_mt,scr_mt,2,-1);
	/* Four word */
	sasm_alu(a++,AL,MOV+S,cx,0,cx,LSR,1);
	a = asm_gp_spr_transfer(a,CS,CC,spr,scr,ps,spr_mt,scr_mt,4,-1);
	/* Now the loop epilogue */
	a = asm_gp_sprdraw_epilogue(a,cx,spr_h,spr,spr_g,scr,scr_g,0,0,0,yloop);
	sasm_alu3(a++,AL,MOV,0,0,0); /* R0 = 0 as the return value */
	sasm_mem4(a++,AL,LFD+W,13,0x8FF0); /* Return from function */
	/* Now the boring, 1-pixel approach */
	if (tps == 2)
		return a; /* No alignment branch was reserved for tps 2, so there is no fallback to generate */
	/* Insert branch */ sasm_b(l1,NE,0,a);
	yloop = a;
	sasm_alu(a++,AL,MOV,cx,0,spr_w,0,0); /* Count in pixels */
	xloop = a;
	/* Plotter main */
	a = asm_gp_loadpix(a,spr,ps,spr_mt,&pix1,&pix2,1); /* Load pixel */
	if (spr_mt == GP_MT_NONE)
	{
		a = asm_gp_setmaskon(a,ps,scr_mt,pix1,&pix2); /* Turn mask on */
		a = asm_gp_savepix(a,scr,ps,scr_mt,pix1,&pix2,1); /* Store pixel */
		freereg(pix1);
	} else if (spr_mt == GP_MT_ONOFF) {
		/* Check mask state then convert & store if needed */
		if (ps == 0)
			sasm_alu3(a++,AL,TST,0,pix1,0xFF00);
		else if (ps == 1)
			sasm_alu3(a++,AL,TST,0,pix1,0x8000);
		else
			sasm_alu3(a++,AL,TST,0,pix1,0xFF000000);
		sasm_alu3(a++,EQ,ADD,scr,scr,1 << tps); /* Advance screen ptr if not drawing pixel */
		l1 = a++; /* Place the branch forward here */
		/* Else pixel is getting drawn, so store it */
		if (scr_mt > GP_MT_ONOFF)
			a = asm_gp_setmaskon(a,ps,scr_mt,pix1,&pix2); /* Turn mask on if not already */
		a = asm_gp_savepix(a,scr,ps,scr_mt,pix1,&pix2,1);
		/* Insert branch */ sasm_b(l1,EQ,0,a);
		freereg(pix1);
	}
	a = asm_gp_sprdraw_epilogue(a,cx,spr_h,spr,spr_g,scr,scr_g,1,0,xloop,yloop);
	sasm_alu3(a++,AL,MOV,0,0,0); /* R0 = 0 as the return value */
	sasm_mem4(a++,AL,LFD+W,13,0x8FF0); /* Return from function */
	return a;
}

/* Generate the pixel loop of a transformed plotter.
   a = where to write the code
   spr,spr_w,spr_h,scr,l,sx,sy,xd,yd = Registers. sx and sy are shifted
   right by 16 to get the sprite pixel position, and xd/yd are added to
   them after every pixel; l is the count of pixels to plot.
   spr_ps,spr_mt,scr_ps,scr_mt = Constant pixel sizes and mask types
   Returns the location after the generated code. The generated code
   finishes by returning 0 in R0, like the other plotters. */
static int *asm_gp_genplot_transformed(int *a,int spr,int spr_w,int spr_h,int scr,int l,int sx,int sy,int xd,int yd,int spr_ps,int spr_mt,int scr_ps,int scr_mt)
{
	/* Do a transformed plot
	   l will be >0 on entry
	   Only realistic way of doing this is to copy pixels one at a time */
	int *loop; /* Loop point */
	int sprof,tmp; /* Sprite offset (aka pixel pointer), temp reg */
	int pix1,pix2,pix3,pix4,pix5; /* Pixel registers */
	int *skip; /* Skip ptr for if pixel not stored to screen */
	sprof = getreg("sprof");
	tmp = getreg("tmp");
	loop = a; /* Main loop point */
	/* Start loop by calculating sprite pos */
	sasm_alu(a++,AL,MOV,sprof,0,sy,LSR,16); /* Real Y pos */
	sasm_alu(a++,AL,MOV,tmp,0,spr_w,LSL,gp_col_trueps(spr_ps,spr_mt)); /* Line length, bytes */
	sasm_mla(a++,AL,0,sprof,tmp,sprof,spr); /* Line ptr */
	sasm_alu(a++,AL,MOV,tmp,0,sx,LSR,16); /* X pos */
	sasm_alu(a++,AL,ADD,sprof,sprof,tmp,LSL,gp_col_trueps(spr_ps,spr_mt)); /* Pixel ptr */
	freereg(tmp);
	/* Now load pixel; code copied from genplot_simple */
	a = asm_gp_loadpix(a,sprof,spr_ps,spr_mt,&pix1,&pix2,1);
	freereg(sprof);
	if (spr_mt == GP_MT_NONE)
	{
		a = asm_gp_convert(a,spr_ps,scr_ps,pix1); /* Convert to screen format */
		a = asm_gp_setmaskon(a,scr_ps,scr_mt,pix1,&pix2); /* Turn mask on */
		a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix1,&pix2,1); /* Store pixel */
		freereg(pix1);
	} else if (spr_mt == GP_MT_ONOFF) {
		/* Check mask state then convert & store if needed */
		if (spr_ps == 0)
			sasm_alu3(a++,AL,TST,0,pix1,0xFF00);
		else if (spr_ps == 1)
			sasm_alu3(a++,AL,TST,0,pix1,0x8000);
		else
			sasm_alu3(a++,AL,TST,0,pix1,0xFF000000);
		sasm_alu3(a++,EQ,ADD,scr,scr,1 << gp_col_trueps(scr_ps,scr_mt)); /* Advance screen ptr if not drawing pixel */
		skip = a++; /* Place the branch forward here */
		/* Else pixel is getting drawn, so convert & store it */
		a = asm_gp_convert(a,spr_ps,scr_ps,pix1);
		a = asm_gp_setmaskon(a,scr_ps,scr_mt,pix1,&pix2); /* Turn mask on */
		a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix1,&pix2,1);
		/* Insert branch */ sasm_b(skip,EQ,0,a);
		freereg(pix1);
	} else if (spr_mt == GP_MT_GREY) {
		/* Need to convert to 24bpp */
		if (spr_ps == 0) {
			pix2 = getreg("pix2"); /* Temp copy of mask */
			sasm_alu(a++,AL,MOV,pix2,0,pix1,LSR,8);
			a = asm_gp_convert(a,0,2,pix1);
			sasm_alu(a++,AL,ORR,pix1,pix1,pix2,LSL,24);
			freereg(pix2);
		} else if (spr_ps == 1) {
			pix2 = getreg("pix2");
			sasm_alu(a++,AL,MOV,pix2,0,pix1,LSR,16);
			a = asm_gp_convert(a,1,2,pix1);
			sasm_alu(a++,AL,ORR,pix1,pix1,pix2,LSL,24);
			freereg(pix2);
		}
		if (scr_mt == 0) /* No mask */
		{
			a = asm_gp_loadpix(a,scr,scr_ps,0,&pix2,&pix3,0);
			a = asm_gp_convert(a,scr_ps,2,pix2); /* Make it ps 2 */
			a = asm_gp_24_grey(a,pix1,pix2,&pix3); /* Apply mask */
			freereg(pix1); freereg(pix2);
			a = asm_gp_convert(a,2,scr_ps,pix3); /* Convert back */
			a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix3,0,1); /* Store */
			freereg(pix3);
		}
		else
		{
			/* some kind of mask */
			/* We need to stuff some regs onto the stack otherwise we'll run out */
			sasm_mem4(a++,AL,SFD+W,13,(1 << sx)+(1 << sy));
			freereg(sx); freereg(sy);
			a = asm_gp_loadpix(a,scr,scr_ps,scr_mt,&pix2,&pix3,0); /* Load screen pixel */
			a = asm_gp_seperatemask(a,scr_ps,scr_mt,pix2,&pix3); /* seperate screen mask from colour if not already */
			a = asm_gp_convert(a,scr_ps,2,pix2); /* Convert screen colour to 24bpp */
			a = asm_gp_24_grey(a,pix1,pix2,&pix4); /* Apply mask to screen colour */
			freereg(pix2); /* Dispose of screen colour */
			sasm_alu(a++,AL,MOV,pix1,0,pix1,LSR,24); /* Get sprite mask value */
			a = asm_gp_multmask(a,scr_ps,scr_mt,pix1,pix3); /* Now pix3 is the adjusted screen mask */
			freereg(pix1); /* Dispose of sprite mask */
			a = asm_gp_combinemask(a,scr_ps,scr_mt,pix4,pix3); /* Stick mask back onto pixel */
			a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix4,&pix3,1); /* Store pixel */
			freereg(pix4);
			/* Restore regs */
			sasm_mem4(a++,AL,LFD+W,13,(1 << sx)+(1 << sy));
			freeregs &= ~((1 << sx)+(1 << sy)); /* Reclaim regs */
			regnames[sx] = "sx"; regnames[sy] = "sy";
		}
	} else {
		/* Similar to above, need pix1 and pix2 as 24bpp */
		/* We need to stuff some regs onto the stack otherwise we'll run out */
		sasm_mem4(a++,AL,SFD+W,13,(1 << sx)+(1 << sy));
		freereg(sx); freereg(sy);
		if (spr_ps == 0) {
			pix2 = getreg("pix2");
			sasm_alu(a++,AL,MOV,pix2,0,pix1,LSR,8); /* Mask sits above the 8 bit colour */
			a = asm_gp_convert(a,0,2,pix1);
			a = asm_gp_convert(a,0,2,pix2);
		} else if (spr_ps == 1) {
			pix2 = getreg("pix2");
			sasm_alu(a++,AL,MOV,pix2,0,pix1,LSR,16); /* Mask sits above the 16 bit colour */
			/* Source format here is ps 1, matching the GP_MT_GREY case above
			   (this previously passed 0, the source size of the ps 0 arm) */
			a = asm_gp_convert(a,1,2,pix1);
			a = asm_gp_convert(a,1,2,pix2);
		}
		if (scr_mt == 0)
		{
			/* Now load screen & convert to 24bpp */
			a = asm_gp_loadpix(a,scr,scr_ps,0,&pix3,&pix4,0);
			a = asm_gp_convert(a,scr_ps,2,pix3);
			a = asm_gp_24_rgb(a,pix1,pix2,pix3,&pix4);
			/* Convert back & store */
			freereg(pix1); freereg(pix2); freereg(pix3);
			a = asm_gp_convert(a,2,scr_ps,pix4);
			a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix4,0,1);
			freereg(pix4);
		}
		else
		{
			/* some kind of mask */
			a = asm_gp_loadpix(a,scr,scr_ps,scr_mt,&pix3,&pix4,0); /* load screen pixel */
			a = asm_gp_seperatemask(a,scr_ps,scr_mt,pix3,&pix4); /* seperate screen mask if not already */
			a = asm_gp_convert(a,scr_ps,2,pix3); /* convert screen colour to 24bpp */
			a = asm_gp_24_rgb(a,pix1,pix2,pix3,&pix5); /* Apply sprite mask to screen colour */
			freereg(pix1); freereg(pix3); /* Dispose of sprite colour, screen colour */
			a = asm_gp_multmask24(a,spr_ps,scr_ps,scr_mt,pix2,pix4); /* Adjust screen mask */
			freereg(pix2); /* Dispose of sprite mask */
			a = asm_gp_combinemask(a,scr_ps,scr_mt,pix5,pix4); /* Stick mask back onto pixel */
			a = asm_gp_savepix(a,scr,scr_ps,scr_mt,pix5,&pix4,1); /* Store pixel */
			freereg(pix5);
		}
		/* Restore regs */
		sasm_mem4(a++,AL,LFD+W,13,(1 << sx)+(1 << sy));
		freeregs &= ~((1 << sx)+(1 << sy)); /* Reclaim regs */
		regnames[sx] = "sx"; regnames[sy] = "sy";
	}
	/* Now loop epilogue */
	/* Increase positions and decrease length by 1 pixel */
	sasm_alu(a++,AL,ADD,sx,sx,xd,0,0);
	sasm_alu(a++,AL,ADD,sy,sy,yd,0,0);
	/* Wrap the positions back into the sprite once they reach its width/height */
	sasm_alu(a++,AL,CMP,0,sx,spr_w,LSL,16);
	sasm_alu(a++,GE,SUB,sx,sx,spr_w,LSL,16);
	sasm_alu(a++,AL,CMP,0,sy,spr_h,LSL,16);
	sasm_alu(a++,GE,SUB,sy,sy,spr_h,LSL,16);
	sasm_alu3(a++,AL,SUB+S,l,l,1);
	sasm_b(a++,GT,0,loop); /* Loop round if more pixels */
	/* R0 = 0 as the return value, as every exit of the other plotters does;
	   without this the generated code returned whatever was left in R0 */
	sasm_alu3(a++,AL,MOV,0,0,0);
	sasm_mem4(a++,AL,LFD+W,13,0x8FF0); /* Return if done */
	return a;
}

/* Generate a preshifted-sprite plotter: a word-based path that draws a
   masked left-hand word, an unmasked centre and a masked right-hand word
   per row, plus a fallback to the genplot_simple code that the generated
   alignment tests branch to.
   a = where to write the code
   ps = Constant pixel size shared by sprite and screen
   spr_mt,scr_mt = Constant sprite/screen mask types
   Remaining arguments are registers, as taken by asm_gp_spr_calcdraw_pre.
   Returns the location after the generated code. */
static int *asm_gp_genplot_pre2(int *a,int ps,int spr_mt,int scr_mt,int spr,int x,int y,int scr,int spr_w,int spr_h,int spr_g,int scr_w,int scr_h,int scr_g)
{
	/* Generate plotter for when sprite & screen have same colour depth,
	   sprite has some kind of simple (GP_MT_NONE or GP_MT_ONOFF) mask,
	   and both have the same true pixel size */
	int tps = gp_col_trueps(ps,spr_mt);
	int *l1,*xloop,*yloop,*l2,*l3;
	int cx,pix1,pix2;
	/* Unsigned so that masktmp << 16 (0x80000000 or 0xFF000000) is well
	   defined; as a signed int that shift overflowed */
	unsigned int masktmp;
	masktmp = 0;
	/* To save major headaches, ensure that screen is fully word aligned */
	sasm_alu3(a++,AL,TST,0,scr_w,(tps?1:3));
	sasm_alu3(a++,EQ,TST,0,scr,3);
	sasm_alu3(a++,EQ,TST,0,scr_g,3);
	l1 = a++; /* Insert branch to pixel-by-pixel code (slot is filled in further down) */
	/* Do calcdraw stuff */
	sasm_alu3(a++,AL,ADD,spr_w,spr_w,4/(tps+1)); /* Number of pixels in preshift column */
	sasm_alu3(a++,AL,SUB,spr_g,spr_g,4); /* Skip back over preshift column */
	a = asm_gp_spr_calcdraw_pre(a,x,y,spr,spr_w,spr_h,spr_g,scr,scr_w,scr_h,scr_g,tps);
	/* Vars are now set up for the main plot loop:
	   spr,scr = pointers
	   spr_w,spr_h = size
	   spr_g,scr_g = end-of-line gaps
	   x,y = start/end column masks: scr = (scr & mask) | (spr & ~mask) */
	freereg(scr_w); freereg(scr_h);
	sasm_alu3(a++,AL,CMP,0,spr_w,0);
	sasm_alu3(a++,GT,CMP,0,spr_h,0);
	sasm_alu3(a++,LE,MOV,0,0,0); /* R0 = 0 as the return value */
	sasm_mem4(a++,LE,LFD+W,13,0x8FF0); /* Exit if nothing to draw */
	/* Plotter prologue */
	sasm_alu3(a++,AL,SUB,spr_w,spr_w,8/(tps+1)); /* Forget end columns */
	yloop = a;
	cx = getreg("cx"); /* count of how many words of data to shift */
	/* There should be 5 registers free in total */
	/* Draw left hand masked area */
	pix1 = getreg("pix1"); pix2 = getreg("pix2");
	sasm_mem2(a++,AL,L+U,pix1,spr,4);
	sasm_mem2(a++,AL,L+PRE,pix2,scr,0);
	/* Check maskness */
	if (spr_mt == GP_MT_ONOFF) /* Masked sprite? */
	{
		if (ps == 0)
			masktmp = 0xFF00;
		else
			masktmp = 0x8000;
		/* Low halfword first, then the high halfword */
		sasm_alu3(a++,AL,TST,0,pix1,masktmp); /* If pixel unset, copy from screen (pix2) to sprite (pix1) */
		sasm_alu(a++,EQ,MOV,pix1,0,pix1,LSR,16);
		sasm_alu(a++,EQ,ORR,pix1,pix1,pix2,LSL,16);
		sasm_alu(a++,EQ,MOV,pix1,0,pix1,ROR,16);
		sasm_alu3(a++,AL,TST,0,pix1,masktmp << 16);
		sasm_alu(a++,EQ,MOV,pix1,0,pix1,LSL,16);
		sasm_alu(a++,EQ,ORR,pix1,pix1,pix2,LSR,16);
		sasm_alu(a++,EQ,MOV,pix1,0,pix1,ROR,16);
	}
	else if (scr_mt == GP_MT_ONOFF) /* Masked screen? */
	{
		sasm_alu3(a++,AL,ORR,pix1,pix1,0x8000); /* Turn (16bpp) pixels on */
		sasm_alu3(a++,AL,ORR,pix1,pix1,0x80000000);
	}
	/* scr = (scr & mask) | (spr & ~mask), using the start mask in x */
	sasm_alu(a++,AL,BIC,pix1,pix1,x,0,0);
	sasm_alu(a++,AL,AND,pix2,pix2,x,0,0);
	sasm_alu(a++,AL,ORR,pix1,pix1,pix2,0,0);
	sasm_mem2(a++,AL,U,pix1,scr,4);
	freereg(pix1); freereg(pix2);
	/* Do center */
	sasm_alu(a++,AL,MOV+S,cx,0,spr_w,ASR,2-tps); /* Width in words */
	/* If <=2 words visible, don't draw middle */
	l2 = a++; /* Insert branch here */
	/* Each MOV+S below shifts cx right by one; the transfer that follows is conditional on carry (CS) */
	/* 1 word transfers */
	sasm_alu(a++,AL,MOV+S,cx,0,cx,LSR,1);
	a = asm_gp_spr_transfer(a,CS,CC,spr,scr,ps,spr_mt,scr_mt,1,-1);
	/* Two word */
	sasm_alu(a++,AL,MOV+S,cx,0,cx,LSR,1);
	a = asm_gp_spr_transfer(a,CS,CC,spr,scr,ps,spr_mt,scr_mt,2,-1);
	/* Four word loop */
	xloop = a;
	sasm_alu3(a++,AL,SUB+S,cx,cx,1);
	a = asm_gp_spr_transfer(a,GE,LT,spr,scr,ps,spr_mt,scr_mt,4,cx);
	sasm_b(a++,GT,0,xloop);
	/* Right hand masked area */
	/* Insert branch */ sasm_b(l2,LE,0,a);
	sasm_alu3(a++,AL,CMP,0,spr_w,0); /* If <2 words visible, don't draw this edge */
	l3 = a++; /* Insert branch here */
	pix1 = getreg("pix1"); pix2 = getreg("pix2");
	sasm_mem2(a++,AL,L+U,pix1,spr,4);
	sasm_mem2(a++,AL,L+PRE,pix2,scr,0);
	/* Check maskness */
	if (spr_mt == GP_MT_ONOFF) /* Masked sprite? */
	{
		sasm_alu3(a++,AL,TST,0,pix1,masktmp); /* If pixel unset, copy from screen (pix2) to sprite (pix1) */
		sasm_alu(a++,EQ,MOV,pix1,0,pix1,LSR,16);
		sasm_alu(a++,EQ,ORR,pix1,pix1,pix2,LSL,16);
		sasm_alu(a++,EQ,MOV,pix1,0,pix1,ROR,16);
		sasm_alu3(a++,AL,TST,0,pix1,masktmp << 16);
		sasm_alu(a++,EQ,MOV,pix1,0,pix1,LSL,16);
		sasm_alu(a++,EQ,ORR,pix1,pix1,pix2,LSR,16);
		sasm_alu(a++,EQ,MOV,pix1,0,pix1,ROR,16);
	}
	else if (scr_mt == GP_MT_ONOFF) /* Masked screen? */
	{
		sasm_alu3(a++,AL,ORR,pix1,pix1,0x8000); /* Turn (16bpp) pixels on */
		sasm_alu3(a++,AL,ORR,pix1,pix1,0x80000000);
	}
	/* scr = (scr & mask) | (spr & ~mask), using the end mask in y */
	sasm_alu(a++,AL,BIC,pix1,pix1,y,0,0);
	sasm_alu(a++,AL,AND,pix2,pix2,y,0,0);
	sasm_alu(a++,AL,ORR,pix1,pix1,pix2,0,0);
	sasm_mem2(a++,AL,U,pix1,scr,4);
	freereg(pix1); freereg(pix2);
	/* Insert branch */ sasm_b(l3,LT,0,a);
	a = asm_gp_sprdraw_epilogue(a,cx,spr_h,spr,spr_g,scr,scr_g,0,0,0,yloop);
	sasm_alu3(a++,AL,MOV,0,0,0); /* R0 = 0 as the return value */
	sasm_mem4(a++,AL,LFD+W,13,0x8FF0); /* Return from function */
	/* Now the boring, 1-pixel approach */
	/* Insert branch */ sasm_b(l1,NE,0,a);
	/* Restore regs: put the allocator back to how it stood when l1 was
	   reserved, since genplot_simple claims cx and frees scr_w/scr_h itself */
	freereg(cx);
	freeregs &= ~((1 << scr_w)+(1 << scr_h));
	regnames[scr_w] = "scr_w"; regnames[scr_h] = "scr_h";
	/* reuse existing func */
	a = asm_gp_genplot_simple(a,ps,spr_mt,ps,scr_mt,spr,x,y,scr,spr_w,spr_h,spr_g,scr_w,scr_h,scr_g);
	return a;
}

/*
				GENERATOR WRAPPERS
*/

static int *asm_gp_genplot(int *a,int spr_ps,int spr_mt,int scr_ps,int scr_mt)
{
	/* Generate code for plotting a sprite to screen
	   Generated code is entered with R0=spr ptr, R1=x, R2=y, R3=scr ptr
	   Returns the location after the generated code */
	int w_spr,h_spr,g_spr; /* Sprite width/height/gap registers, chosen by asm_plotinit */
	int w_scr,h_scr,g_scr; /* Screen width/height/gap registers, chosen by asm_plotinit */
	int blockable;
	sasm_capabilities = sasm_getcapabilities();
	/* Reset the register allocator */
	freeregs = 0x5FF0; /* R4-R12, R14 free */
	regnames[0] = "spr";
	regnames[1] = "x";
	regnames[2] = "y";
	regnames[3] = "scr";
	regnames[13] = "r13";
	regnames[15] = "pc";
/*	warning("Generating plotter for spr_ps=%d, spr_mt=%d, scr_ps=%d, scr_mt=%d",spr_ps,spr_mt,scr_ps,scr_mt);*/
	sasm_mem4(a++,AL,SFD+W,13,0x4FF0); /* R4-R11,R14 */
	a = asm_plotinit(a,0,1,2,3,&w_spr,&h_spr,&g_spr,&w_scr,&h_scr,&g_scr);
	/* Decide which plotter template to use: the block plotter needs matching
	   colour depth, a mask type below 2 and matching true pixel size */
	blockable = (spr_ps == scr_ps) && (spr_mt < 2) && (gp_col_trueps(spr_ps,spr_mt) == gp_col_trueps(scr_ps,scr_mt));
	if (!blockable)
		return asm_gp_genplot_simple(a,spr_ps,spr_mt,scr_ps,scr_mt,0,1,2,3,w_spr,h_spr,g_spr,w_scr,h_scr,g_scr);
	return asm_gp_genplot_block(a,spr_ps,spr_mt,scr_mt,0,1,2,3,w_spr,h_spr,g_spr,w_scr,h_scr,g_scr);
}

static int *asm_gp_genplot_t(int *a,int spr_ps,int spr_mt,int scr_ps,int scr_mt)
{
	sasm_capabilities = sasm_getcapabilities();
	/* Generate code for plotting a transformed sprite to screen */
	/* R0=spr ptr, R1=x, R2=y, R3=scr ptr, stack=l, sx, sy, xd, yd */
	sasm_mem4(a++,AL,SFD+W,13,0x4FF0); /* R4-R11,R14 */
	/* Now pull R4-R8 off stack */
	sasm_alu3(a++,AL,ADD,4,13,9*4); /* Get ptr to params */
	sasm_mem4(a++,AL,L+IA,4,0x1F0); /* R4-R8 */
	freeregs = 0x5E00; /* R9-R12,R14 free */
	regnames[0] = "spr"; regnames[1] = "x"; regnames[2] = "y"; regnames[3] = "scr"; regnames[4] = "l"; regnames[5] = "sx"; regnames[6] = "sy"; regnames[7] = "xd"; regnames[8] = "yd"; regnames[13] = "r13"; regnames[15] = "pc";
	a = asm_plotinit_t(a,0,1,2,3,4,5,6,7,8,spr_ps,spr_mt,scr_ps,scr_mt);
	return asm_gp_genplot_transformed(a,0,1,2,3,4,5,6,7,8,spr_ps,spr_mt,scr_ps,scr_mt);
} 

static int *asm_gp_genplot_pre(int *a,int spr_ps,int spr_mt,int scr_ps,int scr_mt)
{
	/* Generate code for plotting a preshifted sprite to screen
	   Generated code is entered with R0=spr ptr, R1=x, R2=y, R3=scr ptr
	   scr_ps is not passed on: asm_gp_genplot_pre2 takes a single pixel size
	   Returns the location after the generated code */
	int w_spr,h_spr,g_spr; /* Sprite width/height/gap registers, chosen by asm_plotinit */
	int w_scr,h_scr,g_scr; /* Screen width/height/gap registers, chosen by asm_plotinit */
	sasm_capabilities = sasm_getcapabilities();
	/* Reset the register allocator */
	freeregs = 0x5FF0; /* R4-R12, R14 free */
	regnames[0] = "spr";
	regnames[1] = "x";
	regnames[2] = "y";
	regnames[3] = "scr";
	regnames[13] = "r13";
	regnames[15] = "pc";
/*	warning("Generating plotter for spr_ps=%d, spr_mt=%d, scr_ps=%d, scr_mt=%d",spr_ps,spr_mt,scr_ps,scr_mt);*/
	sasm_mem4(a++,AL,SFD+W,13,0x4FF0); /* R4-R11,R14 */
	a = asm_plotinit(a,0,1,2,3,&w_spr,&h_spr,&g_spr,&w_scr,&h_scr,&g_scr);
	a = asm_gp_genplot_pre2(a,spr_ps,spr_mt,scr_mt,0,1,2,3,w_spr,h_spr,g_spr,w_scr,h_scr,g_scr);
	return a;
}

static int gp_genplot_monkey2(gp_spr *spr,int x,int y,gp_screen *scr)
{
	/* Generate level-two sprite plotting code
	   i.e. plotter for specific sprite & screen format
	   The code is built in a stack buffer, copied to the heap, and its
	   address patched into the sprite's jump table; the plot is then
	   performed through that table.
	   Returns 1 if the heap copy can't be allocated, else the result of the plot */
	int code[1024];
	int *end,*heap,*slot;
	int bytes;
	end = asm_gp_genplot(code,(spr->f->getps)(spr),(spr->f->getmt)(spr),scr->ps,scr->mt);
	bytes = (int) (((char *) end)-((char *) code));
	if (bytes > 4096)
		error("Buffer overrun when generating plotter for spr_ps=%d,spr_mt=%d,scr_ps=%d,scr_mt=%d",(spr->f->getps)(spr),(spr->f->getmt)(spr),scr->ps,scr->mt);
	heap = malloc(bytes);
	if (heap == 0)
		return 1; /* Fail */
	memcpy(heap,code,bytes);
	sasm_sync(heap,bytes);
	/* Now patch it into the jump table: entries start 7 instructions in,
	   4 per pixel size, indexed by mask type */
	slot = ((int *) spr->f->plot)+(7+scr->ps*4+scr->mt);
	sasm_b(slot,AL,0,heap);
	sasm_sync(slot,4);
	/* Now call it */
	return (spr->f->plot)(spr,x,y,scr);
}

static int gp_genplot_yeknom2(gp_spr *spr,int x,int y,gp_screen *scr,int l,f1616 sx,f1616 sy,f1616 xd,f1616 yd)
{
	/* Generate level-two transformed sprite plotting code
	   i.e. plotter for specific sprite & screen format
	   The code is built in a stack buffer, copied to the heap, and its
	   address patched into the sprite's transformed-plot jump table; the
	   plot is then performed through that table.
	   Returns 1 if the heap copy can't be allocated, else the result of the plot */
	int code[1024];
	int *end,*heap,*slot;
	int bytes;
	end = asm_gp_genplot_t(code,(spr->f->getps)(spr),(spr->f->getmt)(spr),scr->ps,scr->mt);
	bytes = (int) (((char *) end)-((char *) code));
	if (bytes > 4096)
		error("Buffer overrun when generating transformed plotter for spr_ps=%d,spr_mt=%d,scr_ps=%d,scr_mt=%d",(spr->f->getps)(spr),(spr->f->getmt)(spr),scr->ps,scr->mt);
	heap = malloc(bytes);
	if (heap == 0)
		return 1; /* Fail */
	memcpy(heap,code,bytes);
	sasm_sync(heap,bytes);
	/* Now patch it into the jump table: entries start 7 instructions in,
	   4 per pixel size, indexed by mask type */
	slot = ((int *) spr->f->tplot)+(7+scr->ps*4+scr->mt);
	sasm_b(slot,AL,0,heap);
	sasm_sync(slot,4);
	/* Now call it */
	return (spr->f->tplot)(spr,x,y,scr,l,sx,sy,xd,yd);
}

static int gp_genplot_premonkey2(gp_spr *spr,int x,int y,gp_screen *scr)
{
	/* Generate level-two preshifted sprite plotting code
	   i.e. plotter for specific sprite & screen format
	   The code is built in a stack buffer, copied to the heap, and its
	   address patched into the sprite's jump table; the plot is then
	   performed through that table.
	   Returns 1 if the heap copy can't be allocated, else the result of the plot */
	int code[1024];
	int *end,*heap,*slot;
	int bytes;
	end = asm_gp_genplot_pre(code,(spr->f->getps)(spr),(spr->f->getmt)(spr),scr->ps,scr->mt);
	bytes = (int) (((char *) end)-((char *) code));
	if (bytes > 4096)
		error("Buffer overrun when generating preshifted plotter for spr_ps=%d,spr_mt=%d,scr_ps=%d,scr_mt=%d",(spr->f->getps)(spr),(spr->f->getmt)(spr),scr->ps,scr->mt);
	heap = malloc(bytes);
	if (heap == 0)
		return 1; /* Fail */
	memcpy(heap,code,bytes);
	sasm_sync(heap,bytes);
	/* Now patch it into the jump table: entries start 7 instructions in,
	   4 per pixel size, indexed by mask type */
	slot = ((int *) spr->f->plot)+(7+scr->ps*4+scr->mt);
	sasm_b(slot,AL,0,heap);
	sasm_sync(slot,4);
	/* Now call it */
	return (spr->f->plot)(spr,x,y,scr);
}

static int *asm_gp_genplot_jumptable(int *a,int scr)
{
	/* Emit the seven instruction header that sits in front of a plotter
	   jump table.
	   'a' is where the first instruction is written.
	   'scr' should be the screen ptr register.
	   Returns the address just past the header, which is where the caller
	   writes the first table entry.
	   (Operand meanings below are read off the sasm call shapes -
	   confirm against sasm.h.) */
	int *cursor = a;

	/* Fetch scr->ps into register 12 and compare it with 1 */
	sasm_mem2(cursor++,AL,L+U+PRE,12,scr,offsetof(gp_screen,ps));
	sasm_alu3(cursor++,AL,CMP,0,12,1);
	/* Fetch scr->mt into register 12, then offset it by the ps row:
	   ps 0 adds nothing, ps 1 adds 4, ps 2 adds 8 */
	sasm_mem2(cursor++,AL,L+U+PRE,12,scr,offsetof(gp_screen,mt));
	sasm_alu3(cursor++,EQ,ADD,12,12,4);
	sasm_alu3(cursor++,GT,ADD,12,12,8);
	/* Add the index, shifted left by 2, to register 15 */
	sasm_alu(cursor++,AL,ADD,15,15,12,LSL,2);
	/* Seventh header word: a MOV of register 0 to itself, presumably
	   padding so the table starts where the add above lands for index 0 */
	sasm_alu(cursor++,AL,MOV,0,0,0,0,0);

	return cursor;
}

/*
				FRONTEND CODE
*/

int gp_genplot_monkey(gp_spr *spr,int x,int y,gp_screen *scr)
{
	/* Level-one generator for the plain sprite plotter.
	   Builds the branch table into the per-screen plotters: a 7 instr
	   header followed by 12 table entries, every entry initially
	   branching to gp_genplot_monkey2. The table is installed as
	   spr->f->plot and the plot request is then handed to
	   gp_genplot_monkey2.
	   Returns 1 if the table cannot be allocated, otherwise the result
	   of gp_genplot_monkey2. */
	enum { table_words = 12, total_bytes = 19*4 };
	int *code,*slot,*table_end;

	code = malloc(total_bytes);
	if (code == 0)
		return 1; /* Fail */

	/* Header first; screen pointer is taken from register 3 */
	slot = asm_gp_genplot_jumptable(code,3);
	table_end = slot + table_words;
	while (slot != table_end)
	{
		sasm_b(slot,AL,0,(int *) gp_genplot_monkey2);
		slot++;
	}
	sasm_sync(code,total_bytes);

	/* Install the table as the sprite's plot routine */
	spr->f->plot = ((void *) code);
	sasm_sync((int *) &(spr->f->plot),4);

	/* Second level generator produces the real plotter and runs it */
	return gp_genplot_monkey2(spr,x,y,scr);
}

int gp_genplot_yeknom(gp_spr *spr,int x,int y,gp_screen *scr,int l,f1616 sx,f1616 sy,f1616 xd,f1616 yd)
{
	/* Level-one generator for the transformed sprite plotter.
	   Builds a branch table (7 instr header, 12 table entries) in which
	   every entry initially branches to gp_genplot_yeknom2, installs it
	   as spr->f->tplot, then hands the plot request to
	   gp_genplot_yeknom2.
	   Returns 1 if the table cannot be allocated, otherwise the result
	   of gp_genplot_yeknom2. */
	enum { table_words = 12, total_bytes = 19*4 };
	int *code,*slot,*table_end;

	code = malloc(total_bytes);
	if (code == 0)
		return 1;

	/* Header first; screen pointer is taken from register 3 */
	slot = asm_gp_genplot_jumptable(code,3);
	table_end = slot + table_words;
	while (slot != table_end)
	{
		sasm_b(slot,AL,0,(int *) gp_genplot_yeknom2);
		slot++;
	}
	sasm_sync(code,total_bytes);

	/* Install the table as the sprite's transformed plot routine */
	spr->f->tplot = ((void *) code);
	sasm_sync((int *) &(spr->f->tplot),4);

	/* Second level generator produces the real plotter and runs it */
	return gp_genplot_yeknom2(spr,x,y,scr,l,sx,sy,xd,yd);
}

int gp_genplot_premonkey(gp_spr *spr,int x,int y,gp_screen *scr)
{
	/* Level-one generator for the preshifted sprite plotter.
	   Builds the branch table into the per-screen plotters: a 7 instr
	   header followed by 12 table entries. Every entry starts out
	   branching to gp_genplot_monkey2; the entries selected by the
	   sprite's function table are then redirected to
	   gp_genplot_premonkey2. The table is installed as spr->f->plot and
	   the plot request is re-issued through it.
	   Returns 1 if the table cannot be allocated, otherwise the result
	   of the call through spr->f->plot. */
	enum { header_words = 7, table_words = 12, total_bytes = 19*4 };
	int *code,*table,*slot,*table_end;

	code = malloc(total_bytes);
	if (code == 0)
		return 1; /* Fail */

	/* Header first; screen pointer is taken from register 3 */
	slot = asm_gp_genplot_jumptable(code,3);
	table_end = slot + table_words;
	while (slot != table_end)
	{
		sasm_b(slot,AL,0,(int *) gp_genplot_monkey2);
		slot++;
	}

	/* Redirect the entries that the preshifted generator handles.
	   Entries are indexed by ps*4+mt from the end of the header. */
	table = &code[header_words];
	if (spr->f == &gp_spr_pre00)
	{
		sasm_b(&table[0],AL,0,(int *) gp_genplot_premonkey2); /* ps 0, mt 0 */
	}
	else if (spr->f == &gp_spr_pre01)
	{
		sasm_b(&table[1],AL,0,(int *) gp_genplot_premonkey2); /* ps 0, mt 1 */
	}
	else
	{
		sasm_b(&table[4],AL,0,(int *) gp_genplot_premonkey2); /* ps 1, mt 0 */
		sasm_b(&table[5],AL,0,(int *) gp_genplot_premonkey2); /* ps 1, mt 1 */
	}
	sasm_sync(code,total_bytes);

	/* Install the table as the sprite's plot routine */
	spr->f->plot = ((void *) code);
	sasm_sync((int *) &(spr->f->plot),4);

	/* Going through the new table reaches the second level generator */
	return (spr->f->plot)(spr,x,y,scr);
}

/*
				TEST CODE
*/

int gp_genplot_testmonkey(int *a,int spr_ps,int spr_mt,int scr_ps,int scr_mt)
{
	/* Test hook: run asm_gp_genplot into the caller's buffer 'a' for the
	   given sprite/screen ps and mt values.
	   Returns the distance in bytes from 'a' to the pointer the generator
	   hands back (presumably the end of the generated code).
	   The caller is responsible for 'a' being large enough. */
	int *end = asm_gp_genplot(a,spr_ps,spr_mt,scr_ps,scr_mt);
	/* Subtract as char pointers rather than casting each pointer to int,
	   which is implementation-defined and truncates where pointers are
	   wider than int. */
	return (int) ((char *) end - (char *) a);
}

int gp_genplot_testyeknom(int *a,int spr_ps,int spr_mt,int scr_ps,int scr_mt)
{
	/* Test hook: run asm_gp_genplot_t into the caller's buffer 'a' for
	   the given sprite/screen ps and mt values.
	   Returns the distance in bytes from 'a' to the pointer the generator
	   hands back (presumably the end of the generated code).
	   The caller is responsible for 'a' being large enough. */
	int *end = asm_gp_genplot_t(a,spr_ps,spr_mt,scr_ps,scr_mt);
	/* Subtract as char pointers rather than casting each pointer to int,
	   which is implementation-defined and truncates where pointers are
	   wider than int. */
	return (int) ((char *) end - (char *) a);
}

#endif
