#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <float.h>

#include "dbgutils.h"

#include "dbgutils.c" /* Hack to allow single-file compilation for profanal */

/* Global state shared by the loader, the analysis routines and the command
   parser. */
void *raw_data=0; /* Raw file contents as read by LoadProf (header + samples) */
int raw_size=0; /* Size of raw data, in bytes */
uint32_t *prof_data=0; /* Start of profile samples (points into raw_data) */
int prof_samples=0; /* Number of profile samples */
int prof_size=0; /* Size of each sample (in words) */
int prof_freq=0; /* Frequency of samples, Hz */
int reg_mask=0; /* Mask of saved registers (bits 0-15 = R0-R15, bit 16 = PSR) */
int reg_offsets[17]; /* Register offsets (indices into uint32_t array), -1 if reg not saved */
int reg_filter=0; /* Registers to show in dump output */
int perf_count=0; /* Number of performance counters */
int perf_offsets[32]; /* Performance counter offsets (word index within a sample) */
char *perf_names[32]; /* Performance counter names */
int perf_show[32][2]; /* Counters to show, numerator & denominator (or -1) */
int perf_showcount=0; /* How many to show */
int unknown_filter=1; /* -1=hide, 0=show all, 1=group together */
int psrmode=1; /* 0=raw, 1=simple */
FILE *dumpto=0; /* File for dumpto output; 0 means write to stdout */

/*

				Data loading/management

*/

/* Discard the currently loaded profile (if any): drops the module list,
   frees the raw file buffer and returns the loader state to "nothing
   loaded". Display settings (reg_filter, unknown_filter, psrmode, dumpto)
   are left untouched. */
void KillProf(void)
{
	unsigned int reg;

	/* Modules go first, before the raw buffer is released */
	KillModules();

	if(raw_data != 0)
		free(raw_data);
	raw_data = 0;
	prof_data = 0;

	raw_size = prof_samples = prof_size = prof_freq = 0;
	reg_mask = 0;

	/* Mark every register as "not saved" */
	for(reg=0;reg<sizeof(reg_offsets)/sizeof(reg_offsets[0]);reg++)
		reg_offsets[reg] = -1;

	perf_count = perf_showcount = 0;
}

/* Translate a performance counter event number into a human-readable name.

   id      Event number as stored in the profile header
   iyonix  Non-zero to use the Iyonix (IOP) event numbering, zero to use the
           Cortex-A8 event numbering

   Returns a pointer to a string literal (never NULL, must not be modified or
   freed). Event numbers not in the selected table yield "UNKNOWN". */
char *GetPerfCountName(int id,int iyonix)
{
	if(iyonix)
	{
		/* From the IOP developer's manual... */
		switch(id)
		{
		case 0x0: return "ICache miss count";
		case 0x1: return "ICache cannot deliver cycles";
		case 0x2: return "Data dependency stall cycles";
		case 0x3: return "ITLB miss count";
		case 0x4: return "DTLB miss count";
		case 0x5: return "Branch instructions (taken or not)";
		case 0x6: return "Branch mispredicted (B or BL)";
		case 0x7: return "Instruction executed";
		case 0x8: return "DBuffer full stall cycles";
		case 0x9: return "DBuffer full stall count";
		case 0xa: return "Data cache hit";
		case 0xb: return "Data cache miss";
		case 0xc: return "Data cache writeback";
		case 0xd: return "Software changed PC, no mode change";
		case 0x10: return "BCU 1-bit error count";
		case 0x11: return "BCU request queue full cycles";
		case 0x12: return "BCU request queue nonempty & bus in hold cycles";
		case 0x13: return "RMW cycle due to narrow write to ECC RAM";
		case 0x14: return "ECC detected by BCU but no ELOG register available";
		default: break;
		}
	}
	else
	{
		/* From the Cortex-A8 TRM... */
		switch(id)
		{
		case 0x0: return "Software event";
		case 0x1: return "ICache refill count";
		case 0x2: return "ITLB refill count";
		case 0x3: return "DCache refill count";
		case 0x4: return "DCache hit count";
		case 0x5: return "DTLB refill count";
		case 0x6: return "Data read executed";
		case 0x7: return "Data write executed";
		case 0x8: return "Instruction executed";
		case 0x9: return "Exception taken";
		case 0xa: return "Return from exception";
		case 0xb: return "Context ID written";
		case 0xc: return "Software changed PC, not exception";
		case 0xd: return "Immediate branch instructions (taken or not)";
		case 0xe: return "Procedure returns executed";
		case 0xf: return "Unaligned accesses";
		case 0x10: return "Branch mispredicted/not predicted";
		case 0x11: return "Cycle count";
		case 0x12: return "Branch prediction successes";
		case 0x40: return "Write buffer full cycles";
		case 0x41: return "Store merged into L2";
		case 0x42: return "Bufferable store from L2 load/store";
		case 0x43: return "L2 cache access";
		case 0x44: return "L2 cacheable miss";
		case 0x45: return "AXI data read transfers";
		case 0x46: return "AXI data write transfers";
		case 0x47: return "Memory replay event";
		case 0x48: return "Memory replay caused by unaligned access";
		case 0x49: return "L1 DCache miss due to hashing";
		case 0x4a: return "L1 ICache miss due to hashing";
		case 0x4b: return "L1 DCache access page colouring alias count";
		case 0x4c: return "NEON L1 cache hit";
		case 0x4d: return "NEON L1 cache accesses";
		case 0x4e: return "L2 accesses caused by NEON";
		case 0x4f: return "NEON L2 cache hit";
		case 0x50: return "L1 instruction cache access";
		case 0x51: return "Return stack misprediction";
		case 0x52: return "Branch mispredicted";
		case 0x53: return "Branches predicted to be taken";
		case 0x54: return "Branches predicted to not be taken";
		case 0x55: return "Number of operations";
		case 0x56: return "Cycles idle due to no instructions available for issue";
		case 0x57: return "Instructions issued";
		case 0x58: return "Stall on NEON MRC";
		case 0x59: return "Stall on full NEON instruction queue/data load queue";
		case 0x5a: return "Cycles where NEON and ARM are both busy";
		case 0x70: return "PMUEXTIN[0] event count";
		case 0x71: return "PMUEXTIN[1] event count";
		case 0x72: return "PMUEXTIN[0]+PMUEXTIN[1] event count";
		default: break;
		}
	}
	return "UNKNOWN";
}

/* Report a failed profile load and return to the "nothing loaded" state */
static void LoadProfFailed(const char *file,const char *reason)
{
	printf("%s: %s\n",file,reason);
	KillProf();
}

/* Load profile data from 'file', replacing any previously loaded profile.

   File layout, as parsed below (all words are 32-bit):
   - Module list: for each module a start word, an end word and a
     NUL-terminated name padded to the next word boundary. The list is
     terminated by a zero word.
   - Flags word: bits 0-16 = mask of saved registers (bit 16 = PSR),
     bit 24 = exception vectors at FFFF0000 instead of 0,
     bit 25 = performance counter event numbers use the Iyonix table,
     bit 26 = a cycle counter precedes the other counters,
     bits 27-31 = number of performance counter event words that follow.
   - One event number word per performance counter.
   - Sample frequency word (Hz).
   - Samples, each prof_size words long.

   On a read error or a truncated/inconsistent header a message is printed
   and no profile is left loaded. */
void LoadProf(char *file)
{
	FILE *f;
	int i;
	uint32_t flags_word;
	module *m;
	KillProf();
	f = safe_fopen(file,"rb");
	fseek(f,0,SEEK_END);
	raw_size = ftell(f);
	fseek(f,0,SEEK_SET);
	if(raw_size <= 0)
	{
		fclose(f);
		LoadProfFailed(file,"Empty or unreadable file");
		return;
	}
	raw_data = safe_malloc(raw_size);
	/* Read in chunks so a progress dot can be shown for each one */
	int chunk_size = MAX(raw_size/32,128*1024);
	int read_ok = 1;
	for(i=0;(i<raw_size) && read_ok;i+=chunk_size)
	{
		int chunk = MIN(raw_size-i,chunk_size);
		if(fread(((char *)raw_data)+i,chunk,1,f) != 1)
			read_ok = 0;
		else
			putchar('.');
	}
	putchar('\n');
	fclose(f);
	if(!read_ok)
	{
		LoadProfFailed(file,"Read error");
		return;
	}
	/* Parse header data:
	   - Module list
	   - Register mask
	   - Sample frequency
	*/
	char *base = (char *) raw_data;
	char *data_end = base+raw_size;
	uint32_t *parse = (uint32_t *) raw_data;
	uint32_t *limit = parse+raw_size/4; /* One past the last complete word */
	while((parse < limit) && *parse)
	{
		/* Need start, end and at least one word of name */
		if(limit-parse < 3)
		{
			LoadProfFailed(file,"Truncated or corrupt profile data");
			return;
		}
		char *name = (char *)(parse+2);
		/* Convert spaces to underscores */
		char *c = name;
		while((c < data_end) && *c)
		{
			if(*c == ' ')
				*c = '_';
			c++;
		}
		if(c >= data_end)
		{
			/* Name runs off the end of the file without a terminator */
			LoadProfFailed(file,"Truncated or corrupt profile data");
			return;
		}
		AddModule(parse[0],parse[1],name);
		/* Skip the terminator and round up to the next word boundary.
		   Alignment is computed on the offset within the buffer rather
		   than on the pointer value, so it does not depend on pointers
		   fitting in 32 bits (assumes safe_malloc returns memory that is
		   at least word aligned, as malloc does). */
		size_t next = (((size_t)(c-base))+1+3) & ~(size_t)3;
		if(next > (size_t)raw_size)
		{
			LoadProfFailed(file,"Truncated or corrupt profile data");
			return;
		}
		parse = (uint32_t *)(base+next);
	}
	/* Must still have the list terminator, flags word and frequency word */
	if(limit-parse < 3)
	{
		LoadProfFailed(file,"Truncated or corrupt profile data");
		return;
	}
	parse++;
	flags_word = *parse++;
	reg_filter = reg_mask = flags_word & 0x1ffff;
	/* Load performance counter list */
	if(flags_word & ~(uint32_t)0x1ffff)
	{
		int iyonix = (flags_word>>25) & 1;
		int cyclecount = (flags_word>>26) & 1;
		int perfcount = (flags_word>>27) & 31;
		/* Event words plus the frequency word must all be present */
		if(limit-parse < perfcount+1)
		{
			LoadProfFailed(file,"Truncated or corrupt profile data");
			return;
		}
		if(cyclecount)
			perf_names[perf_count++] = "Cycle count";
		for(i=0;i<perfcount;i++)
			perf_names[perf_count++] = GetPerfCountName(*parse++,iyonix);
	}
	prof_freq = *parse++;
	prof_data = parse;
	/* Decode register offsets, calculate sample size.
	   Registers are saved in the following order:
	   R0-R7,PSR,R15,R8-R14 */
	for(i=0;i<8;i++)
		if(reg_mask & (1<<i))
			reg_offsets[i] = prof_size++;
	if(reg_mask & 0x10000)
		reg_offsets[16] = prof_size++;
	if(reg_mask & 0x8000)
		reg_offsets[15] = prof_size++;
	for(i=8;i<15;i++)
		if(reg_mask & (1<<i))
			reg_offsets[i] = prof_size++;
	/* Performance counters are stored after the registers */
	for(i=0;i<perf_count;i++)
		perf_offsets[i] = prof_size++;
	/* How many samples are there? A header that saves no registers and no
	   counters gives a zero sample size, which means no samples. */
	size_t header_bytes = (size_t)(((char *)prof_data)-base);
	if(prof_size > 0)
		prof_samples = (int) ((((size_t)raw_size)-header_bytes)/(((size_t)prof_size)*4));
	else
		prof_samples = 0;

	printf("Profile data loaded. %d modules, %05x reg mask, %d performance counters, %d samples @ %dHz\n",nummod,reg_mask,perf_count,prof_samples,prof_freq);
	for(i=0;i<perf_count;i++)
		printf("Counter #%d: %s\n",i,perf_names[i]);

	/* Now add standard 'modules' */
	uint32_t vec_offset = ((flags_word&(1<<24))?0xffff0000:0);
	AddModule(vec_offset+0,vec_offset+4,"Branch_through_zero");
	AddModule(vec_offset+4,vec_offset+8,"Undefined_instruction");
	AddModule(vec_offset+8,vec_offset+12,"SWI");
	AddModule(vec_offset+12,vec_offset+16,"Prefetch_abort");
	AddModule(vec_offset+16,vec_offset+20,"Data_abort");
	AddModule(vec_offset+20,vec_offset+24,"Address_Exception");
	AddModule(vec_offset+24,vec_offset+28,"IRQ");
	AddModule(vec_offset+28,vec_offset+0xFC,"FIQ");
	/* If Aemulor is loaded it will have clamped application space to 28MB and inserted its own dynamic areas just above. So don't claim that we have the full 512MB of app space available. */
	uint32_t app_max = 512*1024*1024;
	for(i=0;i<nummod;i++)
		if((modules[i].start > 0x8000) && (modules[i].start < app_max))
			app_max = modules[i].start;
	AddModule(0x8000,app_max,"Application_space");
	if(!FindModule("HAL"))
	{
		AddModule(0xFC000000,0xFC010000,"HAL"); /* Assume 64K HAL in usual location */
		printf("HAL inserted at FC000000\n");
	}
	/* If the UtilityModule is where we expect it to be, rename it to Kernel and adjust its base address so it starts right after the HAL */
	if(((m=FindModule("UtilityModule")) != 0) && !FindModule("Kernel"))
	{
		if(m->start > 0xFC010000)
		{
			printf("Transformed UtilityModule (base %08X) into Kernel (base FC010000)\n",m->start);
			m->start = 0xFC010000;
			/* "Kernel" is shorter than "UtilityModule", so it fits in place */
			strcpy(m->name,"Kernel");
			modules_inorder=false; /* Just in case */
		}
	}
}

/*

				Analysis routines

*/

/* Dump samples [start,end) to 'out', one line per sample or per folded run
   of samples.

   out       Stream to write to
   start,end Sample range; clamped to the loaded profile
   maxlines  Stop after this many output lines (0 = no limit)

   Each line shows the sample number, the registers selected by reg_filter,
   the performance counters selected by perf_show and, when the PC was
   recorded, the module/function/line the PC falls in. When the PC is
   available, consecutive samples are folded into the preceding line
   according to each module's detail level and unknown_filter; the
   performance counter values shown are summed over the folded run. */
void DoDump(FILE *out,int start,int end,int maxlines)
{
	int i;
	if(start < 0)
		start = 0;
	if(end > prof_samples)
		end = prof_samples;
	/* Nothing to show for an empty or inverted range */
	if(start >= end)
		return;
	uint32_t *data = prof_data+start*prof_size;
	/* Column headings */
	fprintf(out,"Sample      ");
	for(i=0;i<16;i++)
		if(reg_filter & (1<<i))
			fprintf(out,"R%-8d",i);
	if(reg_filter & 0x10000)
		fprintf(out,"PSR      ");
	for(i=0;i<perf_showcount;i++)
		if(perf_show[i][1] != -1)
			fprintf(out,"%6.6s/%6.6s ",perf_names[perf_show[i][0]],perf_names[perf_show[i][1]]);
		else
			fprintf(out,"%13.13s ",perf_names[perf_show[i][0]]);
	if(reg_offsets[15] != -1)
		fprintf(out,"Offset + Module\n");
	else
		fprintf(out,"\n");
	int lines=0;
	/* Indexed by the low 4 bits of the PSR */
	static const char *psrmodename[16] = {"USR","FIQ","IRQ","SVC","04?","05?","MON","ABT","08?","09?","10?","UND","12?","13?","14?","SYS"};
	uint64_t perf[32];
	while(start<end)
	{
		module *m=0;
		gpa_line *l=0;
		gpa_func *f=0;
		uint32_t pc=0;
		int count=1; /* Number of samples represented by this output line */
		for(i=0;i<perf_count;i++)
			perf[i] = data[perf_offsets[i]];
		if(reg_offsets[15] != -1)
		{
			/* Scan forwards to see how many entries to fold together, and to collect performance counter stats */
			module *om=0;
			gpa_func *of=0;
			gpa_line *ol=0;
			uint32_t *olddata = data;
			pc = data[reg_offsets[15]];
			om = FindAddress(pc);
			if(om)
			{
				ol = FindLine(om,pc);
				of = FindFunc(om,pc);
			}
			data += prof_size;
			while(start+count<end)
			{
				int hide=0;
				pc = data[reg_offsets[15]];
				m = FindAddress(pc);
				if(m)
				{
					if(m->detail == 3)
					{
						/* Group by module */
						if(m == om)
							hide = 1;
					}
					else if(m->detail >= 0)
					{
						/* Group by line (1) or function (2); detail 0 shows everything */
						l = FindLine(m,pc);
						if((m->detail == 1) && (!l || (l == ol)))
							hide = 1;
						else
						{
							f = FindFunc(m,pc);
							if((m->detail == 2) && (!f || (f == of)))
								hide = 1;
						}
					}
					else
						hide = 1; /* Module is hidden */
				}
				else if(unknown_filter == -1)
					hide = 1;
				else if((m == om) && (unknown_filter == 1))
					hide = 1; /* Previous sample was unknown too; group them */
				if(!hide)
					break;
				for(i=0;i<perf_count;i++)
					perf[i] += data[perf_offsets[i]];
				data += prof_size;
				count++;
			}
			data = olddata;
			/* Restore ready for output */
			pc = data[reg_offsets[15]];
			m=om;
			l=ol;
			f=of;
		}
		fprintf(out,"%-12d",start);
		for(i=0;i<16;i++)
			if(reg_filter & (1<<i))
				fprintf(out,"%08x ",data[reg_offsets[i]]);
		if(reg_filter & (1<<16))
		{
			uint32_t psr = data[reg_offsets[16]];
			if(psrmode)
				fprintf(out,"%s%d%c%c%c ",psrmodename[psr & 0xF],(psr&0x10)?32:26,(psr&0x20)?'T':' ',(psr&0x40)?'F':' ',(psr&0x80)?'I':' ');
			else
				fprintf(out,"%08x ",psr);
		}
		/* uint64_t is not necessarily unsigned long long, so cast to match %llu */
		for(i=0;i<perf_showcount;i++)
			if(perf_show[i][1] != -1)
			{
				if(perf[perf_show[i][1]])
				{
					double val = ((double)(perf[perf_show[i][0]]))/((double)(perf[perf_show[i][1]]));
					fprintf(out,"%13g ",val);
				}
				else
					fprintf(out,"%11llu/0 ",(unsigned long long)perf[perf_show[i][0]]);
			}
			else
				fprintf(out,"%13llu ",(unsigned long long)perf[perf_show[i][0]]);
		if(m)
		{
			l = FindLine(m,pc);
			f = FindFunc(m,pc);
			fprintf(out,"%08x+%-*s",pc-m->start,maxmodnamelen,m->name);
			if(f)
				fprintf(out," %08x+%s",pc-f->start,f->name);
			if(l)
				fprintf(out," file %s line %d",l->file,l->line);
			fprintf(out,"\n");
		}
		else if(reg_offsets[15] != -1)
			fprintf(out,"         Unknown\n");
		else
			fprintf(out,"\n");
		if(++lines == maxlines)
		{
			fprintf(out,"Line limit reached\n");
			return;
		}
		start+=count;
		data += prof_size*count;
	}
}

/* Draw an ASCII graph of CPU usage per module over samples [start,end).

   out        Stream to write to
   width      Number of graph columns requested; reduced to the number of
              samples if there are fewer samples than columns
   start,end  Sample range; clamped to the loaded profile

   One row is printed per module that received at least one hit (plus an
   "Unknown" row for PCs outside every module). Each cell is a digit 0-9 or
   '*' giving that module's share of the column's samples in tenths.
   Does nothing if the range is empty, the PC was not recorded, or width is
   not positive. */
void DoGraph(FILE *out,int width,int start,int end)
{
	int i;
	if(start < 0)
		start = 0;
	if(end > prof_samples)
		end = prof_samples;
	if((start >= end) || (reg_offsets[15] == -1) || (width <= 0))
		return;
	uint32_t *data = prof_data+start*prof_size;
	/* graph[column*(nummod+1)+module]; the extra slot is for "Unknown" */
	size_t graph_bytes = sizeof(uint32_t)*(size_t)width*(size_t)(nummod+1);
	uint32_t *graph = (uint32_t *) safe_malloc(graph_bytes);
	memset(graph,0,graph_bytes);
	size_t hits_bytes = sizeof(uint32_t)*(size_t)(nummod+1);
	uint32_t *hits = (uint32_t *) safe_malloc(hits_bytes);
	memset(hits,0,hits_bytes);
	int samples = end-start;
	float colwidth = ((float)samples)/width;
	if(colwidth < 1.0f)
	{
		/* Fewer samples than columns: one sample per column */
		colwidth = 1.0f;
		width = samples;
	}
	float coltime=((float)colwidth)/prof_freq;
	static const char *units[] = {"s","ms","us","ns","ps"};
	int unit=0;
	while((coltime < 1.0f) && (unit < 4))
	{
		unit++;
		coltime = coltime*1000.0f;
	}
	fprintf(out,"%d samples (%f%s) per column\n",(int)ceil(colwidth),coltime,units[unit]);
	int column=0;
	int remain=samples;
	int offset=0;
	static const char *usage = "0123456789*";
	/* Count hits per module, overall and per column */
	while(offset<remain)
	{
		module *m = FindAddress(data[reg_offsets[15]]);
		if(m)
			i = m-modules;
		else
			i = nummod;
		hits[i]++;
		graph[column*(nummod+1)+i]++;
		offset++;
		data += prof_size;
		column = (int) ((((uint64_t)offset)*width)/samples);
		/* Guard against unexpected overflows */
		if(column >= width)
			column = width-1;
	}
	/* Replace each non-zero count with the character to plot */
	for(column=0;column<width;column++)
	{
		int total=0;
		for(i=0;i<nummod+1;i++)
			total += graph[column*(nummod+1)+i];
		if(total)
		{
			for(i=0;i<nummod+1;i++)
			{
				if(graph[column*(nummod+1)+i])
					graph[column*(nummod+1)+i] = usage[(10*graph[column*(nummod+1)+i])/total];
			}
		}
	}
	/* Work out column widths for the name and hit count columns */
	int maxlen=9;
	int hitwidth=4;
	char numbuf[16];
	for(i=0;i<nummod+1;i++)
	{
		char *name = (i==nummod?"Unknown":modules[i].name);
		for(column=0;column<width;column++)
			if(graph[column*(nummod+1)+i])
				break;
		if((column != width) && ((int)strlen(name) > maxlen))
			maxlen = (int)strlen(name);
		sprintf(numbuf,"%u",(unsigned int)hits[i]);
		if((int)strlen(numbuf) > hitwidth)
			hitwidth = (int)strlen(numbuf);
	}
	/* Heading, first line */
	fprintf(out,"%-*s | ",maxlen,"CPU usage");
	for(i=0;i<hitwidth;i++)
		fputc(' ',out);
	fprintf(out," | ");
	fprintf(out,"Sample\n");
	/* Tick labels are spaced wide enough for the largest sample number */
	sprintf(numbuf,"%d ",end);
	int tickwidth = (int)strlen(numbuf);
	/* Heading, second line */
	fprintf(out,"%*s | ",maxlen,"Module");
	fprintf(out,"Hits");
	for(i=4;i<hitwidth;i++)
		fputc(' ',out);
	fprintf(out," | ");
	for(column=0;column<width;column+=tickwidth)
		fprintf(out,"%-*d",tickwidth,(int) (start+colwidth*column));
	fputc('\n',out);
	/* Ruler with a '+' under each tick label */
	for(column=0;column<maxlen+1;column++)
		fputc('-',out);
	fputc('+',out);
	for(column=0;column<hitwidth+2;column++)
		fputc('-',out);
	fputc('+',out);
	int tick=-1;
	for(column=0;column<width+1;column++)
	{
		fputc((tick==0?'+':'-'),out);
		if(++tick == tickwidth)
			tick = 0;
	}
	fputc('-',out);
	fputc('\n',out);
	int line=1;
	for(i=0;i<nummod+1;i++)
	{
		char *name = (i==nummod?"Unknown":modules[i].name);
		if(hits[i])
		{
			/* Separator line before every eighth row */
			if(!((line++)&7))
			{
				for(column=0;column<maxlen+1;column++)
					fputc('-',out);
				fputc('+',out);
				for(column=0;column<hitwidth+2;column++)
					fputc('-',out);
				fputc('+',out);
				for(column=0;column<width+2;column++)
					fputc('-',out);
				fputc('\n',out);
			}
			fprintf(out,"%*s | ",maxlen,name);
			fprintf(out,"%-*u | ",hitwidth,(unsigned int)hits[i]);
			for(column=0;column<width;column++)
			{
				if(graph[column*(nummod+1)+i])
					fputc(graph[column*(nummod+1)+i],out);
				else
					fputc(' ',out);
			}
			fputc('\n',out);
		}
	}
	free(graph);
	free(hits);
}

void DoPGraph(FILE *out,int a,int b,int width,int height,int start,int end)
{
	int i;
	if(start < 0)
		start = 0;
	if(end > prof_samples)
		end = prof_samples;
	if((start >= end) || (reg_offsets[15] == -1) || (width==0))
		return;
	uint32_t *data = prof_data+start*prof_size;
	int samples = end-start;
	float colwidth = ((float)samples)/width;
	if(colwidth < 1.0f)
	{
		colwidth = 1.0f;
		width = samples;
	}
	float coltime=((float)colwidth)/prof_freq;
	char *units[] = {"s","ms","s","ns","ps"};
	int unit=0;
	while((coltime < 1.0f) && (unit < 4))
	{
		unit++;
		coltime = coltime*1000.0f;
	}
	fprintf(out,"%d samples (%f%s) per column\n",(int)ceil(colwidth),coltime,units[unit]);
	int column=0;
	/* Graph header */
	char format2[16];
	fprintf(out,"%15.15s | Sample\n",perf_names[a]);
	if(b!=-1)
		fprintf(out,"/%14.14s | ",perf_names[b]);
	else
		fprintf(out,"                | "); 
	sprintf(format2,"%d ",end);
	int tickwidth = strlen(format2);
	sprintf(format2,"%%-%dd",tickwidth);
	for(column=0;column<width;column+=tickwidth)
		fprintf(out,format2,(int) (start+colwidth*column));
	fputc('\n',out);
	fprintf(out,"----------------+");
	int tick=-1;
	for(column=0;column<width+1;column++)
	{
		fputc((tick==0?'+':'-'),out);
		if(++tick == tickwidth)
			tick = 0;
	}
	fputc('-',out);
	fputc('\n',out);
	/* Get height of graph (i.e. min & max result) */
	double pmax = 0,pmin = DBL_MAX;
	int offset=0;
	while(offset<samples)
	{
		double val = (double)data[perf_offsets[a]];
		if((b != -1) && (data[perf_offsets[b]]))
			val = val/((double)data[perf_offsets[b]]);
		pmax = MAX(pmax,val);
		pmin = MIN(pmin,val);
		offset++;
		data += prof_size;
	}
	double step = (pmax-pmin)/height;
	/* Now collate results */
	uint32_t *hits = safe_malloc(sizeof(uint32_t)*width*height);
	memset(hits,0,sizeof(uint32_t)*width*height);
	uint32_t *colhits = safe_malloc(sizeof(uint32_t)*width);
	memset(colhits,0,sizeof(uint32_t)*width);
	data = prof_data+start*prof_size;
	column=0;
	int remain=samples;
	offset=0;
	while(offset<remain)
	{
		double val = (double)data[perf_offsets[a]];
		if((b != -1) && (data[perf_offsets[b]]))
			val = val/((double)data[perf_offsets[b]]);
		int row = (int) (((val-pmin)*height)/(pmax-pmin));
		row = MAX(row,0);
		row = MIN(row,height-1);
		hits[column+row*width]++;
		colhits[column]++;
		offset++;
		data += prof_size;
		column = (int) ((((uint64_t)offset)*width)/samples);
		/* Guard against unexpected overflows */
		if(column >= width)
			column = width-1;
	}
	uint32_t *row = hits+height*width;
	while(height--)
	{
		row -= width;
		fprintf(out,"%15g | ",pmax);
		for(i=0;i<width;i++)
		{
			char c = ' ';
			if(row[i])
			{
				static const char *usage = "0123456789*";
				c = usage[(row[i]*10)/colhits[i]];
			}
			fputc(c,out);
		}
		fputc('\n',out);
		pmax -= step;
		pmax=MAX(pmax,pmin);
	}
	free(hits);
	free(colhits);
}

/* One row of the histogram built by DoHist */
typedef struct {
	module *mod; /* Module the hits were recorded against */
	gpa_func *func; /* Function within mod, or NULL for hits counted against the module itself */
} hist_entry;

/* qsort comparator for hist_entry.

   Orders entries by descending hit count, then by module name, then by
   function name. Within one module an entry without a function sorts
   before entries that have one. Returns 0 for entries that compare equal,
   as qsort requires. */
static int hist_comp_func(const void *a,const void *b)
{
	const hist_entry *ha = (const hist_entry *) a;
	const hist_entry *hb = (const hist_entry *) b;
	int ca = (ha->func ? ha->func->hits : ha->mod->hits);
	int cb = (hb->func ? hb->func->hits : hb->mod->hits);
	if (ca != cb)
	{
		/* Compare rather than subtract so large counts cannot overflow */
		return (cb > ca) ? 1 : -1;
	}
	if (ha->mod != hb->mod)
	{
		return strcmp(ha->mod->name, hb->mod->name);
	}
	if (ha->func && hb->func)
	{
		return strcmp(ha->func->name, hb->func->name);
	}
	if (!ha->func && !hb->func)
	{
		/* Same module, both module-level entries: equal */
		return 0;
	}
	return (ha->func ? 1 : -1);
}

/* Print a histogram of where the PC was found over samples [start,end).

   out        Stream to write to
   start,end  Sample range; clamped to the loaded profile
   maxlines   Maximum number of rows to print (0 = all rows)

   Samples are counted against the function containing the PC when the
   module's detail level is below 3 and a function is found, otherwise
   against the module. Hidden modules (detail -1) and PCs outside every
   module are not counted. Rows are sorted by descending hit count.
   Requires the PC to have been recorded; overwrites the 'hits' fields of all
   modules and their functions. */
void DoHist(FILE *out,int start,int end,int maxlines)
{
	if(start < 0)
		start = 0;
	if(end > prof_samples)
		end = prof_samples;
	/* Nothing to count for an empty or inverted range */
	if(start >= end)
		return;
	if(reg_offsets[15] == -1)
	{
		printf("PC data required\n");
		return;
	}
	uint32_t *data = prof_data+start*prof_size;
	/* Reset histogram counters */
	for(int i=0;i<nummod;i++)
	{
		modules[i].hits = 0;
		for(int j=0;j<modules[i].numfunc;j++)
		{
			modules[i].funcs[j].hits = 0;
		}
	}
	/* Compute counts */
	while(start<end)
	{
		gpa_func *f=NULL;
		uint32_t pc = data[reg_offsets[15]];
		module *m = FindAddress(pc);
		if (m && (m->detail != -1))
		{
			if(m->detail < 3)
			{
				f = FindFunc(m,pc);
			}
			if(f)
			{
				f->hits++;
			}
			else
			{
				m->hits++;
			}
		}
		start++;
		data += prof_size;
	}
	/* Now sort by occurence */
	int count=0;
	for(int i=0;i<nummod;i++)
	{
		if (modules[i].hits)
		{
			count++;
		}
		for(int j=0;j<modules[i].numfunc;j++)
		{
			if (modules[i].funcs[j].hits)
			{
				count++;
			}
		}
	}
	if (!count)
		return;
	hist_entry *hist = safe_malloc(sizeof(hist_entry)*count);
	count=0;
	for(int i=0;i<nummod;i++)
	{
		if (modules[i].hits)
		{
			hist[count].mod = &modules[i];
			hist[count].func = NULL;
			count++;
		}
		for(int j=0;j<modules[i].numfunc;j++)
		{
			if (modules[i].funcs[j].hits)
			{
				hist[count].mod = &modules[i];
				hist[count].func = &modules[i].funcs[j];
				count++;
			}
		}
	}
	qsort(hist,count,sizeof(hist_entry),hist_comp_func);
	/* Never print more rows than exist, whatever limit was asked for */
	if (!maxlines || (maxlines > count))
	{
		maxlines = count;
	}
	fprintf(out,"Count        Module + Func\n");
	for(int i=0;i<maxlines;i++)
	{
		if (hist[i].func)
		{
			fprintf(out,"%12d %*s %s\n",hist[i].func->hits,maxmodnamelen,hist[i].mod->name,hist[i].func->name);
		}
		else
		{
			fprintf(out,"%12d %*s Unknown\n",hist[i].mod->hits,maxmodnamelen,hist[i].mod->name);
		}
	}
	free(hist);
}

/*

				User interface

*/

void DoCommand(char *cmd)
{
	char buf[1024];
	char buf2[1024];
	uint32_t start=0,end=0,i=0,a,b,h;
	if(sscanf(cmd,"loadprof %s",buf)==1)
		LoadProf(buf);
	else if(sscanf(cmd,"loadgpa %s %s",buf,buf2)==2)
	{
		module *m = FindModule(buf);
		if(!m)
			printf("Module '%s' not found\n",buf);
		else
			LoadGPA(m,buf2);
	}
	else if(sscanf(cmd,"loadabs %s",buf)==1)
	{
		module *m = FindModule("Application_space");
		LoadAbsolute(m,buf);
	}
	else if(sscanf(cmd,"loadsyms %s %s",buf,buf2)==2)
	{
		module *m = FindModule(buf);
		if(!m)
			printf("Module '%s' not found\n",buf);
		else
			LoadSyms(m,buf2);
	}
	else if(sscanf(cmd,"loadrom %s %s",buf,buf2)==2)
	{
		LoadROM(buf,buf2);
	}
	else if(!strcmp(cmd,"modules"))
	{
		static const char *detail = "-*lf+";
		static const char *unknown_detail = "-*+";
		FindAddress(0); /* Sort in order */
		printf("Num Start    End      GPA  Module\n");
		for(i=0;i<nummod;i++)
			printf("%3d %08x %08x %s %c%s\n",i,modules[i].start,modules[i].end,(modules[i].numline?"GPA":"   "),detail[modules[i].detail+1],modules[i].name);
		printf("--- -------- -------- --- %cUnknown\n",unknown_detail[unknown_filter+1]);
	}
	else if(sscanf(cmd,"dumpto %s",buf) == 1)
	{
		if(dumpto)
			fclose(dumpto);
		dumpto = fopen(buf,"w");
		if(!dumpto)
			printf("%s: Couldn't open\n",buf);
	}
	else if(!strcmp(cmd,"dumpto"))
	{
		if(dumpto)
			fclose(dumpto);
		dumpto = 0;
	}
	else if(sscanf(cmd,"dump %d + %d%n",&start,&end,&i)==2)
	{
		if(sscanf(cmd+i," max %d",&i)!=1)
			i = 0;
		DoDump((dumpto?dumpto:stdout),start,start+end,i);
	}
	else if(sscanf(cmd,"dump %d max %d",&start,&i)==2)
	{
		DoDump((dumpto?dumpto:stdout),start,prof_samples,i);
	}
	else if(sscanf(cmd,"dump %d %d%n",&start,&end,&i)==2)
	{
		if(sscanf(cmd+i," max %d",&i)!=1)
			i = 0;
		DoDump((dumpto?dumpto:stdout),start,end,i);
	}
	else if(sscanf(cmd,"dump max %d",&i)==1)
	{
		DoDump((dumpto?dumpto:stdout),0,prof_samples,i);
	}
	else if(!strcmp(cmd,"dump"))
	{
		DoDump((dumpto?dumpto:stdout),0,prof_samples,0);
	}
	else if(sscanf(cmd,"hist %d + %d%n",&start,&end,&i)==2)
	{
		if(sscanf(cmd+i," max %d",&i)!=1)
			i = 0;
		DoHist((dumpto?dumpto:stdout),start,start+end,i);
	}
	else if(sscanf(cmd,"hist %d max %d",&start,&i)==2)
	{
		DoHist((dumpto?dumpto:stdout),start,prof_samples,i);
	}
	else if(sscanf(cmd,"hist %d %d%n",&start,&end,&i)==2)
	{
		if(sscanf(cmd+i," max %d",&i)!=1)
			i = 0;
		DoHist((dumpto?dumpto:stdout),start,end,i);
	}
	else if(sscanf(cmd,"hist max %d",&i)==1)
	{
		DoHist((dumpto?dumpto:stdout),0,prof_samples,i);
	}
	else if(!strcmp(cmd,"hist"))
	{
		DoHist((dumpto?dumpto:stdout),0,prof_samples,0);
	}
	else if(sscanf(cmd,"graph %d %d + %d",&i,&start,&end)==3)
	{
		DoGraph((dumpto?dumpto:stdout),i,start,start+end);
	}
	else if(sscanf(cmd,"graph %d %d %d",&i,&start,&end)==3)
	{
		DoGraph((dumpto?dumpto:stdout),i,start,end);
	}
	else if(sscanf(cmd,"graph %d",&i)==1)
	{
		DoGraph((dumpto?dumpto:stdout),i,0,prof_samples);
	}
	else if(sscanf(cmd,"filter module %c%n",buf,&start) == 1)
	{
		int detail = -1;
		if(buf[0] == '*')
			detail = 0;
		else if(buf[0] == 'l')
			detail = 1;
		else if(buf[0] == 'f')
			detail = 2;
		else if(buf[0] == '+')
			detail = 3;
		char *c = cmd+start;
		while(c && *c)
		{
			char *next = strchr(c,' ');
			if(next)
				*(next++) = 0;
			if(*c == '*')
			{
				for(i=0;i<nummod;i++)
					modules[i].detail = detail;
			}
			else if(*c)
			{
				module *m = FindModule(c);
				if(!m)
					printf("Module not found: %s\n",c);
				else
					m->detail = detail;
			}
			c = next;
		}
	}
	else if(sscanf(cmd,"filter reg %x",&start)==1)
		reg_filter = reg_mask&start;
	else if(sscanf(cmd,"filter unknown %c",buf)==1)
	{
		if(buf[0] == '+')
			unknown_filter = 1;
		else if(buf[0] == '*')
			unknown_filter = 0;
		else
			unknown_filter = -1;
	}
	else if(sscanf(cmd,"info %08x",&start)==1)
	{
		module *m = FindAddress(start);
		if(m)
		{
			printf("Address: %08x Module: %08x+%s\n",start,start-m->start,m->name);
			gpa_func *f = FindFunc(m,start);
			gpa_line *l = FindLine(m,start);
			if(f)
				printf("Function: %08x+%s\n",start-f->start,f->name);
			if(l)
				printf("File: %s\nLine: %d\n",l->file,l->line);
		}
		else
		{
			printf("Unknown location\n");
			if(nummod)
				for(i=0;i<nummod-1;i++)
					if((modules[i].start < i) && (modules[i+1].start > i))
						printf("Nearest to %s (%08x-%08x) and %s (%08x-%08x)\n",modules[i].name,modules[i].start,modules[i].end,modules[i+1].name,modules[i+1].start,modules[i+1].end);
		}
	}
	else if(sscanf(cmd,"module %s %08x%n",buf,&start,&i) == 2)
	{
		while(cmd[i]==32)
			i++;
		if(cmd[i] == '+')
		{
			i = sscanf(cmd+i,"+ %08x",&end);
			end += start;
		}
		else
			i = sscanf(cmd+i,"%08x",&end);
		if(i)
		{
			module *m = FindModule(buf);
			if(m)
			{
				KillGPA(m);
				m->start = start;
				m->end = end;
				modules_inorder = false;
			}
			else
				AddModule(start,end,buf);
		}
		else
			printf("Bad end address\n");
	}
	else if(sscanf(cmd,"script %s",buf)==1)
	{
		FILE *f = fopen(buf,"r");
		if(!f)
			printf("%s: Couldn't open\n",buf);
		else
		{
			while(!feof(f) && fgets(buf,1024,f))
			{
				trim(buf);
				printf(">%s\n",buf);
				DoCommand(buf);
			}
			fclose(f);
		}
	}
	else if(cmd[0] == '*')
		system(cmd+1);
	else if(!strcmp(cmd,"quit") || !strcmp(cmd,"exit"))
	{
		exit(0);
	}
	else if(!strcmp(cmd,"psr raw"))
		psrmode = 0;
	else if(!strcmp(cmd,"psr simple"))
		psrmode = 1;
	else if(!strcmp(cmd,"plist"))
	{
		if(!perf_count)
			printf("No performance counters in profile data\n");
		for(i=0;i<perf_count;i++)
			printf("Counter #%d: %s\n",i,perf_names[i]);
	}
	else if(!strncmp(cmd,"filter perf ",12))
	{
		perf_showcount=0;
		char *c = cmd+11;
		do {
			if(sscanf(c," %d/%d%n",&perf_show[perf_showcount][0],&perf_show[perf_showcount][1],&i) == 2)
			{
				perf_showcount++;
				c += i;
			}
			else if(sscanf(c," %d%n",&perf_show[perf_showcount][0],&i) == 1)
			{
				perf_show[perf_showcount++][1] = -1;
				c += i;
			}
			else
				break;
		} while(perf_showcount < 32);
	}
	else if(sscanf(cmd,"pgraph %d/%d %d*%d %d + %d",&a,&b,&i,&h,&start,&end)==6)
	{
		DoPGraph((dumpto?dumpto:stdout),a,b,i,h,start,start+end);
	}
	else if(sscanf(cmd,"pgraph %d/%d %d*%d %d %d",&a,&b,&i,&h,&start,&end)==6)
	{
		DoPGraph((dumpto?dumpto:stdout),a,b,i,h,start,end);
	}
	else if(sscanf(cmd,"pgraph %d/%d %d*%d",&a,&b,&i,&h)==4)
	{
		DoPGraph((dumpto?dumpto:stdout),a,b,i,h,0,prof_samples);
	}
	else if(sscanf(cmd,"pgraph %d %d*%d %d + %d",&a,&i,&h,&start,&end)==5)
	{
		DoPGraph((dumpto?dumpto:stdout),a,-1,i,h,start,start+end);
	}
	else if(sscanf(cmd,"pgraph %d %d*%d %d %d",&a,&i,&h,&start,&end)==5)
	{
		DoPGraph((dumpto?dumpto:stdout),a,-1,i,h,start,end);
	}
	else if(sscanf(cmd,"pgraph %d %d*%d",&a,&i,&h)==3)
	{
		DoPGraph((dumpto?dumpto:stdout),a,-1,i,h,0,prof_samples);
	}
	else if(!strcmp(cmd,"help"))
	{
		printf("help                                   Display this help\n");
		printf("script <file>                          Load commands from <file>\n");
		printf("*<command>                             Run shell command\n");
		printf("quit                                   Quit\n");
		printf("exit                                   Quit\n");
		printf("\n");
		printf("loadprof <filename>                    Load profiling data\n");
		printf("loadgpa <modulename> <filename>        Load GPA file\n");
		printf("loadabs <filename>                     Load Absolute file containing 'poked'\n");
		printf("                                       function names\n");
		printf("loadsyms <modulename> <filename>       Load symbols file\n");
		printf("                                       (Norcroft -Symbols option)\n");
		printf("loadrom <builddir> <romname>           Load module list and symbols from ROM\n");
		printf("                                       build tree. E.g.\n");
		printf("                                       'loadrom <Build$Dir> aUVZ00-00'\n");
		printf("modules                                List modules\n");
		printf("module <name> <start> [+ <len>|<end>]  Add/update module\n");
		printf("\n");
		printf("info <addr>                            Display information about address\n");
		printf("plist                                  Show available performance counters\n");
		printf("dump <start> + <count> [max <lines>]   Dump samples, using configured filters\n");
		printf("dump <start> <end> [max <lines>]\n");
		printf("dump <start> max <lines>\n");
		printf("dump [max <lines>]\n");
		printf("graph <width> <start> + <count>        ASCII graph of CPU usage\n");
		printf("graph <width> <start> <end>\n");       
		printf("graph <width>\n");                     
		printf("hist <start> + <count> [max <lines>]   Generate a histogram showing frequency\n");
		printf("hist <start> <end> [max <lines>]       of occurrence of functions/modules\n");
		printf("hist [max <lines>]\n");
		printf("pgraph <ctr> <w>*<h> <start> + <count> ASCII graph of a performance counter,\n");
		printf("pgraph <ctr> <w>*<h> <start> <end>     or of one counter divided by another\n");
		printf("pgraph <ctr> <w>*<h>                   (see 'filter perf' for <ctr> syntax)\n");
		printf("dumpto [<filename>]                    Redirect 'dump', 'graph', 'hist' &\n");
		printf("                                       'pgraph' output to file\n");
		printf("\n");
		printf("filter module *|l|f|+|- <modules>...   Change module detail level:\n");
		printf("                                       *  Show all entries\n");
		printf("                                       l  Group by GPA lines\n");
		printf("                                       f  Group by GPA functions\n");
		printf("                                       +  Group by module\n");
		printf("                                       -  Hide module\n");
		printf("filter module *|l|f|+|- *              Change detail level for all modules\n");
		printf("filter unknown *|+|-                   Change 'Unknown' detail level:\n");
		printf("                                       *  Show all unknown entries\n");
		printf("                                       +  Group unknown entries together\n");
		printf("                                       -  Hide all unknown entries\n");
		printf("filter reg <hex regmask>               Set which registers to display\n");
		printf("filter perf ...                        Specify performance counters for 'dump':\n");
		printf("filter perf 0 1 2 [...]                Show counters #0, #1, #2\n");
		printf("filter perf 1/0 2/0 [...]              Show counters #1 and #2 divided by #0\n");
		printf("psr raw|simple                         Change PSR display mode\n");
	}
	else if(strlen(cmd))
	{
		char *c = strchr(cmd,' ');
		if(c)
			*c = 0;
		printf("Unrecognised command '%s'\nUse 'help' for help\n",cmd);
	}
}

/* Entry point: optionally load the profile named by the first argument,
   then read and execute commands from stdin until 'quit'/'exit' or end of
   input. */
int main(int argc,char **argv)
{
	if(argc>1)
		LoadProf(argv[1]);
	do {
		char buf[1024];
		printf(">");
		fflush(stdout);
		/* fgets is bounded, unlike gets; stop cleanly at end of input */
		if(!fgets(buf,sizeof(buf),stdin))
			break;
		/* Presumably trim() removes the trailing newline left by fgets, as the 'script' command relies on it the same way - confirm */
		trim(buf);
		DoCommand(buf);
	} while(1);
	return 0;
}
