/*
 * XPK-SQSH decompression routine.
 * Note: this compression has a serious tendency to write past block end, so don't report that has errors.
 */
#include <string.h>

#include "Loaders.h"
#include "Song.h"
#include "FSysLog.h"

static const uint8_t ctable[] =
{ 2, 3, 4, 5, 6, 7, 8, 0
, 3, 2, 4, 5, 6, 7, 8, 0
, 4, 3, 5, 2, 6, 7, 8, 0
, 5, 4, 6, 2, 3, 7, 8, 0
, 6, 5, 7, 2, 3, 4, 8, 0
, 7, 6, 8, 2, 3, 4, 5, 0
, 8, 7, 6, 2, 3, 4, 5, 0
};

struct bitstream
{
	// bit buffer for rolling data bit by bit from the compressed file
	uint32_t       word;
	uint32_t       bitcount;
	// compressed data source
	const uint8_t* src;
	const uint8_t* srcend;
};

static void initGetb(struct bitstream* bs, const uint8_t* src, uint32_t src_length)
{
	bs->src = src;
	bs->srcend = src + src_length;
	bs->bitcount = 0;
}

// get nbits from the compressed stream
static uint32_t getb(struct bitstream* bs, int nbits)
{
	uint32_t val;

	while (bs->bitcount < nbits)
	{
		bs->word <<= 8;
		if (bs->src < bs->srcend)
			bs->word |= *bs->src++;
		bs->bitcount += 8;
	}

	val = bs->word << (32 - bs->bitcount);
	val >>= 32 - nbits;
	bs->bitcount -= nbits;

	return val;
}

static int32_t getsb(struct bitstream* bs, int nbits)
{
	int32_t val;

	while (bs->bitcount < nbits)
	{
		bs->word <<= 8;
		if (bs->src < bs->srcend)
			bs->word |= *bs->src++;
		bs->bitcount += 8;
	}

	val = bs->word << (32 - bs->bitcount);
	val >>= 32 - nbits;
	bs->bitcount -= nbits;

	return val;
}

const _kernel_oserror* Decomp_XPKF(SongHdr* pSong)
{
	const _kernel_oserror* e = NULL;
	char tag[4];
	uint32_t outSize, inSize;
	uint8_t* pSrc = NULL;
	const uint8_t* pCur;
	const uint8_t* pEnd;
	uint8_t* dst;
	uint8_t* dstend;
	struct bitstream bs;

	e = FileLoad_ReadInt(pSong, (uint32_t*) tag, 4);
	if (e) return e;

	if (memcmp(tag, "XPKF", 4) != 0)
		return Loaders_NotThisType;

	e = FileLoad_ReadReverseInt(pSong, &inSize, 4);
	if (e) return e;

	e = FileLoad_ReadInt(pSong, (uint32_t*) tag, 4);
	if (memcmp(tag, "SQSH", 4) != 0)
		return Loaders_NotThisType;

	if (!e) FileLoad_ReadReverseInt(pSong, &outSize, 4);
	if (e) return e;

	if (inSize > outSize)
		return Loaders_NotThisType;
	if (inSize + 8 > FileLoad_GetSize(pSong))
		return Loaders_NotThisType;

	e = Loaders_Alloc(pSong, (void**) &pSong->pLoaderData->Decomp.pMemory, outSize);
	if (e) return e;
	pSong->pLoaderData->Decomp.MemSize = outSize;

	e = Loaders_Alloc(pSong, (void**) &pSrc, inSize - 8);
	if (e) return e;
	e = FileLoad_Read(pSong, pSrc, inSize - 8);
	if (e) return e;

	dst = pSong->pLoaderData->Decomp.pMemory;
	dstend = dst + outSize;

	pCur = pSrc + 20;
	pEnd = pSrc + inSize;

	while (dst < dstend)
	{
		uint32_t type, in, out;

		if (pEnd - pCur < 10)
		{
			e = Loaders_Error(pSong, pCur - pSrc + 16, "XPKF: Not enough bytes for block header");
			goto decomp_error;
		}

		type = *pCur++;
		pCur += 3; // 1 ? + 2 cheksum
		in = *pCur++;
		in = (in << 8) + *pCur++; // packed
		if (type)
			in = (in + 3) & 0xfffc; // align
		out = *pCur++;
		out = (out << 8) + *pCur++; // unpacked

		if (pCur + in > pEnd)
		{
			e = Loaders_Error(pSong, pCur - pSrc - 4 + 16, "XPKF: Block size to larger, %d > %d"
							, in, pEnd - pCur);
			goto decomp_error;
		}

		if (dst + out > dstend)
		{
			e = Loaders_Error(pSong, pCur - pSrc - 2 + 16, "XPKF: Block target size to large, %d > %d"
							, out, dstend - dst);
			goto decomp_error;
		}

		switch(type)
		{
			case 0: // verbatim copy
			{
				memcpy(dst, pCur, in);
				dst += in;
			}
			break;
			case 1: // compressed
			{
				int d1 = 0, d2 = 0, oldc = 0, l, c, val;
				const uint8_t* const blockend = dst + out;

				*dst++ = val = pCur[2];
				initGetb(&bs, pCur + 3, in - 3);

				while (dst < blockend)
				{
					d2 -= d2 >> 3;

					if (d1 < 8)
					{
						if (getb(&bs, 1))
							goto copy_data;

						l = 1;
						c = 8;
					}
					else
					{
						if (getb(&bs, 1))
						{
							c = 8;
							if (c == oldc)
							{
								if (d2 >= 20)
								{
									l = 2;
									d2 += 8;
								}
								else
									l = 1;
							}
							else
							{
								c = oldc;
								l = 5;
								d2 += 8;
							}
						}
						else
						{
							if (!getb(&bs, 1))
								goto copy_data;

							if (getb(&bs, 1))
							{
								if (getb(&bs, 1))
									c = 4 + getb(&bs, 2);
								else
									c = 3;
							}
							else
							{
								c = 2;
							}

							c = ctable[8 * oldc + c - 17];
							if (c != 8)
							{
								l = 5;
								d2 += 8;
							}
							else
							{
								if (d2 >= 20)
								{
									l = 2;
									d2 += 8;
								}
								else
								{
									l = 1;
								}
							}
						}
					}

					if (dst + l > blockend)
					{
						SysLog_Log(SysLog_High
								, "XPKF: writing past block end %d > %d"
								, l, blockend - dst
								);
						l = blockend - dst;
					}

					while (l > 0)
					{
						val -= getsb(&bs, c);
						*dst++ = val;
						l--;
					}

					if (d1 != 31)
						d1++;

					oldc = c;

					continue;
copy_data:
					{
						int offset;
						const uint8_t* src;

						if (!getb(&bs, 1))
							l = getb(&bs, 1) + 2;
						else if (!getb(&bs, 1))
							l = getb(&bs, 1) + 4;
						else if (!getb(&bs, 1))
							l = getb(&bs, 1) + 6;
						else if (!getb(&bs, 1))
							l = getb(&bs, 3) + 8;
						else
							l = getb(&bs, 5) + 16;

						if (!getb(&bs, 1))
						{
							if (!getb(&bs, 1))
							{
								c = 8;
								offset = 0;
							}
							else
							{
								c = 14;
								offset = -0x1100;
							}
						}
						else
						{
							c = 12;
							offset = -0x100;
						}

						l -= 3;
						if (l >= 0)
						{
							if (l)
								d1--;
							d1--;
							if (d1 < 0)
								d1 = 0;
						}

						l += 3;
						src = dst + offset - getb(&bs, c) - 1;

						// Note: src + l may be > dst, it is used to replicate bytes
						if ((src < pSong->pLoaderData->Decomp.pMemory) || (src >= dst))
						{
							e = Loaders_Error
									(pSong, bs.src - pSrc + 16
									, "XPKF: copying out of bounds from: %08x, to: %08x, l: %x"
									, src - pSong->pLoaderData->Decomp.pMemory, dst - pSong->pLoaderData->Decomp.pMemory, l
									);
							goto decomp_error;
						}

						if (dst + l > blockend)
						{
							SysLog_Log(SysLog_High
									, "XPKF: copying past block end %d > %d"
									, l, blockend - dst
									);
							l = blockend - dst;
						}

						while (l > 0)
						{
							*dst++ = *src++;
							l--;
						}
						val = dst[-1];
					}
				}
			}
			break;
			default:
			{
				e = Loaders_Error(pSong, pCur - pSrc + 16, "XPKF: bad packet type %d", type);
				goto decomp_error;
			}
		}

		pCur += in;
	}

decomp_error:
	Loaders_Free(pSong, pSrc);

	return e;
}
