/*
 * slice.c
 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Modifications for this RISCOS port, Copyright (c) 2002 P.Everett
 * <peter@everett9981.freeserve.co.uk>
 */

#include "inttypes.h"
#include <string.h>
#include <stdio.h>
#include "mpeg2_inte.h"
#include "config.h"
#include "timer1.h"
#include "ka_log.h"
#include "idct.h"

#define inline

#include "vlc.h"

/* Motion-compensation function tables; this file indexes its .put and .avg members. */
mpeg2_mc_t mc_functions;

/*
 * Quantizer scale values selected when decoder->q_scale_type is set,
 * indexed by the 5-bit quantizer_scale_code read from the bitstream.
 * The table is only ever read, so it is declared const.
 */
static const uint8_t non_linear_quantizer_scale [32] = {
     0,  1,  2,  3,  4,  5,   6,   7,
     8, 10, 12, 14, 16, 18,  20,  22,
    24, 28, 32, 36, 40, 44,  48,  52,
    56, 64, 72, 80, 88, 96, 104, 112
};

/*
 * Attach the decoder's bitstream reader to the data at `start` and
 * preload the bit buffer with one GETWORD fetch.
 */
static inline void bitstream_init (mpeg2_decoder_t * const decoder, const uint8_t * start)
{
	/* Begin with an empty bit buffer and a zero bit counter. */
	decoder->bitstream_bits = 0;
	decoder->bitstream_buf = 0;
	decoder->bitstream_ptr = start;

	/* GETWORD comes from outside this file; it receives the buffer,
	 * a shift of 16 and the read pointer. */
	GETWORD (decoder->bitstream_buf, 16, decoder->bitstream_ptr);
}

/*
 * Parse the macroblock type for the current picture coding type and
 * return the MACROBLOCK_* mode flags, with the motion type and the
 * DCT_TYPE_INTERLACED flag merged in where the bitstream carries them.
 * Returns 0 for an unknown coding type, or for a D picture whose
 * macroblock type bit is clear.
 */
static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	const MBtab * entry;
	int modes;
	int frame_picture;

	NEEDBITS (bit_buf, bits, bit_ptr);

	// pick the lookup table matching the picture coding type
	if (decoder->coding_type == I_TYPE)
		entry = MB_I + UBITS (bit_buf, 2);
	else if (decoder->coding_type == P_TYPE)
		entry = MB_P + UBITS (bit_buf, 6);
	else if (decoder->coding_type == B_TYPE)
		entry = MB_B + UBITS (bit_buf, 6);
	else if (decoder->coding_type == D_TYPE)
	{
		// D pictures: a single set bit means intra, anything else is invalid
		if (!UBITS (bit_buf, 1))
			return 0; // Invalid
		DUMPBITS (bit_buf, bits, 1);
		return MACROBLOCK_INTRA;
	}
	else
		return 0; // Invalid

	DUMPBITS (bit_buf, bits, entry->len);
	modes = entry->modes;

	frame_picture = (decoder->picture_structure == FRAME_PICTURE);

	// motion type: implied for frame pictures with frame_pred_frame_dct,
	// otherwise read as a 2-bit field
	if (modes & (MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD))
	{
		if (frame_picture && decoder->frame_pred_frame_dct)
		{
			modes |= MC_FRAME;
		}
		else
		{
			modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT;
			DUMPBITS (bit_buf, bits, 2);
		}
	}

	// dct_type bit: only present for coded macroblocks of frame pictures
	// when frame_pred_frame_dct is clear
	if (frame_picture
	&&  (decoder->frame_pred_frame_dct == 0)
	&&  (modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)))
	{
		modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED;
		DUMPBITS (bit_buf, bits, 1);
	}

	return modes;
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Read the 5-bit quantizer_scale_code and convert it to a quantizer
 * scale: through the non-linear table when q_scale_type is set,
 * otherwise as twice the code.
 */
static inline int get_quantizer_scale (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	int code;

	NEEDBITS (bit_buf, bits, bit_ptr);
	code = UBITS (bit_buf, 5);
	DUMPBITS (bit_buf, bits, 5);

	return decoder->q_scale_type ? non_linear_quantizer_scale [code]
	                             : (code << 1);
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Wrap a motion vector component into the range that fits in
 * (5 + f_code) bits, i.e. [-(16 << f_code), (16 << f_code) - 1], by
 * sign-extending its low (5 + f_code) bits.
 *
 * The left shift is performed on an unsigned value because shifting a
 * negative (or overflowing) signed value left is undefined behaviour.
 * The conversion back to int32_t and the right shift of a negative value
 * are implementation-defined; like the original code, this relies on
 * two's complement conversion and an arithmetic right shift.
 *
 * f_code is expected to be in 0..27 so that the shift count is valid.
 */
static inline int bound_motion_vector (const int vector, const int f_code)
{
	const int shift = 27 - f_code;
	const int32_t wrapped = (int32_t) ((uint32_t) vector << shift);

	return wrapped >> shift;
}

/*
 * Decode one motion vector delta from the bitstream, add it to `motion`
 * and return the sum wrapped by bound_motion_vector() for `f_code`.
 * On an invalid motion code (table entry with zero length) a warning may
 * be logged and 0 is returned, which resets the motion component.
 */
static inline int add_motion_delta (int motion, mpeg2_decoder_t * const decoder, const int f_code)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)

	const MVtab * entry;
	int magnitude;
	int sign_mask;

	NEEDBITS (bit_buf, bits, bit_ptr);

	// a leading 1 bit codes a zero delta
	if (bit_buf & 0x80000000)
	{
		DUMPBITS (bit_buf, bits, 1);
		return bound_motion_vector(motion, f_code);
	}

	// short codes use the 4-bit table, the rest the 10-bit table
	if (bit_buf < 0x0c000000)
		entry = MV_10 + UBITS (bit_buf, 10);
	else
		entry = MV_4 + UBITS (bit_buf, 4);

	if (!entry->len)
	{
		if (config.debug & cfg_printwarnings)
			ka_log(ka_log_error | ka_log_video, "Invalid motion code at (%d, %d)", decoder->cur_x, decoder->cur_y);
		return 0; // reset motion
	}

	magnitude = (entry->delta << f_code) + 1;

	// drop the code, then take the sign bit that follows it
	bits += entry->len + 1;
	bit_buf <<= entry->len;
	sign_mask = SBITS (bit_buf, 1);
	bit_buf <<= 1;

	// residual bits are only present for a non-zero f_code
	if (f_code)
	{
		NEEDBITS (bit_buf, bits, bit_ptr);
		magnitude += UBITS (bit_buf, f_code);
		DUMPBITS (bit_buf, bits, f_code);
	}

	// (x ^ s) - s negates x when s is -1 and leaves it alone when s is 0
	return bound_motion_vector(motion + ((magnitude ^ sign_mask) - sign_mask), f_code);
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Read one differential motion vector code: look it up in DMV_2 with the
 * next 2 bits, consume the entry's length and return its dmv value.
 */
static inline int get_dmv (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	const DMVtab * entry;

	NEEDBITS (bit_buf, bits, bit_ptr);

	entry = &DMV_2[UBITS (bit_buf, 2)];
	DUMPBITS (bit_buf, bits, entry->len);

	return entry->dmv;
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Decode the coded block pattern.  The table value is placed above the
 * low 6 bits of the result; for 4:4:4 chroma 6 extra bits are read into
 * the low bits, for 4:2:2 chroma 2 extra bits are read into bits 4..5,
 * and for any other chroma type the low 6 bits stay zero.
 */
static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	const CBPtab * entry;
	int cbp;

	NEEDBITS (bit_buf, bits, bit_ptr);

	// short codes are resolved with 7 bits, the remaining ones with 9
	if (bit_buf < 0x20000000)
		entry = CBP_9 + UBITS (bit_buf, 9);
	else
		entry = CBP_7 + (UBITS (bit_buf, 7) - 16);

	DUMPBITS (bit_buf, bits, entry->len);
	cbp = ((int) entry->cbp) << 6;

	if (decoder->chroma_type == SEQ_CHROMA_444)
	{
		NEEDBITS (bit_buf, bits, bit_ptr);
		cbp += UBITS (bit_buf, 6);
		DUMPBITS(bit_buf, bits, 6);
	}
	else if (decoder->chroma_type == SEQ_CHROMA_422)
	{
		NEEDBITS (bit_buf, bits, bit_ptr);
		cbp += (UBITS (bit_buf, 2) << 4);
		DUMPBITS(bit_buf, bits, 2);
	}

	return cbp;
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Decode the luma DC differential.  The size code is looked up in
 * DC_lum_5 (short codes) or DC_long (codes starting with five 1 bits),
 * then `size` value bits follow.  No NEEDBITS is done on entry; the
 * visible caller (slice_intra_DCT) refills the buffer beforehand.
 */
static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	const DCtab * entry;
	int nbits;
	int diff;

	if (bit_buf >= 0xf8000000)
	{
		// long size code
		entry = DC_long + (UBITS (bit_buf, 9) - 0x1e0);
		nbits = entry->size;
		DUMPBITS (bit_buf, bits, entry->len);
		NEEDBITS (bit_buf, bits, bit_ptr);
		diff = UBITS (bit_buf, nbits) - UBITS (SBITS (~bit_buf, 1), nbits);
		DUMPBITS (bit_buf, bits, nbits);
		return diff;
	}

	entry = DC_lum_5 + UBITS (bit_buf, 5);
	nbits = entry->size;

	if (!nbits)
	{
		// size 0: no value bits, only the 3-bit code is consumed
		DUMPBITS (bit_buf, bits, 3);
		return 0;
	}

	// account for code and value bits at once, then shift them out in turn
	bits += entry->len + nbits;
	bit_buf <<= entry->len;
	diff = UBITS (bit_buf, nbits) - UBITS (SBITS (~bit_buf, 1), nbits);
	bit_buf <<= nbits;

	return diff;
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Decode a chroma DC differential.  The size code is looked up in
 * DC_chrom_5 (short codes) or DC_long (codes starting with five 1 bits),
 * then `size` value bits follow.  No NEEDBITS is done on entry; the
 * visible caller (slice_intra_DCT) refills the buffer beforehand.
 */
static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	const DCtab * entry;
	int nbits;
	int diff;

	if (bit_buf >= 0xf8000000)
	{
		// long size code; one bit more than the table length is consumed
		entry = DC_long + (UBITS (bit_buf, 10) - 0x3e0);
		nbits = entry->size;
		DUMPBITS (bit_buf, bits, entry->len + 1);
		NEEDBITS (bit_buf, bits, bit_ptr);
		diff = UBITS (bit_buf, nbits) - UBITS (SBITS (~bit_buf, 1), nbits);
		DUMPBITS (bit_buf, bits, nbits);
		return diff;
	}

	entry = DC_chrom_5 + UBITS (bit_buf, 5);
	nbits = entry->size;

	if (!nbits)
	{
		// size 0: no value bits, only the 2-bit code is consumed
		DUMPBITS (bit_buf, bits, 2);
		return 0;
	}

	// account for code and value bits at once, then shift them out in turn
	bits += entry->len + nbits;
	bit_buf <<= entry->len;
	diff = UBITS (bit_buf, nbits) - UBITS (SBITS (~bit_buf, 1), nbits);
	bit_buf <<= nbits;

	return diff;
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Clamp `val` in place to the range [-2048, 2047].
 * `val` must be a modifiable lvalue without side effects, because it is
 * evaluated more than once.  The do { } while (0) wrapper makes the macro
 * behave as one statement, so it is safe in an unbraced if/else.
 */
#define SATURATE(val) \
do { \
	if ((val) >= 2048) (val) = 2047; \
	else if ((val) < -2048) (val) = -2048; \
} while (0)

/*
 * Decode the AC coefficients of one MPEG-2 intra block with the DCT_B14*
 * tables, dequantize them and store them in decoder->DCTblock at the
 * positions given by decoder->scan.  DCTblock[0] must already hold the
 * DC value (it seeds the mismatch accumulator).
 *
 * Returns -1 on an illegal code.  Otherwise returns the scan position
 * reached plus the end code's run value (129), so a return of exactly 129
 * means no coefficient was decoded.
 */
static int get_intra_block_B14 (mpeg2_decoder_t * const decoder, const uint8_t * quant_matrix)
{
	int i = 0;
	int mismatch;
	const uint8_t * scan    = decoder->scan;
	int quantizer_scale     = decoder->quantizer_scale;
	// work on local copies of the bitstream state; written back on exit
	uint32_t bit_buf        = decoder->bitstream_buf;
	int bits                = decoder->bitstream_bits;
	const uint8_t * bit_ptr = decoder->bitstream_ptr;
	int32_t * dest          = decoder->DCTblock;
	const DCTtab * tab;

	// mismatch accumulates the XOR of all coefficients
	mismatch = ~dest[0];

	while (1)
	{
		NEEDBITS (bit_buf, bits, bit_ptr);

		// pick the lookup table from the number of leading zero bits
		if (bit_buf >= 0x28000000)
			tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
		else if (bit_buf >= 0x04000000)
			tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
		else if (bit_buf >= 0x02000000)
			tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
		else if (bit_buf >= 0x00800000)
			tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
		else if (bit_buf >= 0x00200000)
			tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
		else
		{
			tab = DCT_16 + UBITS (bit_buf, 16);
			bit_buf <<= 16;
			GETWORD (bit_buf, bits + 16, bit_ptr);
		}

		i += tab->run;

		if (i < 64)
		{
			// valid code
			int j, val;

			j = scan[i];
			bit_buf <<= tab->len;
			bits += tab->len + 1;
			val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;

			// if (bitstream_get (1)) val = -val;
			if (bit_buf & 0x80000000) val = -val;
			bit_buf <<= 1;

			SATURATE (val);
			dest[j] = val;
			mismatch ^= val;

			continue;
		}
		else if (tab->run == 129)
		{
			// end code: apply mismatch control to the last coefficient
			dest[63] ^= mismatch & 1;
			DUMPBITS (bit_buf, bits, 2);
			break;
		}
		else if (tab->run == 65)
		{
			// escape code: 6-bit run then 12-bit signed level
			i += UBITS (bit_buf << 6, 6) - 64;

			if (i < 64)
			{
				int j, val;

				j = scan[i];

				DUMPBITS (bit_buf, bits, 12);
				NEEDBITS (bit_buf, bits, bit_ptr);
				// The level is signed here, so divide instead of shifting:
				// inverse quantisation truncates toward zero, whereas >> on
				// a negative product would round toward minus infinity.
				val = (SBITS (bit_buf, 12) * quantizer_scale * quant_matrix[j]) / 16;

				SATURATE (val);
				dest[j] = val;
				mismatch ^= val;

				DUMPBITS (bit_buf, bits, 12);

				continue;
			}
		}

		// illegal cases
		if (config.debug & cfg_printwarnings)
			ka_log(ka_log_error | ka_log_video, "Bad intra 14 VLC at (%d, %d)", decoder->cur_x, decoder->cur_y);
		i = -1;
		break;
	}

	decoder->bitstream_buf = bit_buf;
	decoder->bitstream_bits = bits;
	decoder->bitstream_ptr = bit_ptr;

	return i;
}

/*
 * Decode the AC coefficients of one MPEG-2 intra block with the DCT_B15*
 * tables (used when intra_vlc_format is set), dequantize them and store
 * them in decoder->DCTblock at the positions given by decoder->scan.
 * DCTblock[0] must already hold the DC value (it seeds the mismatch
 * accumulator).
 *
 * Returns -1 on an illegal code.  Otherwise returns the scan position
 * reached plus the end code's run value (129), so a return of exactly 129
 * means no coefficient was decoded.
 */
static int get_intra_block_B15 (mpeg2_decoder_t * const decoder, const uint8_t * quant_matrix)
{
	int i = 0;
	int mismatch;
	const uint8_t * scan    = decoder->scan;
	int quantizer_scale     = decoder->quantizer_scale;
	// work on local copies of the bitstream state; written back on exit
	uint32_t bit_buf        = decoder->bitstream_buf;
	int bits                = decoder->bitstream_bits;
	const uint8_t * bit_ptr = decoder->bitstream_ptr;
	int32_t * dest          = decoder->DCTblock;
	const DCTtab * tab;

	// mismatch accumulates the XOR of all coefficients
	mismatch = ~dest[0];

	while (1)
	{
		NEEDBITS (bit_buf, bits, bit_ptr);

		// pick the lookup table from the number of leading zero bits
		if (bit_buf >= 0x04000000)
			tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4);
		else if (bit_buf >= 0x02000000)
			tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8);
		else if (bit_buf >= 0x00800000)
			tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
		else if (bit_buf >= 0x00200000)
			tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
		else
		{
			tab = DCT_16 + UBITS (bit_buf, 16);
			bit_buf <<= 16;
			GETWORD (bit_buf, bits + 16, bit_ptr);
		}

		i += tab->run;

		if (i < 64)
		{
			// valid code
			int j, val;

			j = scan[i];
			bit_buf <<= tab->len;
			bits += tab->len + 1;
			val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;

			// if (bitstream_get (1)) val = -val;
			if (bit_buf & 0x80000000) val = -val;
			bit_buf <<= 1;

			SATURATE (val);
			dest[j] = val;
			mismatch ^= val;

			continue;
		}
		else if (tab->run == 129)
		{
			// end code (4 bits in this table): apply mismatch control
			dest[63] ^= mismatch & 1;
			DUMPBITS (bit_buf, bits, 4);
			break;
		}
		else if (tab->run == 65)
		{
			// escape code: 6-bit run then 12-bit signed level
			i += UBITS (bit_buf << 6, 6) - 64;

			if (i < 64)
			{
				int j, val;

				j = scan[i];

				DUMPBITS (bit_buf, bits, 12);
				NEEDBITS (bit_buf, bits, bit_ptr);
				// The level is signed here, so divide instead of shifting:
				// inverse quantisation truncates toward zero, whereas >> on
				// a negative product would round toward minus infinity.
				val = (SBITS (bit_buf, 12) * quantizer_scale * quant_matrix[j]) / 16;

				SATURATE (val);
				dest[j] = val;
				mismatch ^= val;

				DUMPBITS (bit_buf, bits, 12);

				continue;
			}
		}

		// illegal cases
		if (config.debug & cfg_printwarnings)
			ka_log(ka_log_error | ka_log_video, "Bad intra 15 VLC at (%d, %d)", decoder->cur_x, decoder->cur_y);
		i = -1;
		break;
	}

	decoder->bitstream_buf = bit_buf;
	decoder->bitstream_bits = bits;
	decoder->bitstream_ptr = bit_ptr;

	return i;
}

/*
 * Decode all coefficients of one MPEG-2 non-intra block, dequantize them
 * and store them in decoder->DCTblock at the positions given by
 * decoder->scan.  The first code is looked up in DCT_B14DC_5, later ones
 * in DCT_B14AC_5; both paths share the rest of the loop body through the
 * entry_1 / entry_2 labels.
 *
 * Returns -1 on an illegal code.  Otherwise returns the scan position
 * reached plus the end code's run value (129), so a return of exactly 129
 * means only the coefficient at scan position 0 was decoded.
 */
static int get_non_intra_block (mpeg2_decoder_t * const decoder, const uint8_t * quant_matrix)
{
	int i = -1;
	int mismatch = 1;
	const uint8_t * scan    = decoder->scan;
	int quantizer_scale     = decoder->quantizer_scale;
	// work on local copies of the bitstream state; written back on exit
	uint32_t bit_buf        = decoder->bitstream_buf;
	int bits                = decoder->bitstream_bits;
	const uint8_t * bit_ptr = decoder->bitstream_ptr;
	int32_t * dest          = decoder->DCTblock;
	const DCTtab * tab;

	// first coefficient: jump into the loop body with the DC variant of
	// the 5-bit table, or straight to the longer-code lookups
	NEEDBITS (bit_buf, bits, bit_ptr);
	if (bit_buf >= 0x28000000)
	{
		tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
		goto entry_1;
	}
	else
		goto entry_2;

	while (1)
	{
		NEEDBITS (bit_buf, bits, bit_ptr);

		if (bit_buf >= 0x28000000)
		{
			tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
			goto entry_1;
		}
entry_2:
		if (bit_buf >= 0x04000000)
			tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
		else if (bit_buf >= 0x02000000)
			tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
		else if (bit_buf >= 0x00800000)
			tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
		else if (bit_buf >= 0x00200000)
			tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
		else
		{
			tab = DCT_16 + UBITS (bit_buf, 16);
			bit_buf <<= 16;
			GETWORD (bit_buf, bits + 16, bit_ptr);
		}

entry_1:

		i += tab->run;

		if (i < 64)
		{
			// valid code
			int j, val;

			j = scan[i];
			bit_buf <<= tab->len;
			bits += tab->len + 1;
			val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;

			// if (bitstream_get (1)) val = -val;
			if (bit_buf & 0x80000000) val = -val;
			bit_buf <<= 1;

			SATURATE (val);
			dest[j] = val;
			mismatch ^= val;

			continue;
		}
		else if (tab->run == 129)
		{
			// end code: apply mismatch control to the last coefficient
			dest[63] ^= mismatch & 1;
			DUMPBITS (bit_buf, bits, 2);
			break;
		}
		else if (tab->run == 65)
		{
			// escape code: 6-bit run then 12-bit signed level
			i += UBITS (bit_buf << 6, 6) - 64;

			if (i < 64)
			{
				int j, val;

				j = scan[i];

				DUMPBITS (bit_buf, bits, 12);
				NEEDBITS (bit_buf, bits, bit_ptr);
				// 2*level + sign(level)
				val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1;
				// val is signed here, so divide instead of shifting:
				// inverse quantisation truncates toward zero, whereas >> on
				// a negative product would round toward minus infinity.
				val = (val * quantizer_scale * quant_matrix[j]) / 32;

				SATURATE (val);
				dest[j] = val;
				mismatch ^= val;

				DUMPBITS (bit_buf, bits, 12);

				continue;
			}
		}

		// illegal cases
		if (config.debug & cfg_printwarnings)
			ka_log(ka_log_error | ka_log_video, "Bad non intra VLC at (%d, %d)", decoder->cur_x, decoder->cur_y);
		i = -1;
		break;
	}

	decoder->bitstream_buf = bit_buf;
	decoder->bitstream_bits = bits;
	decoder->bitstream_ptr = bit_ptr;

	return i;
}

/*
 * Decode the AC coefficients of one MPEG-1 intra block, dequantize them
 * and store them in decoder->DCTblock at the positions given by
 * decoder->scan.  Non-zero results are forced odd ("oddification")
 * instead of using the MPEG-2 mismatch control.
 *
 * Returns -1 on an illegal code.  Otherwise returns the scan position
 * reached plus the end code's run value (129), so a return of exactly 129
 * means no coefficient was decoded.
 */
static int get_mpeg1_intra_block (mpeg2_decoder_t * const decoder, const uint8_t * quant_matrix)
{
	int i = 0;
	const uint8_t * scan    = decoder->scan;
	int quantizer_scale     = decoder->quantizer_scale;
	// work on local copies of the bitstream state; written back on exit
	uint32_t bit_buf        = decoder->bitstream_buf;
	int bits                = decoder->bitstream_bits;
	const uint8_t * bit_ptr = decoder->bitstream_ptr;
	int32_t * dest          = decoder->DCTblock;
	const DCTtab * tab;

	while (1)
	{
		NEEDBITS (bit_buf, bits, bit_ptr);

		// pick the lookup table from the number of leading zero bits
		if (bit_buf >= 0x28000000)
			tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
		else if (bit_buf >= 0x04000000)
			tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
		else if (bit_buf >= 0x02000000)
			tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
		else if (bit_buf >= 0x00800000)
			tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
		else if (bit_buf >= 0x00200000)
			tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
		else
		{
			tab = DCT_16 + UBITS (bit_buf, 16);
			bit_buf <<= 16;
			GETWORD (bit_buf, bits + 16, bit_ptr);
		}

		i += tab->run;

		if (i < 64)
		{
			// valid code
			int j, val;

			j = scan[i];
			bit_buf <<= tab->len;
			bits += tab->len + 1;
			val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4;

			// oddification
			val = (val - 1) | 1;

			// if (bitstream_get (1)) val = -val;
			if (bit_buf & 0x80000000) val = -val;
			bit_buf <<= 1;

			SATURATE (val);
			dest[j] = val;

			continue;
		}
		else if (tab->run == 129)
		{
			// end code
			DUMPBITS (bit_buf, bits, 2);
			break;
		}
		else if (tab->run == 65)
		{
			// escape code: 6-bit run then an 8-bit or 16-bit signed level
			i += UBITS (bit_buf << 6, 6) - 64;

			if (i < 64)
			{
				int j, val;

				j = scan[i];

				DUMPBITS (bit_buf, bits, 12);
				NEEDBITS (bit_buf, bits, bit_ptr);
				val = SBITS (bit_buf, 8);
				DUMPBITS (bit_buf, bits, 8);
				if (! (val & 0x7f))
				{
					// first byte 0x00 or 0x80: the level continues in a second byte
					val = UBITS (bit_buf, 8) + 2 * val;
					DUMPBITS (bit_buf, bits, 8);
				}
				// val is signed here, so divide instead of shifting:
				// the result must be truncated toward zero (the oddification
				// below assumes that), whereas >> on a negative product
				// would round toward minus infinity.
				val = (val * quantizer_scale * quant_matrix[j]) / 16;

				// oddification
				val = (val + ~SBITS (val, 1)) | 1;

				SATURATE (val);
				dest[j] = val;

				continue;
			}
		}

		// illegal cases
		if (config.debug & cfg_printwarnings)
			ka_log(ka_log_error | ka_log_video, "Bad intra MPEG1 VLC at (%d, %d)", decoder->cur_x, decoder->cur_y);
		i = -1;
		break;
	}

	decoder->bitstream_buf = bit_buf;
	decoder->bitstream_bits = bits;
	decoder->bitstream_ptr = bit_ptr;

	return i;
}

/*
 * Decode all coefficients of one MPEG-1 non-intra block, dequantize them
 * and store them in decoder->DCTblock at the positions given by
 * decoder->scan.  The first code is looked up in DCT_B14DC_5, later ones
 * in DCT_B14AC_5; both paths share the rest of the loop body through the
 * entry_1 / entry_2 labels.  Non-zero results are forced odd
 * ("oddification").
 *
 * Returns -1 on an illegal code.  Otherwise returns the scan position
 * reached plus the end code's run value (129), so a return of exactly 129
 * means only the coefficient at scan position 0 was decoded.
 */
static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder, const uint8_t * quant_matrix)
{
	int i = -1;
	const uint8_t * scan    = decoder->scan;
	int quantizer_scale     = decoder->quantizer_scale;
	// work on local copies of the bitstream state; written back on exit
	uint32_t bit_buf        = decoder->bitstream_buf;
	int bits                = decoder->bitstream_bits;
	const uint8_t * bit_ptr = decoder->bitstream_ptr;
	int32_t * dest          = decoder->DCTblock;
	const DCTtab * tab;

	// first coefficient: jump into the loop body with the DC variant of
	// the 5-bit table, or straight to the longer-code lookups
	NEEDBITS (bit_buf, bits, bit_ptr);
	if (bit_buf >= 0x28000000)
	{
		tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5);
		goto entry_1;
	}
	else
		goto entry_2;

	while (1)
	{
		NEEDBITS (bit_buf, bits, bit_ptr);

		if (bit_buf >= 0x28000000)
		{
			tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);
			goto entry_1;
		}
entry_2:
		if (bit_buf >= 0x04000000)
			tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);
		else if (bit_buf >= 0x02000000)
			tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
		else if (bit_buf >= 0x00800000)
			tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
		else if (bit_buf >= 0x00200000)
			tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
		else
		{
			tab = DCT_16 + UBITS (bit_buf, 16);
			bit_buf <<= 16;
			GETWORD (bit_buf, bits + 16, bit_ptr);
		}

entry_1:
		i += tab->run;

		if (i < 64)
		{
			// valid code
			int j, val;

			j = scan[i];
			bit_buf <<= tab->len;
			bits += tab->len + 1;
			val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5;

			// oddification
			val = (val - 1) | 1;

			// if (bitstream_get (1)) val = -val;
			if (bit_buf & 0x80000000) val = -val;
			bit_buf <<= 1;

			SATURATE (val);
			dest[j] = val;

			continue;
		}
		else if (tab->run == 129)
		{
			// end code
			DUMPBITS (bit_buf, bits, 2);
			break;
		}
		else if (tab->run == 65)
		{
			// escape code: 6-bit run then an 8-bit or 16-bit signed level
			i += UBITS (bit_buf << 6, 6) - 64;

			if (i < 64)
			{
				int j, val;

				j = scan[i];

				DUMPBITS (bit_buf, bits, 12);
				NEEDBITS (bit_buf, bits, bit_ptr);
				val = SBITS (bit_buf, 8);
				DUMPBITS (bit_buf, bits, 8);
				if (! (val & 0x7f))
				{
					// first byte 0x00 or 0x80: the level continues in a second byte
					val = UBITS (bit_buf, 8) + 2 * val;
					DUMPBITS (bit_buf, bits, 8);
				}
				// 2*level + sign(level)
				val = 2 * (val + SBITS (val, 1)) + 1;
				// val is signed here, so divide instead of shifting:
				// the result must be truncated toward zero (the oddification
				// below assumes that), whereas >> on a negative product
				// would round toward minus infinity.
				val = (val * quantizer_scale * quant_matrix[j]) / 32;

				// oddification
				val = (val + ~SBITS (val, 1)) | 1;

				SATURATE (val);
				dest[j] = val;

				continue;
			}
		}

		// illegal cases
		if (config.debug & cfg_printwarnings)
			ka_log(ka_log_error | ka_log_video, "Bad non intra MPEG1 VLC at (%d, %d)", decoder->cur_x, decoder->cur_y);
		i = -1;
		break;
	}

	decoder->bitstream_buf = bit_buf;
	decoder->bitstream_bits = bits;
	decoder->bitstream_ptr = bit_ptr;

	return i;
}

/*
 * Pointer to a per-block DCT routine taking the decoder, a colour
 * component index cc, a destination pointer and a destination stride.
 * slice_intra_DCT and slice_non_intra_DCT have a compatible parameter list.
 */
typedef void (*fn_slice_DCT) ( mpeg2_decoder_t * decoder
                             , int cc
                             , uint8_t * dest
                             , int stride);

/*
 * Decode one intra-coded 8x8 block of colour component `cc` (0 selects
 * the luma tables and predictor, anything else the chroma ones) and write
 * the result to `dest` with line pitch `stride`.
 *
 * The DC predictor for the component is updated, the coefficients are
 * parsed with the block decoder matching the stream type, and then:
 *   - on a parse error the block is filled with 0 (luma) or 0x80 (chroma);
 *   - when only the DC coefficient is present (result 129) the DC-only
 *     IDCT routine is used;
 *   - otherwise the full IDCT routine is used.
 * With MPEG2_OPT_MONOCHROME set, chroma blocks are parsed but not drawn.
 */
static inline void slice_intra_DCT (mpeg2_decoder_t * decoder, int cc,
            uint8_t * dest, int stride)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	const uint8_t * quant_matrix;
	int last;

	NEEDBITS (bit_buf, bits, bit_ptr);

	// Get the intra DC coefficient and inverse quantize it
	if (cc == 0)
	{
		quant_matrix = decoder->lumi_intra_quantizer_matrix;
		decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder);
	}
	else
	{
		quant_matrix = decoder->chroma_intra_quantizer_matrix;
		decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder);
	}

	memset (decoder->DCTblock, 0, 64 * sizeof (decoder->DCTblock[0]));

	// Scale by multiplication rather than <<: a corrupt stream can drive
	// the predictor negative, and left-shifting a negative value is
	// undefined behaviour.
	decoder->DCTblock[0] =
	decoder->dc_dct_pred[cc] * (1 << (3 - decoder->intra_dc_precision));
	SATURATE(decoder->DCTblock[0]);

	if (decoder->mpeg1)
	{
		// MPEG-1 D pictures carry no AC coefficients: treat as DC only
		if (decoder->coding_type != D_TYPE)
			last = get_mpeg1_intra_block (decoder, quant_matrix);
		else
			last = 129;
	}
	else if (decoder->intra_vlc_format)
		last = get_intra_block_B15 (decoder, quant_matrix);
	else
		last = get_intra_block_B14 (decoder, quant_matrix);

	// the chroma block had to be parsed, but is not rendered in monochrome mode
	if (cc && (decoder->options & MPEG2_OPT_MONOCHROME))
		return;

	if (last == -1)
	{
		// invalid, clear (0 for luma, 0x80 for chroma)
		if (cc) cc = 0x80;
		for (int i = 8; i > 0; i--, dest += stride)
			memset(dest, cc, 8);
	}
	else if (last == 129)
		idct_block_copy_dc (decoder->DCTblock, dest, stride);
	else
		idct_block_copy (decoder->DCTblock, dest, stride);
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Decode one non-intra 8x8 block of colour component `cc` (0 selects the
 * luma quantizer matrix, anything else the chroma one) and combine it
 * with `dest` (line pitch `stride`) through the idct_block_add* routines.
 * On a parse error the block is filled with 0 (luma) or 0x80 (chroma).
 * With MPEG2_OPT_MONOCHROME set, chroma blocks are parsed but not drawn.
 */
static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder, int cc,
                                    uint8_t * dest, int stride)
{
	const uint8_t * matrix;
	int result;
	int row;

	if (cc)
		matrix = decoder->chroma_non_intra_quantizer_matrix;
	else
		matrix = decoder->lumi_non_intra_quantizer_matrix;

	memset (decoder->DCTblock, 0, 64 * sizeof (decoder->DCTblock[0]));

	if (decoder->mpeg1)
		result = get_mpeg1_non_intra_block (decoder, matrix);
	else
		result = get_non_intra_block (decoder, matrix);

	// the chroma block had to be parsed, but is not rendered in monochrome mode
	if (cc && (decoder->options & MPEG2_OPT_MONOCHROME))
		return;

	switch (result)
	{
		case -1:
		{
			// invalid, clear (0 for luma, 0x80 for chroma)
			const int fill = cc ? 0x80 : 0;

			for (row = 0; row < 8; row++)
				memset(dest + row * stride, fill, 8);
		}
		break;
		case 129:
			// only the first coefficient is present
			idct_block_add_dc (decoder->DCTblock, dest, stride);
			break;
		default:
			idct_block_add (decoder->DCTblock, dest, stride);
			break;
	}
}

/*
 * Motion-compensate one 16-line macroblock: luma first, then both chroma
 * planes unless MPEG2_OPT_MONOCHROME is set.
 *
 * x_pred, y_pred are in 1/2 sample units.  `table` is indexed by the
 * half-sample flags (bit 0: x, bit 1: y); chroma adds 4*chroma_wshift to
 * that index.  If the luma source area falls outside [0, max_offset] the
 * vector is replaced by zero motion (and a warning may be logged).
 */
static inline void motion_block (mpeg2_decoder_t* const decoder, mpeg2_mc_fct** table,
         int x_pred, int y_pred, yuv_bufs* src)
{
	int xy_half;
	int src_offset;
	int dst_offset;
	int height = 16;

	// luminance
	xy_half = ((y_pred & 1) << 1) | (x_pred & 1);
	src_offset = decoder->cur_offset + (x_pred >> 1) + (y_pred >> 1) * decoder->stride;
	dst_offset = decoder->cur_offset;
	// check if we remain in source
	if ((src_offset < 0)
	||  (src_offset + (x_pred & 1) + (height - 1 + (y_pred & 1)) * decoder->stride > decoder->max_offset))
	{
		if (config.debug & cfg_printwarnings)
			ka_log(ka_log_error | ka_log_video
			     , "Motion out of bounds at (%d, %d) move by (%d /2, %d /2)"
			     , decoder->cur_x, decoder->cur_y, x_pred, y_pred
			     );
		x_pred = 0;
		y_pred = 0;
		// zero motion is a full-sample copy: also drop the half-sample
		// flags, otherwise the stale interpolation mode would still read
		// the extra row/column the bounds check was meant to exclude
		xy_half = 0;
		src_offset = decoder->cur_offset;
	}

	table[xy_half] (decoder->dest.y + dst_offset, src->y + src_offset, decoder->stride, height);

	if (decoder->options & MPEG2_OPT_MONOCHROME)
		return;

	// chrominance
	if (decoder->chroma_wshift) x_pred /= 2; // rounding toward 0
	if (decoder->chroma_hshift) y_pred /= 2; // rounding toward 0
	height >>= decoder->chroma_hshift;
	xy_half = 4*decoder->chroma_wshift + (((y_pred & 1) << 1) | (x_pred & 1));
	src_offset = decoder->cur_uvoffset + (x_pred >> 1) + (y_pred >> 1) * decoder->uvstride;
	dst_offset = decoder->cur_uvoffset;

	table[xy_half] (decoder->dest.cb + dst_offset, src->cb + src_offset, decoder->uvstride, height);
	table[xy_half] (decoder->dest.cr + dst_offset, src->cr + src_offset, decoder->uvstride, height);
}

/*
 * Motion-compensate one 8-line half of a macroblock: y selects the upper
 * (0) or lower (1) half.  Luma first, then both chroma planes unless
 * MPEG2_OPT_MONOCHROME is set.
 *
 * x_pred, y_pred are in 1/2 sample units.  `table` is indexed by the
 * half-sample flags (bit 0: x, bit 1: y); chroma adds 4*chroma_wshift to
 * that index.  If the luma source area falls outside [0, max_offset] the
 * vector is replaced by zero motion (and a warning may be logged).
 */
static inline void motion_block8 (mpeg2_decoder_t* const decoder, mpeg2_mc_fct** table,
         int x_pred, int y_pred, yuv_bufs* src, int y)
{
	int xy_half;
	int src_offset;
	int dst_offset;
	int height = 8;

	// fold the vertical position of the selected half into the vector
	y_pred += y * (height << 1);

	// luminance
	xy_half = ((y_pred & 1) << 1) | (x_pred & 1);
	src_offset = decoder->cur_offset + (x_pred >> 1) + (y_pred >> 1) * decoder->stride;
	dst_offset = decoder->cur_offset + y * height * decoder->stride;
	// check if we remain in source
	if ((src_offset < 0)
	||  (src_offset + (x_pred & 1) + (height - 1 + (y_pred & 1)) * decoder->stride > decoder->max_offset))
	{
		if (config.debug & cfg_printwarnings)
			ka_log(ka_log_error | ka_log_video
			     , "Motion_Block8 out of bounds at (%d, %d) move by (%d /2, %d /2)"
			     , decoder->cur_x, decoder->cur_y, x_pred, y_pred
			     );
		// Fall back to zero motion, i.e. the co-located half block.
		// y_pred keeps the offset of the selected half so that luma and
		// chroma both read the same half they write, and the half-sample
		// flags are dropped so a plain full-sample copy is performed.
		x_pred = 0;
		y_pred = y * (height << 1);
		xy_half = 0;
		src_offset = dst_offset;
	}

	table[xy_half] (decoder->dest.y + dst_offset, src->y + src_offset, decoder->stride, height);

	if (decoder->options & MPEG2_OPT_MONOCHROME)
		return;

	// chrominance
	if (decoder->chroma_wshift) x_pred /= 2; // rounding toward 0
	if (decoder->chroma_hshift) y_pred /= 2; // rounding toward 0
	height >>= decoder->chroma_hshift;
	xy_half = 4*decoder->chroma_wshift + (((y_pred & 1) << 1) | (x_pred & 1));
	src_offset = decoder->cur_uvoffset + (x_pred >> 1) + (y_pred >> 1) * decoder->uvstride;
	dst_offset = decoder->cur_uvoffset + y * height * decoder->uvstride;

	table[xy_half] (decoder->dest.cb + dst_offset, src->cb + src_offset, decoder->uvstride, height);
	table[xy_half] (decoder->dest.cr + dst_offset, src->cr + src_offset, decoder->uvstride, height);
}

/*
 * Motion-compensate one field (every other line) of a macroblock:
 * 8 lines at twice the plane stride, written starting at line
 * `dst_field` (0 or 1) of the destination macroblock.  Luma first, then
 * both chroma planes unless MPEG2_OPT_MONOCHROME is set.
 *
 * x_pred, y_pred are in 1/2 sample units.  `table` is indexed by the
 * half-sample flags (bit 0: x, bit 1: y); chroma adds 4*chroma_wshift to
 * that index.  If the luma source area falls outside [0, max_offset] the
 * vector is replaced by zero motion (and a warning may be logged).
 */
static inline void motion_blocki (mpeg2_decoder_t* const decoder, mpeg2_mc_fct** table,
         int x_pred, int y_pred, yuv_bufs* src, int dst_field)
{
	int xy_half;
	int src_offset;
	int dst_offset;
	int stride = decoder->stride << 1;
	int height = 8;

	// luminance
	xy_half = ((y_pred & 1) << 1) | (x_pred & 1);
	src_offset = decoder->cur_offset + (x_pred >> 1) + (y_pred >> 1) * stride;
	dst_offset = decoder->cur_offset + dst_field * decoder->stride;
	// check if we remain in source
	if ((src_offset < 0)
	||  (src_offset + (x_pred & 1) + (height - 1 + (y_pred & 1)) * stride > decoder->max_offset))
	{
		if (config.debug & cfg_printwarnings)
			ka_log(ka_log_error | ka_log_video
			     , "Motion blocki out of bounds at (%d, %d) move by (%d /2, %d /2)"
			     , decoder->cur_x, decoder->cur_y, x_pred, y_pred
			     );
		x_pred = 0;
		y_pred = 0;
		// zero motion is a full-sample copy: also drop the half-sample
		// flags, otherwise the stale interpolation mode would still read
		// the extra row/column the bounds check was meant to exclude
		xy_half = 0;
		src_offset = decoder->cur_offset;
	}

	table[xy_half] (decoder->dest.y + dst_offset, src->y + src_offset, stride, height);

	if (decoder->options & MPEG2_OPT_MONOCHROME)
		return;

	// chrominance
	if (decoder->chroma_wshift) x_pred /= 2; // rounding toward 0
	if (decoder->chroma_hshift) y_pred /= 2; // rounding toward 0
	height >>= decoder->chroma_hshift;
	stride = decoder->uvstride << 1;

	xy_half = 4*decoder->chroma_wshift + (((y_pred & 1) << 1) | (x_pred & 1));
	src_offset = decoder->cur_uvoffset + (x_pred >> 1) + (y_pred >> 1) * stride;
	dst_offset = decoder->cur_uvoffset + dst_field * decoder->uvstride;

	table[xy_half] (decoder->dest.cb + dst_offset, src->cb + src_offset, stride, height);
	table[xy_half] (decoder->dest.cr + dst_offset, src->cr + src_offset, stride, height);
}

/*
 * MPEG-1 motion compensation for one macroblock: decode both vector
 * components (both with f_code[0]), store them in both predictor rows and
 * predict from ref[0].  The vector handed to motion_block is scaled by
 * 2^f_code[1].
 */
static void motion_mp1 (mpeg2_decoder_t * const decoder, motion_t * const motion, mpeg2_mc_fct** table)
{
	int motion_x, motion_y;
	int scale;

	motion_x = add_motion_delta (motion->pmv[0][0], decoder, motion->f_code[0]);
	motion->pmv[1][0] = motion->pmv[0][0] = motion_x;

	motion_y = add_motion_delta (motion->pmv[0][1], decoder, motion->f_code[0]);
	motion->pmv[1][1] = motion->pmv[0][1] = motion_y;

	// Scale by multiplication rather than <<: the components are often
	// negative, and left-shifting a negative value is undefined behaviour.
	scale = 1 << motion->f_code[1];
	motion_block (decoder, table, motion_x * scale, motion_y * scale, &motion->ref[0]);
}

/*
 * Frame picture, frame prediction: decode one vector (x with f_code[0],
 * y with f_code[1]), store it in both predictor rows and predict the
 * whole macroblock from ref[0].
 */
static void motion_fr_frame (mpeg2_decoder_t * decoder, motion_t * motion, mpeg2_mc_fct** table)
{
	int mv[2];
	int c;

	// c = 0: horizontal component, c = 1: vertical component
	for (c = 0; c < 2; c++)
	{
		mv[c] = add_motion_delta (motion->pmv[0][c], decoder, motion->f_code[c]);
		motion->pmv[0][c] = mv[c];
		motion->pmv[1][c] = motion->pmv[0][c];
	}

	motion_block (decoder, table, mv[0], mv[1], &motion->ref[0]);
}

/*
 * Frame picture, field prediction: two vectors are decoded, one per
 * destination field.  Each is preceded by a 1-bit field select choosing
 * the reference, and is applied with motion_blocki.  The vertical
 * predictor is kept at twice the decoded vertical component, so it is
 * halved before the delta is added and doubled again when stored.
 */
static void motion_fr_field (mpeg2_decoder_t * decoder, motion_t * motion, mpeg2_mc_fct** table)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	int motion_x, motion_y;
	int field_select; // 0 or 1

	// first vector: destination field 0
	NEEDBITS (bit_buf, bits, bit_ptr);
	field_select = UBITS (bit_buf, 1);
	DUMPBITS (bit_buf, bits, 1);

	motion_x = add_motion_delta (motion->pmv[0][0], decoder, motion->f_code[0]);
	motion->pmv[0][0] = motion_x;

	motion_y = add_motion_delta (motion->pmv[0][1] >> 1, decoder, motion->f_code[1]);
	// "* 2" instead of "<< 1": left-shifting a negative value is undefined
	motion->pmv[0][1] = motion_y * 2;

	motion_blocki (decoder, table, motion_x, motion_y, &motion->ref[field_select], 0);

	// second vector: destination field 1
	NEEDBITS (bit_buf, bits, bit_ptr);
	field_select = UBITS (bit_buf, 1);
	DUMPBITS (bit_buf, bits, 1);

	motion_x = add_motion_delta (motion->pmv[1][0], decoder, motion->f_code[0]);
	motion->pmv[1][0] = motion_x;

	motion_y = add_motion_delta (motion->pmv[1][1] >> 1, decoder, motion->f_code[1]);
	motion->pmv[1][1] = motion_y * 2;

	motion_blocki (decoder, table, motion_x, motion_y, &motion->ref[field_select], 1);
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Frame picture prediction with differential motion vectors: one vector
 * plus a dmv per component is decoded.  Each destination field is first
 * written (mc_functions.put) from one reference using the decoded vector
 * and then averaged (mc_functions.avg) with the other reference using a
 * vector derived from it, scaled by 1/2 or 3/2 depending on
 * top_field_first and corrected by the dmv.  The vertical predictor is
 * kept at twice the decoded vertical component.
 */
static void motion_fr_dmv (mpeg2_decoder_t * decoder, motion_t * motion)
{
	int motion_x, motion_y;
	int dmv_x, dmv_y;
	int m;
	int other_x, other_y;

	motion_x = add_motion_delta (motion->pmv[0][0], decoder, motion->f_code[0]);
	motion->pmv[1][0] = motion->pmv[0][0] = motion_x;

	dmv_x = get_dmv (decoder);

	motion_y = add_motion_delta (motion->pmv[0][1] >> 1, decoder, motion->f_code[1]);
	// "* 2" instead of "<< 1": left-shifting a negative value is undefined
	motion->pmv[1][1] = motion->pmv[0][1] = motion_y * 2;

	dmv_y = get_dmv (decoder);

	// destination field 0
	motion_blocki (decoder, mc_functions.put, motion_x, motion_y, &motion->ref[0], 0);

	m = decoder->top_field_first ? 1 : 3;
	other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;
	other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1;
	motion_blocki (decoder, mc_functions.avg, other_x, other_y, &motion->ref[1], 0);

	// destination field 1
	motion_blocki (decoder, mc_functions.put, motion_x, motion_y, &motion->ref[1], 1);

	m = decoder->top_field_first ? 3 : 1;
	other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x;
	other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1;
	motion_blocki (decoder, mc_functions.avg, other_x, other_y, &motion->ref[0], 1);
}

/*
 * Frame prediction without parsing anything: predict the macroblock from
 * ref[0] using the motion vector already stored in pmv[0].
 */
static void motion_fr_reuse (mpeg2_decoder_t * decoder, motion_t * motion, mpeg2_mc_fct** table)
{
	const int mv_x = motion->pmv[0][0];
	const int mv_y = motion->pmv[0][1];

	motion_block (decoder, table, mv_x, mv_y, &motion->ref[0]);
}

/*
 * Parse a frame motion vector into the forward predictors
 * (decoder->f_motion) without performing any motion compensation, then
 * skip the marker bit that follows it.
 */
static void motion_fr_conceal (mpeg2_decoder_t * decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	int component;
	int c;

	/* c = 0: horizontal component, c = 1: vertical component */
	for (c = 0; c < 2; c++)
	{
		component = add_motion_delta (decoder->f_motion.pmv[0][c], decoder, decoder->f_motion.f_code[c]);
		decoder->f_motion.pmv[0][c] = component;
		decoder->f_motion.pmv[1][c] = decoder->f_motion.pmv[0][c];
	}

	NEEDBITS (bit_buf, bits, bit_ptr);
	DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Field prediction in a field picture: reads a one-bit reference selector
 * followed by the horizontal and vertical vector deltas, updates both rows
 * of motion->pmv and predicts the macroblock from the selected reference.
 */
static void motion_fi_field (mpeg2_decoder_t * decoder, motion_t * motion, mpeg2_mc_fct** table)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	int selected_ref;
	int vec_x, vec_y;

	/* one bit chooses which entry of motion->ref to predict from */
	NEEDBITS (bit_buf, bits, bit_ptr);
	selected_ref = UBITS (bit_buf, 1);
	DUMPBITS (bit_buf, bits, 1);

	vec_x = add_motion_delta (motion->pmv[0][0], decoder, motion->f_code[0]);
	motion->pmv[0][0] = vec_x;
	motion->pmv[1][0] = vec_x;

	vec_y = add_motion_delta (motion->pmv[0][1], decoder, motion->f_code[1]);
	motion->pmv[0][1] = vec_y;
	motion->pmv[1][1] = vec_y;

	motion_block (decoder, table, vec_x, vec_y, &motion->ref[selected_ref]);
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * 16x8 prediction in a field picture: the macroblock is predicted in two
 * parts (last motion_block8 argument 0, then 1).  Each part reads its own
 * one-bit reference selector and vector deltas and keeps its own predictor
 * row in motion->pmv.
 */
static void motion_fi_16x8 (mpeg2_decoder_t * decoder, motion_t * motion, mpeg2_mc_fct** table)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	int part;

	for (part = 0; part < 2; part++)
	{
		int selected_ref;
		int vec_x, vec_y;

		/* one bit chooses which entry of motion->ref this part predicts from */
		NEEDBITS (bit_buf, bits, bit_ptr);
		selected_ref = UBITS (bit_buf, 1);
		DUMPBITS (bit_buf, bits, 1);

		vec_x = add_motion_delta (motion->pmv[part][0], decoder, motion->f_code[0]);
		motion->pmv[part][0] = vec_x;

		vec_y = add_motion_delta (motion->pmv[part][1], decoder, motion->f_code[1]);
		motion->pmv[part][1] = vec_y;

		motion_block8 (decoder, table, vec_x, vec_y, &motion->ref[selected_ref], part);
	}
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Dual-prime style prediction for a macroblock of a field picture.
 *
 * Reads the horizontal vector delta, horizontal differential, vertical
 * vector delta and vertical differential (in that order) and updates both
 * rows of motion->pmv.  The macroblock is first written from the reference
 * indexed by decoder->field_nr, then averaged with a prediction from the
 * other reference using the halved vector plus the differential.
 */
static void motion_fi_dmv (mpeg2_decoder_t * decoder, motion_t * motion)
{
	int vec_x, vec_y;
	int diff_x, diff_y;
	int derived_x, derived_y;

	vec_x = add_motion_delta (motion->pmv[0][0], decoder, motion->f_code[0]);
	motion->pmv[0][0] = vec_x;
	motion->pmv[1][0] = vec_x;
	diff_x = get_dmv (decoder);

	vec_y = add_motion_delta (motion->pmv[0][1], decoder, motion->f_code[1]);
	motion->pmv[0][1] = vec_y;
	motion->pmv[1][1] = vec_y;
	diff_y = get_dmv (decoder);

	/* prediction from the field with the same parity */
	motion_block (decoder, mc_functions.put, vec_x, vec_y, &motion->ref[decoder->field_nr]);

	/* prediction from the field with the opposite parity, averaged in */
	derived_x = ((vec_x + (vec_x > 0)) >> 1) + diff_x;
	derived_y = ((vec_y + (vec_y > 0)) >> 1) + diff_y + 2 * decoder->field_nr - 1;
	motion_block (decoder, mc_functions.avg, derived_x, derived_y, &motion->ref[!decoder->field_nr]);
}

/*
 * Field-picture prediction that reads nothing from the bitstream: the vector
 * stored in motion->pmv[0] is applied to the reference indexed by
 * decoder->field_nr using the given function table.
 */
static void motion_fi_reuse (mpeg2_decoder_t * decoder, motion_t * motion, mpeg2_mc_fct** table)
{
	const int vec_x = motion->pmv[0][0];
	const int vec_y = motion->pmv[0][1];

	motion_block (decoder, table, vec_x, vec_y, &motion->ref[decoder->field_nr]);
}

/*
 * Parses a concealment motion vector in a field picture without performing
 * any motion compensation: the field_select bit is discarded, both
 * components of the forward predictor are updated (in both pmv rows) and
 * the trailing marker bit is discarded.
 */
static void motion_fi_conceal (mpeg2_decoder_t * decoder)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)
	motion_t * const fwd = &decoder->f_motion;
	int component;

	NEEDBITS (bit_buf, bits, bit_ptr);
	DUMPBITS (bit_buf, bits, 1); /* remove field_select */

	/* component 0 is horizontal, component 1 is vertical */
	for (component = 0; component < 2; component++)
	{
		int value = add_motion_delta (fwd->pmv[0][component], decoder, fwd->f_code[component]);

		fwd->pmv[0][component] = value;
		fwd->pmv[1][component] = value;
	}

	DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
#undef bit_buf
#undef bits
#undef bit_ptr
}

/*
 * Invoke `routine` once for every prediction direction flagged in
 * `direction`.  Forward prediction is run on decoder->f_motion with the
 * "put" function table; backward prediction is run on decoder->b_motion,
 * using "avg" when forward prediction was also requested (so the two are
 * combined) and "put" otherwise.
 *
 * Requires a variable named `decoder` in the calling scope.  `direction`
 * is evaluated more than once, so it must be free of side effects.
 */
#define MOTION(routine,direction)                  \
do                                                 \
{                                                  \
  if ((direction) & MACROBLOCK_MOTION_FORWARD)     \
    routine (decoder, &(decoder->f_motion),        \
           mc_functions.put);                      \
  if ((direction) & MACROBLOCK_MOTION_BACKWARD)    \
    routine (decoder, &(decoder->b_motion),        \
       ((direction) & MACROBLOCK_MOTION_FORWARD ?  \
           mc_functions.avg : mc_functions.put));  \
} while (0)

/*
 * Advance the decoder's current position by one macroblock.
 *
 * cur_x and cur_offset move 16 samples to the right and cur_uvoffset moves
 * by 16 >> chroma_wshift.  Once cur_x reaches lumi_width the position wraps
 * to the start of the next macroblock row: cur_y grows by 16, cur_x returns
 * to 0 and both offsets are recomputed from cur_y and the strides.
 *
 * Requires a variable named `decoder` in the calling scope.  No check
 * against max_y is made here; callers test cur_y themselves.
 */
#define NEXT_MACROBLOCK                                         \
do                                                              \
{                                                               \
  decoder->cur_x += 16;                                         \
  decoder->cur_offset += 16;                                    \
  decoder->cur_uvoffset += (16 >> decoder->chroma_wshift);      \
  if (decoder->cur_x >= decoder->lumi_width)                    \
  {                                                             \
    decoder->cur_y += 16;                                       \
    decoder->cur_x = 0;                                         \
    decoder->cur_offset = decoder->cur_y * decoder->stride;     \
    decoder->cur_uvoffset = (decoder->cur_y >> decoder->chroma_hshift) * decoder->uvstride; \
  }                                                             \
} while (0)

/*
 * Fills in `skipped` macroblocks that are not present in the bitstream,
 * starting at the decoder's current position.
 *
 * The intra DC predictors are always reset.  In I, D and P pictures the
 * motion vector predictors are reset as well (a skip in an I/D picture is
 * additionally logged when warnings are enabled).  In a B picture whose
 * last macroblock_modes carried a motion direction, the stored vectors are
 * re-applied to every skipped macroblock; in all other cases the
 * macroblock is copied from the forward reference with a zero vector.
 *
 * Does nothing when skipped <= 0 and stops early once cur_y reaches max_y.
 */
static void mpeg2_skipped_macroblocks(mpeg2_decoder_t* decoder, int skipped)
{
	int is_intra_only;
	int reuse_vectors;

	if (skipped <= 0)
		return;

	// reset intra dc predictor on skipped block
	decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
	decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;

	// Clause 7.6.6:  handling of skipped mb's differs between P_TYPE and B_TYPE pictures
	is_intra_only = (decoder->coding_type == I_TYPE) || (decoder->coding_type == D_TYPE);

	if (is_intra_only && (config.debug & cfg_printwarnings))
	{
		ka_log(ka_log_error | ka_log_video, "skipped %d macroblocks in I/D picture at (%d, %d)"
		     , skipped, decoder->cur_x, decoder->cur_y);
	}

	if (is_intra_only || (decoder->coding_type == P_TYPE))
	{
		// predictors are reset for everything except B pictures
		decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
		decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
		decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
		decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
	}

	reuse_vectors = (decoder->coding_type == B_TYPE)
	             && (decoder->macroblock_modes & (MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD));

	for (; skipped > 0; skipped--)
	{
		if (decoder->cur_y >= decoder->max_y)
			return;

		if (reuse_vectors)
		{
			// reuse existing motion vectors
			if (decoder->picture_structure == FRAME_PICTURE)
				MOTION (motion_fr_reuse, decoder->macroblock_modes);
			else
				MOTION (motion_fi_reuse, decoder->macroblock_modes);
		}
		else
		{
			// no vector, just copy from reference
			if (decoder->picture_structure == FRAME_PICTURE)
				motion_block (decoder, mc_functions.put, 0, 0, &decoder->f_motion.ref[0]);
			else
				motion_block (decoder, mc_functions.put, 0, 0, &decoder->f_motion.ref[decoder->field_nr]);
		}

		NEXT_MACROBLOCK;
	}
}

/*
 * Decodes a macroblock address increment from the bitstream.
 *
 * Every macroblock_escape code adds 33 to the result; macroblock_stuffing
 * codes are consumed and ignored.  The final code is looked up in MBA_5 or
 * MBA_11 depending on the magnitude of the bit buffer.
 *
 * first_macroblock: non-zero when no macroblock of the slice has been
 *                   decoded yet; an all-zero 11-bit prefix is then an error.
 *
 * Returns the increment (> 0 for table hits), -2 when 23 consecutive zero
 * bits are found (end of slice, only accepted when first_macroblock is 0),
 * or -1 for an invalid code.
 */
static int mpeg2_increment_macroblock_address(mpeg2_decoder_t * decoder, int first_macroblock)
{
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)

	const MBAtab * entry;
	int increment = 0;

	NEEDBITS (bit_buf, bits, bit_ptr);

	for (;;)
	{
		/* short codes: resolved by the 5-bit table */
		if (bit_buf >= 0x10000000)
		{
			entry = MBA_5 + UBITS (bit_buf, 5) - 2;
			break;
		}

		/* longer codes: resolved by the 11-bit table */
		if (bit_buf >= 0x03000000)
		{
			entry = MBA_11 + UBITS (bit_buf, 11) - 24;
			break;
		}

		switch (UBITS (bit_buf, 11))
		{
			case 8:   /* macroblock_escape */
				increment += 33;
				/* fall through: the escape code is consumed like stuffing */
			case 15:  /* macroblock_stuffing (MPEG1 only) */
				DUMPBITS (bit_buf, bits, 11);
				NEEDBITS (bit_buf, bits, bit_ptr);
				continue;

			case 0:   /* start of next block (but not in first one) or invalid code */
				if (first_macroblock)
					return -1;
				DUMPBITS (bit_buf, bits, 11);
				NEEDBITS (bit_buf, bits, bit_ptr);
				if (UBITS (bit_buf, 12))
					return -1;
				DUMPBITS (bit_buf, bits, 12);
				return -2; // end of slice

			default:  // error
				return -1;
		}
	}

	DUMPBITS (bit_buf, bits, entry->len);
	return increment + entry->mba;
#undef bit_buf
#undef bits
#undef bit_ptr
}

/**
 * Initialises decoding of a picture/field.
 *
 * Resets the current macroblock position, derives max_y / max_offset and
 * the luma/chroma strides from the picture structure, and sets up the
 * forward/backward reference buffers and the destination pointers.
 *
 * Entry [1] of every reference pair is advanced by one luma line
 * (lumi_width) and one chroma line (lumi_width >> chroma_wshift) relative
 * to entry [0].  For field pictures the strides are doubled, and the
 * destination is advanced by one line when decoder->field_nr is non-zero.
 *
 * While decoding the second field of a P picture, the forward reference
 * with index field_nr comes from forward_ref and the other one from
 * current_ref.
 */
void mpeg2_slices_init(mpeg2_decoder_t * decoder
			, const yuv_bufs * forward_ref
			, const yuv_bufs * current_ref
			, const yuv_bufs * backward_ref)
{
	const int is_frame = (decoder->picture_structure == FRAME_PICTURE);
	int line_y, line_uv;
	int dest_y_off = 0, dest_uv_off = 0;
	motion_t* direction[2];
	int i;

	// start at the top-left macroblock
	decoder->cur_x = 0;
	decoder->cur_y = 0;
	decoder->cur_offset = 0;
	decoder->cur_uvoffset = 0;

	if (is_frame)
		decoder->max_y = decoder->lumi_height;
	else
		decoder->max_y = decoder->lumi_height >> 1;
	decoder->max_offset = decoder->lumi_width * decoder->lumi_height;
	decoder->max_offset -= 16; // motion is in both of 16 wide

	// distance from one line to the next, i.e. offset of the bottom field
	line_y = decoder->lumi_width;
	line_uv = line_y >> decoder->chroma_wshift;

	// a field picture steps over the lines of the other field
	if (is_frame)
		decoder->stride = decoder->lumi_width;
	else
		decoder->stride = decoder->lumi_width << 1;
	decoder->uvstride = decoder->stride >> decoder->chroma_wshift;

	// map references to previously decoded frames,
	// unless we are decoding second field of a P frame
	if (!is_frame && decoder->second_field && (decoder->coding_type == P_TYPE))
	{
		decoder->f_motion.ref[decoder->field_nr] = *forward_ref;
		decoder->f_motion.ref[!decoder->field_nr] = *current_ref;
	}
	else
	{
		decoder->f_motion.ref[1] = *forward_ref;
		decoder->f_motion.ref[0] = *forward_ref;
	}
	decoder->b_motion.ref[1] = *backward_ref;
	decoder->b_motion.ref[0] = *backward_ref;

	// add offset for bottom field, forward then backward
	direction[0] = &decoder->f_motion;
	direction[1] = &decoder->b_motion;
	for (i = 0; i < 2; i++)
	{
		yuv_bufs* bottom = &direction[i]->ref[1];

		bottom->y += line_y;
		bottom->cb += line_uv;
		bottom->cr += line_uv;
	}

	// destination starts at line 0, except for a field with non-zero field_nr
	if (!is_frame && decoder->field_nr)
	{
		dest_y_off = line_y;
		dest_uv_off = line_uv;
	}

	// set destination yuv start pointers
	decoder->dest.y = current_ref->y + dest_y_off;
	decoder->dest.cb = current_ref->cb + dest_uv_off;
	decoder->dest.cr = current_ref->cr + dest_uv_off;
}

/*
 * Finishes a picture/field: every macroblock between the current position
 * and max_y is treated as skipped.  macroblock_modes is cleared first, so
 * the skipped macroblocks are filled without re-using motion vectors.
 */
void mpeg2_slices_complete(mpeg2_decoder_t * decoder)
{
	/* remaining macroblock rows, counting the current one */
	int rows_left = (decoder->max_y - decoder->cur_y) >> 4;
	/* remaining luma samples along those rows, minus what is already done */
	int samples_left = rows_left * decoder->lumi_width - decoder->cur_x;

	decoder->macroblock_modes = 0;
	mpeg2_skipped_macroblocks(decoder, samples_left >> 4);
}

/*
 * Calls `fn` for every block flagged in coded_block_pattern, at the
 * destination address of that block within the current macroblock.
 *
 * Bits 0x800..0x100 select the four luma blocks (component 0): left/right
 * of the first block row, then left/right of the second.  Bits 0x80..0x01
 * select chroma blocks, alternating component 1 (cb) and component 2 (cr):
 * first row, second row, then the same pair of rows shifted 8 samples right.
 *
 * With DCT_TYPE_INTERLACED set the second luma row starts one line below
 * the first and the line step is doubled; otherwise it starts 8 lines
 * below.  Chroma follows the same rule except that SEQ_CHROMA_420 always
 * uses the non-interlaced layout.
 */
static void slice_DCT(mpeg2_decoder_t * decoder, fn_slice_DCT fn, int coded_block_pattern)
{
	uint8_t* row0;
	uint8_t* row1;
	uint8_t* cr_row0;
	uint8_t* cr_row1;
	int line_step;
	int i;

	/* Decode lum blocks */
	row0 = decoder->dest.y + decoder->cur_offset;
	if (decoder->macroblock_modes & DCT_TYPE_INTERLACED)
	{
		row1 = row0 + decoder->stride;
		line_step = decoder->stride * 2;
	}
	else
	{
		row1 = row0 + decoder->stride * 8;
		line_step = decoder->stride;
	}

	for (i = 0; i < 4; i++)
	{
		/* bit 1 of i picks the block row, bit 0 the right-hand block */
		if (coded_block_pattern & (0x800 >> i))
			fn(decoder, 0, ((i & 2) ? row1 : row0) + ((i & 1) ? 8 : 0), line_step);
	}

	/* Decode chroma blocks */
	row0 = decoder->dest.cb + decoder->cur_uvoffset;
	cr_row0 = decoder->dest.cr + decoder->cur_uvoffset;
	if ((decoder->chroma_type != SEQ_CHROMA_420)
	&&  (decoder->macroblock_modes & DCT_TYPE_INTERLACED))
	{
		row1 = row0 + decoder->uvstride;
		cr_row1 = cr_row0 + decoder->uvstride;
		line_step = decoder->uvstride * 2;
	}
	else
	{
		row1 = row0 + decoder->uvstride * 8;
		cr_row1 = cr_row0 + decoder->uvstride * 8;
		line_step = decoder->uvstride;
	}

	for (i = 0; i < 8; i++)
	{
		uint8_t* base;

		if (!(coded_block_pattern & (0x80 >> i)))
			continue;

		/* bit 0 of i: cb/cr, bit 1: block row, bit 2: right-hand block */
		if (i & 1)
			base = (i & 2) ? cr_row1 : cr_row0;
		else
			base = (i & 2) ? row1 : row0;

		fn(decoder, 1 + (i & 1), base + ((i & 4) ? 8 : 0), line_step);
	}
}

/**
 * Decodes one slice into the current destination picture.
 *
 * decoder:    decoder state; mpeg2_slices_init must have set up the
 *             position, strides and reference buffers beforehand.
 * code:       slice number (1-based macroblock row), presumably the last
 *             byte of the slice start code - confirm against the caller.
 *             When decoder->vertical_position_extension is set, three
 *             further bits read from the slice header extend it.
 * buffer:     first byte of the slice data.
 * buffer_end: end of the slice data; decoding stops once the read position
 *             reaches it.
 *
 * Macroblocks between the current position and the first macroblock of the
 * slice, as well as those skipped inside the slice, are filled in by
 * mpeg2_skipped_macroblocks.
 *
 * Returns CHUNK_VALID on success and CHUNK_CORRUPTED when the slice number,
 * a macroblock address increment, the macroblock position or the
 * macroblock type is invalid.
 */
int mpeg2_slice(mpeg2_decoder_t * decoder, uint8_t code, const uint8_t * buffer, const uint8_t * buffer_end)
{
	int skipped, incr;
	/* kept in an int: the extended slice position does not fit into 8 bits */
	int slice_row = code;
#define bit_buf (decoder->bitstream_buf)
#define bits (decoder->bitstream_bits)
#define bit_ptr (decoder->bitstream_ptr)

	/* motion vector predictors are reset at the start of every slice */
	decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
	decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
	decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
	decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;

	/* reset intra dc predictor */
	decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
	decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;

	bitstream_init (decoder, buffer);

	if (decoder->vertical_position_extension)
	{
		/* the three extension bits are the high bits of the slice position */
		slice_row += (int) (UBITS (bit_buf, 3) << 7);
		DUMPBITS (bit_buf, bits, 3);
	}

	/* slice numbers start at 1 and must address a row inside the picture */
	if ((slice_row < 1) || ((slice_row - 1) >= (decoder->max_y >> 4)))
	{
		if (config.debug & cfg_printwarnings)
			ka_log(ka_log_error | ka_log_video, "invalid slice number 0x%02x", slice_row);
		return CHUNK_CORRUPTED;
	}

	// if sequence scalable ext & partitioning, priority breakpoint
	//UBITS (bit_buf, 7);
	//DUMPBITS (bit_buf, 7);

	decoder->quantizer_scale = get_quantizer_scale (decoder);
	decoder->macroblock_modes = 0;

	/* ignore intra_slice and all the extra data */
	while (bit_buf & 0x80000000)
	{
		DUMPBITS (bit_buf, bits, 9);
		NEEDBITS (bit_buf, bits, bit_ptr);
	}
	DUMPBITS (bit_buf, bits, 1);

	/* macroblocks between the current position and the start of this slice's row */
	skipped = (slice_row - 1) - (decoder->cur_y >> 4);
	skipped = skipped * decoder->lumi_width - decoder->cur_x;
	skipped >>= 4;
	incr = 1; /* marks the first macroblock for the address increment parser */

	while (bit_ptr - ((16 - bits) >> 3) < buffer_end)
	{
		incr = mpeg2_increment_macroblock_address(decoder, incr);
		if (incr < 0)
		{
			if (incr == -2) break; // next start code, end of slice
			if (config.debug & cfg_printwarnings)
				ka_log(ka_log_error | ka_log_video, "Invalid macroblock increment");
			return CHUNK_CORRUPTED;
		}
		skipped += incr;

		if (skipped < 0)
		{
			if (config.debug & cfg_printwarnings)
				ka_log(ka_log_error | ka_log_video, "macroblock overlapping of %d macroblocks", -skipped);
			return CHUNK_CORRUPTED;
		}
		mpeg2_skipped_macroblocks(decoder, skipped);
		skipped = incr = 0;

		if (decoder->cur_y >= decoder->max_y)
		{
			if (config.debug & cfg_printwarnings)
				ka_log(ka_log_error | ka_log_video, "invalid macroblock position (%d, %d)", decoder->cur_x, decoder->cur_y);
			return CHUNK_CORRUPTED;
		}

		decoder->macroblock_modes = get_macroblock_modes (decoder);
		if (!decoder->macroblock_modes)
		{
			if (config.debug & cfg_printwarnings)
				ka_log(ka_log_error | ka_log_video, "invalid macroblock_type VLC");
			return CHUNK_CORRUPTED;
		}

		if (decoder->macroblock_modes & MACROBLOCK_QUANT)
			decoder->quantizer_scale = get_quantizer_scale (decoder);

		if (decoder->macroblock_modes & MACROBLOCK_INTRA)
		{
			if (decoder->concealment_motion_vectors)
			{
				// Not in MPEG-1
				if (decoder->picture_structure == FRAME_PICTURE)
					motion_fr_conceal (decoder);
				else
					motion_fi_conceal (decoder);
			}
			else
			{
				/* intra macroblock without concealment vectors resets the predictors */
				decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
				decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
				decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
				decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
			}

			slice_DCT(decoder, slice_intra_DCT, decoder->blockmask);

			if (decoder->coding_type == D_TYPE)
			{
				/* D pictures carry one extra bit after each macroblock */
				NEEDBITS (bit_buf, bits, bit_ptr);
				DUMPBITS (bit_buf, bits, 1);
			}
		}
		else
		{
			if (decoder->mpeg1)
			{
				if (decoder->macroblock_modes & (MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD))
					MOTION (motion_mp1, decoder->macroblock_modes);
				else
				{
					/* non-intra mb without forward mv in a P picture */
					decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
					decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
					decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
					decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
					motion_block (decoder, mc_functions.put, 0, 0, &decoder->f_motion.ref[0]);
				}
			}
			else if (decoder->picture_structure == FRAME_PICTURE)
			{
				switch (decoder->macroblock_modes & MOTION_TYPE_MASK)
				{
					case MC_FRAME:
						MOTION (motion_fr_frame, decoder->macroblock_modes);
					break;
					case MC_FIELD:
						MOTION (motion_fr_field, decoder->macroblock_modes);
					break;
					case MC_DMV:
						motion_fr_dmv(decoder, &(decoder->f_motion));
					break;
					case 0:
						/* non-intra mb without forward mv in a P picture */
						decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
						decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
						decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
						decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
						motion_block (decoder, mc_functions.put, 0, 0, &decoder->f_motion.ref[0]);
					break;
				}
			}
			else
			{
				switch (decoder->macroblock_modes & MOTION_TYPE_MASK)
				{
					case MC_FIELD:
						MOTION (motion_fi_field, decoder->macroblock_modes);
					break;
					case MC_16X8:
						MOTION (motion_fi_16x8, decoder->macroblock_modes);
					break;
					case MC_DMV:
						motion_fi_dmv(decoder, &(decoder->f_motion));
					break;
					case 0:
						/* non-intra mb without forward mv in a P picture */
						decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0;
						decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0;
						decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0;
						decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0;
						motion_block (decoder, mc_functions.put, 0, 0
							, &decoder->f_motion.ref[decoder->field_nr]);
					break;
				}
			}

			/* 6.3.17.4 Coded block pattern */
			if (decoder->macroblock_modes & MACROBLOCK_PATTERN)
			{
				int coded_block_pattern;

				coded_block_pattern = get_coded_block_pattern (decoder);
				slice_DCT(decoder, slice_non_intra_DCT, coded_block_pattern);
			}

			/* a non-intra macroblock resets the intra dc predictors */
			decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] =
			decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;
		}

		NEXT_MACROBLOCK;
	}

	if (config.debug & cfg_printwarnings)
	{
		/* step back over whole bytes that were loaded but not consumed */
	  	bit_ptr -= (16 - bits) >> 3;
		if (bit_ptr > buffer_end)
			ka_log(ka_log_error | ka_log_video, "exceeded slice end by %d bytes", (int) (bit_ptr - buffer_end));
	}

	return CHUNK_VALID;
#undef bit_buf
#undef bits
#undef bit_ptr
}
