/**********************************************************************
 * ISO MPEG Audio Subgroup Software Simulation Group (1996)
 * ISO 13818-3 MPEG-2 Audio Decoder - Lower Sampling Frequency Extension
 *
 * $Id: decode.c,v 1.2 1996/03/28 03:13:37 rowlands Exp $
 *
 * $Log: decode.c,v $
 * Revision 1.2  1996/03/28 03:13:37  rowlands
 * Merged layers 1-2 and layer 3 revisions
 *
 * Revision 1.1  1996/02/14 03:45:52  rowlands
 * Initial revision
 *
 * Received from FhG
 **********************************************************************/
/**********************************************************************
 *   date   programmers         comment                               *
 * 2/25/91  Douglas Wong,       start of version 1.0 records          *
 *          Davis Pan                                                 *
 * 3/06/91  Douglas Wong        rename: setup.h to dedef.h            *
 *                                      dfilter to defilter           *
 *                                      dwindow to dewindow           *
 *                              integrated "quantizer", "scalefactor" *
 *                              combined window_samples routine into  *
 *                              filter samples                        *
 * 3/31/91  Bill Aspromonte     replaced read_filter by               *
 *                              create_syn_filter and introduced a    *
 *                              new Sub-Band Synthesis routine called *
 *                              SubBandSynthesis()                    *
 * 5/10/91  Vish (PRISM)        Ported to Macintosh and Unix.         *
 *                              Changed "out_fifo()" so that last     *
 *                              unfilled block is also written out.   *
 *                              "create_syn_filter()" was modified so *
 *                              that calculation precision is same as *
 *                              in specification tables.              *
 *                              Changed "decode_scale()" to reflect   *
 *                              specifications.                       *
 *                              Removed all routines used by          *
 *                              "synchronize_buffer()".  This is now  *
 *                              replaced by "seek_sync()".            *
 *                              Incorporated Jean-Georges Fritsch's   *
 *                              "bitstream.c" package.                *
 *                              Deleted "reconstruct_sample()".       *
 * 27jun91  dpwe (Aware)        Passed outFile and &sampFrames as     *
 *                              args to out_fifo() - were global.     *
 *                              Moved "alloc_*" reader to common.c.   *
 *                              alloc, sblimit, stereo passed via new *
 *                              'frame_params struct (were globals).  *
 *                              Added JOINT STEREO decoding, lyrs I&II*
 *                              Affects: decode_bitalloc,buffer_samps *
 *                              Plus a few other cleanups.            *
 * 6/10/91   Earle Jennings     conditional expansion added in        *
 *                              II_dequantize_sample to handle range  *
 *                              problems in MSDOS version             *
 * 8/8/91    Jens Spille        Change for MS-C6.00                   *
 *10/1/91    S.I. Sudharsanan,  Ported to IBM AIX platform.           *
 *           Don H. Lee,                                              *
 *           Peter W. Farrett                                         *
 *10/3/91    Don H. Lee         implemented CRC-16 error protection   *
 *                              newly introduced functions are        *
 *                              buffer_CRC and recover_CRC_error.     *
 * 2/11/92  W. Joseph Carter    Ported new code to Macintosh.  Most   *
 *                              important fixes involved changing     *
 *                              16-bit ints to long or unsigned in    *
 *                              bit alloc routines for quant of 65535 *
 *                              and passing proper function args.     *
 *                              Removed "Other Joint Stereo" option   *
 *                              and made bitrate be total channel     *
 *                              bitrate, irrespective of the mode.    *
 *                              Fixed many small bugs & reorganized.  *
 * 7/27/92  Juan Pineda         Bug fix in SubBandSynthesis()         *
 *--------------------------------------------------------------------*
 * 6/14/92  Juan Pineda         Layer III decoding routines added.    *
 *          Amit Gulati         Follows CD 3-11172 rev2.  Contains    *
 *                              hacks deal with evolving available    *
 *                              layerIII bitstreams.  Some (minor)    *
 *                              modification of prior LI&II code.     *
 * 10/25/92 Amit Gulati         Updated layerIII routines. Added code *
 *                              for subblock_gain, switched block     *
 *                              modes, stereo pre-processing.         *
 *                              Corrected sign bits for huffman       *
 *                              decoding of quadruples region and     *
 *                              adjusted gain factor in III_dequant.  *
 * 11/21/92 Amit Gulati         Several layerIII bugs fixed.          *
 * 12/15/92 Amit Gulati         Corrected reordering (indexing)       *
 *          Stan Searing        within IMDCT routine.                 *
 *  8/24/93 Masahiro Iwadare    Included IS modification in Layer III.*
 *                              Changed for 1 pass decoding.          *
 *  9/07/93 Toshiyuki Ishino    Integrated Layer III with Ver 3.9.    *
 *--------------------------------------------------------------------*
 * 11/20/93 Masahiro Iwadare    Integrated Layer III with Ver 4.0.    *
 *--------------------------------------------------------------------*
 *  7/14/94 Juergen Koller      Bug fixes in Layer III code           *
 *--------------------------------------------------------------------*
 * 08/11/94 IIS                 Bug fixes in Layer III code           *
 *--------------------------------------------------------------------*
 * 9/20/94  Davis Pan           Modification to avoid premature       *
 *                              synchword detection                   *
 *--------------------------------------------------------------------*
 * 11/09/94 Jon Rowlands        Merged premature synchword detection  *
 *                              fix into layer III code version       *
 *--------------------------------------------------------------------*
 * 07/12/95 Soeren H. Nielsen   Changes for LSF Layer I and II        *
 *--------------------------------------------------------------------*
 *    8/95  Roland Bitto	Adapted to MPEG2                      *
 *  9/8/95  Roalnd Bitto        Bugfix in Function III_stereo         *
 *--------------------------------------------------------------------*
 * 12/16/96 Johan Hagman	Adapted for Solaris (mpeg3play 0.9)   *
 *--------------------------------------------------------------------*
 * 04/09/98 Niall Douglas	Adapted for RISC-OS                   *
 **********************************************************************/

#include        "common.h"
#include        "decoder.h"
#include        "huffman.h"
#include        "sound.h"

/***************************************************************
 *
 * This module contains the core of the decoder ie all the
 * computational routines. (Layer I and II only)
 * Functions are common to both layer unless
 * otherwise specified.
 *
 ***************************************************************/

extern Arguments_t Arguments;
extern int	   audiofd;

/*****************************************************************
 *
 * The following routines decode the system information
 *
 ****************************************************************/

/************ Layer I, Layer II & Layer III ******************/

void decode_info(
    Bit_stream_struc	*bs,
    frame_params	*fr_ps)
{
    layer *hdr = fr_ps->header;

    hdr->version	= get1bit(bs);
    hdr->lay		= 4 - getbits(bs,2);
    hdr->error_protection = !get1bit(bs); /* error protect. TRUE/FALSE */
    hdr->bitrate_index	= getbits(bs,4);
    hdr->sampling_frequency = getbits(bs,2);
    hdr->padding	= get1bit(bs);
    hdr->extension	= get1bit(bs);
    hdr->mode = getbits(bs,2);
    hdr->mode_ext	= getbits(bs,2);
    hdr->copyright	= get1bit(bs);
    hdr->original	= get1bit(bs);
    hdr->emphasis	= getbits(bs,2);
}

/*******************************************************************
 *
 * The bit allocation information is decoded. Layer I
 * has 4 bit per subband whereas Layer II is Ws and bit rate
 * dependent.
 *
 ********************************************************************/

/**************************** Layer II *************/

void II_decode_bitalloc(
    Bit_stream_struc	*bs,
    unsigned int	 bit_alloc[2][SBLIMIT],
    frame_params	*fr_ps)
{
    int		 i, j;
    int		 stereo = fr_ps->stereo;
    int		 sblimit = fr_ps->sblimit;
    int		 jsbound = fr_ps->jsbound;
    al_table	*alloc = fr_ps->alloc;

    for (i = 0; i < jsbound; i++)
	for (j = 0; j < stereo; j++)
	    bit_alloc[j][i] = (char)getbits(bs, (*alloc)[i][0].bits);

    for (i = jsbound; i < sblimit; i++)		/* expand to 2 channels */
	bit_alloc[0][i] = bit_alloc[1][i] =
		    (char)getbits(bs, (*alloc)[i][0].bits);

    for (i = sblimit; i < SBLIMIT; i++)
	for (j = 0; j < stereo; j++)
	    bit_alloc[j][i] = 0;
}

/**************************** Layer I *************/

void I_decode_bitalloc(
    Bit_stream_struc *bs,
    unsigned int bit_alloc[2][SBLIMIT],
    frame_params *fr_ps)
{
    int i,j;
    int stereo	= fr_ps->stereo;
    /*int sblimit = fr_ps->sblimit;*/
    int jsbound = fr_ps->jsbound;
    int b;

    for (i=0;i<jsbound;i++) for (j=0;j<stereo;j++)
        bit_alloc[j][i] = getbits(bs,4);
    for (i=jsbound;i<SBLIMIT;i++) {
        b = getbits(bs,4);
        for (j=0;j<stereo;j++)
            bit_alloc[j][i] = b;
    }
}

/*****************************************************************
 *
 * The following two functions implement the layer I and II
 * format of scale factor extraction. Layer I involves reading
 * 6 bit per subband as scale factor. Layer II requires reading
 * first the scfsi which in turn indicate the number of scale factors
 * transmitted.
 *    Layer I : I_decode_scale
 *   Layer II : II_decode_scale
 *
 ****************************************************************/

/************************** Layer I stuff ************************/

void I_decode_scale(
    Bit_stream_struc *bs, unsigned int bit_alloc[2][SBLIMIT],
    unsigned int scale_index[2][3][SBLIMIT], frame_params *fr_ps)
{
    int i,j;
    int stereo = fr_ps->stereo;
    /*int sblimit = fr_ps->sblimit;*/

    for (i=0;i<SBLIMIT;i++) for (j=0;j<stereo;j++)
        if (!bit_alloc[j][i])
            scale_index[j][0][i] = SCALE_RANGE-1;
        else                    /* 6 bit per scale factor */
            scale_index[j][0][i] =  getbits(bs,6);

}

/*************************** Layer II stuff ***************************/

void II_decode_scale(
    Bit_stream_struc	*bs,
    unsigned int	 scfsi[2][SBLIMIT],
    unsigned int	 bit_alloc[2][SBLIMIT],
    unsigned int	 scale_index[2][3][SBLIMIT],
    frame_params	*fr_ps)
{
    int		i, j;
    int		stereo = fr_ps->stereo;
    int		sblimit = fr_ps->sblimit;

#ifdef OPTIMIZE

    for (i = 0; i < sblimit; i++)
	for (j = 0; j < stereo; j++) {		/* 2 bit scfsi */
	    if (bit_alloc[j][i])
		scfsi[j][i] = (char)getbits(bs,2);
	    else
		scfsi[j][i] = 0;
	}

    for (i = 0; i < sblimit; i++) {
	for (j = 0; j < stereo; j++) {
	    if (bit_alloc[j][i])
		switch (scfsi[j][i]) {
		case 0 :
		    /* All three scale factors transmitted */
		    scale_index[j][0][i] = getbits(bs,6);
		    scale_index[j][1][i] = getbits(bs,6);
		    scale_index[j][2][i] = getbits(bs,6);
		    break;
		case 1 :
		    /* Scale factor 1 & 3 transmitted */
		    scale_index[j][0][i] = scale_index[j][1][i] = getbits(bs,6);
		    scale_index[j][2][i] = getbits(bs,6);
		    break;
		case 3 :
		    /* Scale factor 1 & 2 transmitted */
		    scale_index[j][0][i] = getbits(bs,6);
		    scale_index[j][1][i] = scale_index[j][2][i] = getbits(bs,6);
		    break;
		case 2 :
		    /* Only one scale factor transmitted */
		    scale_index[j][0][i] = scale_index[j][1][i] =
			scale_index[j][2][i] = getbits(bs,6);
		    break;
		default :
		    break;
		}
	    else {
		scale_index[j][0][i] = scale_index[j][1][i] =
			scale_index[j][2][i] = SCALE_RANGE-1;
	    }
	}
    }
    for (i = sblimit; i < SBLIMIT; i++)
	for (j = 0; j < stereo; j++)
	    scale_index[j][0][i] = scale_index[j][1][i] =
			scale_index[j][2][i] = SCALE_RANGE - 1;

#else
    /* Original code */

    for (i = 0; i < sblimit; i++)
	for (j = 0; j < stereo; j++)		/* 2 bit scfsi */
	    if (bit_alloc[j][i])
		scfsi[j][i] = (char)getbits(bs,2);

    for (i = sblimit; i < SBLIMIT; i++)
	for (j = 0; j < stereo; j++)
	    scfsi[j][i] = 0;

    for (i = 0; i < sblimit; i++) {
	for (j = 0; j < stereo; j++) {
	    if (bit_alloc[j][i])
		switch (scfsi[j][i]) {
		case 0 :
		    /* All three scale factors transmitted */
		    scale_index[j][0][i] = getbits(bs,6);
		    scale_index[j][1][i] = getbits(bs,6);
		    scale_index[j][2][i] = getbits(bs,6);
		    break;
		case 1 :
		    /* Scale factor 1 & 3 transmitted */
		    scale_index[j][0][i] = scale_index[j][1][i] = getbits(bs,6);
		    scale_index[j][2][i] = getbits(bs,6);
		    break;
		case 3 :
		    /* Scale factor 1 & 2 transmitted */
		    scale_index[j][0][i] = getbits(bs,6);
		    scale_index[j][1][i] = scale_index[j][2][i] = getbits(bs,6);
		    break;
		case 2 :
		    /* Only one scale factor transmitted */
		    scale_index[j][0][i] = scale_index[j][1][i] =
			scale_index[j][2][i] = getbits(bs,6);
		    break;
		default :
		    break;
		}
	    else {
		scale_index[j][0][i] = scale_index[j][1][i] =
			scale_index[j][2][i] = SCALE_RANGE-1;
	    }
	}
    }

    for (i = sblimit; i < SBLIMIT; i++)
	for (j = 0; j < stereo; j++)
	    scale_index[j][0][i] = scale_index[j][1][i] =
			scale_index[j][2][i] = SCALE_RANGE - 1;
#endif /* OPTIMIZE */
}

/**************************************************************
 *
 *   The following two routines take care of reading the
 * compressed sample from the bit stream for both layer 1 and
 * layer 2. For layer 1, read the number of bits as indicated
 * by the bit_alloc information. For layer 2, if grouping is
 * indicated for a particular subband, then the sample size has
 * to be read from the bits_group and the merged samples has
 * to be decompose into the three distinct samples. Otherwise,
 * it is the same for as layer one.
 *
 **************************************************************/

/******************************* Layer I stuff ******************/

void I_buffer_sample(
    Bit_stream_struc *bs,
    unsigned int FAR sample[2][3][SBLIMIT],
    unsigned int bit_alloc[2][SBLIMIT],
    frame_params *fr_ps)
{
    int i,j,k;
    int stereo = fr_ps->stereo;
    /*int sblimit = fr_ps->sblimit;*/
    int jsbound = fr_ps->jsbound;
    unsigned int s;

    for (i=0;i<jsbound;i++) for (j=0;j<stereo;j++)
        if ( (k = bit_alloc[j][i]) == 0)
            sample[j][0][i] = 0;
        else
            sample[j][0][i] = (unsigned int) getbits(bs,k+1);
    for (i=jsbound;i<SBLIMIT;i++) {
        if ( (k = bit_alloc[0][i]) == 0)
            s = 0;
        else
            s = (unsigned int)getbits(bs,k+1);
        for (j=0;j<stereo;j++)
            sample[j][0][i]    = s;
    }
}

/*************************** Layer II stuff ************************/

void II_buffer_sample(
    Bit_stream_struc	*bs,
    unsigned int FAR	 sample[2][3][SBLIMIT],
    unsigned int	 bit_alloc[2][SBLIMIT],
    frame_params	*fr_ps)
{
    int		 i, j;
#ifndef OPTIMIZE
    int		 m;
#endif
    int		 stereo = fr_ps->stereo;
    int		 sblimit = fr_ps->sblimit;
    int		 jsbound = fr_ps->jsbound;
    al_table	*alloc = fr_ps->alloc;

#ifdef OPTIMIZE

    for (i = 0; i < sblimit; i++)
	for (j = 0; j < ((i < jsbound) ? stereo : 1); j++) {
	    if (bit_alloc[j][i]) {

		/* Check for grouping in subband */
		int k = (*alloc)[i][bit_alloc[j][i]].bits;

		if ((*alloc)[i][bit_alloc[j][i]].group == 3) {

		    sample[j][0][i] = (unsigned int)getbits(bs, k);
		    sample[j][1][i] = (unsigned int)getbits(bs, k);
		    sample[j][2][i] = (unsigned int)getbits(bs, k);

		} else {
		    /* bit_alloc = 3, 5, 9 */
		    unsigned int nlevels, c;

		    nlevels = (*alloc)[i][bit_alloc[j][i]].steps;
		    c = (unsigned int)getbits(bs, k);
		    /* Unrolled loop */
		    sample[j][0][i] = c % nlevels;
		    /* Assign both at once. Note that c is modified!! */
		    sample[j][1][i] = (c /= nlevels) % nlevels;
		    sample[j][2][i] = (c /= nlevels) % nlevels;
		}
	    } else {
		/* For no sample transmitted */
		sample[j][0][i] = sample[j][1][i] = sample[j][2][i] = 0;
	    }
	    if (stereo == 2 && i >= jsbound) {	/* joint stereo: copy L to R */
		sample[1][0][i] = sample[0][0][i];
		sample[1][1][i] = sample[0][1][i];
		sample[1][2][i] = sample[0][2][i];
	    }
	}

#else
    /* Original code */
    int	k;

    for (i = 0; i < sblimit; i++)
	for (j = 0; j < ((i<jsbound) ? stereo : 1); j++) {
	    if (bit_alloc[j][i]) {
		/* Check for grouping in subband */
		if ((*alloc)[i][bit_alloc[j][i]].group == 3)
		    for (m = 0; m < 3; m++) {
			k = (*alloc)[i][bit_alloc[j][i]].bits;
			sample[j][m][i] = (unsigned int)getbits(bs, k);
		    }
		else {
		    /* bit_alloc = 3, 5, 9 */
		    unsigned int nlevels,
				 c = 0;

		    nlevels = (*alloc)[i][bit_alloc[j][i]].steps;
		    k = (*alloc)[i][bit_alloc[j][i]].bits;
		    c = (unsigned int)getbits(bs, k);
		    for (k = 0; k < 3; k++) {
			sample[j][k][i] = c % nlevels;
			c /= nlevels;
		    }
		}
	    } else {
		/* For no sample transmitted */
		for (k = 0; k < 3; k++)
		    sample[j][k][i] = 0;
	    }
	    if (stereo == 2 && i>= jsbound)	/* joint stereo: copy L to R */
		for (k = 0; k < 3; k++)
		    sample[1][k][i] = sample[0][k][i];
	}

    for (i = sblimit; i < SBLIMIT; i++)
	for (j = 0; j < stereo; j++)
	    for (k = 0; k < 3; k++)
		sample[j][k][i] = 0;
#endif /* OPTIMIZE */
}

/**************************************************************
 *
 *   Restore the compressed sample to a fractional number.
 *   first complement the MSB of the sample
 *    for layer I :
 *    Use s = (s' + 2^(-nb+1) ) * 2^nb / (2^nb-1)
 *   for Layer II :
 *   Use the formula s = s' * c + d
 *
 **************************************************************/

static REAL c[17] = {	1.33333333333, 1.60000000000, 1.14285714286,
			1.77777777777, 1.06666666666, 1.03225806452,
			1.01587301587, 1.00787401575, 1.00392156863,
			1.00195694716, 1.00097751711, 1.00048851979,
			1.00024420024, 1.00012208522, 1.00006103888,
			1.00003051851, 1.00001525902 };

static REAL d[17] = {	0.500000000, 0.500000000, 0.250000000, 0.500000000,
			0.125000000, 0.062500000, 0.031250000, 0.015625000,
			0.007812500, 0.003906250, 0.001953125, 0.0009765625,
			0.00048828125, 0.00024414063, 0.00012207031,
			0.00006103516, 0.00003051758 };

/************************** Layer II stuff ************************/


#ifndef ASM_OPTIMIZE

void II_dequantize_sample(
    unsigned int FAR	 sample[2][3][SBLIMIT],
    unsigned int	 bit_alloc[2][SBLIMIT],
    REAL FAR		 fraction[2][3][SBLIMIT],
    frame_params	*fr_ps)
{
    int		 i, j, k;
    int		 stereo = fr_ps->stereo;
    int		 sblimit = fr_ps->sblimit;
    al_table	*alloc = fr_ps->alloc;

#ifdef OPTIMIZE

    for (i = 0; i < sblimit; i++)
	for (j = 0; j < 3; j++) {
	    for (k = 0; k < stereo; k++) {
		REAL *f = &fraction[k][j][i];

		if (bit_alloc[k][i]) {
		    int  x = 0;
		    REAL r;
		    /*
		     * Locate MSB in the sample
		     * u is set outside loop to hopefully speed up the loop
		     */
		    unsigned int u = (*alloc)[i][bit_alloc[k][i]].steps;
		    while (((unsigned int) 1 << x) < u)
			x++;

		    /* MSB inversion */
		    u = sample[k][j][i];	/* read sample once */

		    x = 1L << (x - 1);		/* for MSB test */

		    if ((u & x))
			r = 0.0;		/* MSB is set */
		    else
			r = -1.0;		/* MSB is clear */

		    /*
		     * Form a 2's complement sample
		     * This is a slow operation
		     */
		    r += (REAL)(u & (x - 1)) / (REAL)x;

		    u = (*alloc)[i][bit_alloc[k][i]].quant;

		    /* Dequantize the sample */
		    r += d[u];
		    r *= c[u];
		    *f = r;		/* destination touched once */

		} else
		    *f = 0.0;
	    }
	}

#else /* original code */

    int x;

    for (i = 0; i < sblimit; i++)
	for (j = 0; j < 3; j++)
	    for (k = 0; k < stereo; k++)
		if (bit_alloc[k][i]) {
		    /* Locate MSB in the sample */
		    x = 0;
#ifndef MS_DOS
		    while ((1L<<x) < (*alloc)[i][bit_alloc[k][i]].steps)
			x++;
#else
		    /* Microsoft C thinks an int is a short */
		    while (( (unsigned long) (1L<<(long)x) <
			    (unsigned long)((*alloc)[i][bit_alloc[k][i]].steps))
			    && ( x < 16) )
			x++;
#endif
		    /* MSB inversion */
		    if (((sample[k][j][i] >> x-1) & 1) == 1)
			fraction[k][j][i] = 0.0;
		    else
			fraction[k][j][i] = -1.0;

		    /* Form a 2's complement sample */
		    fraction[k][j][i] +=
			(REAL)(sample[k][j][i] & ((1<<x-1)-1)) /
			(REAL)(1L<<x-1);

		    /* Dequantize the sample */
		    fraction[k][j][i] += d[(*alloc)[i][bit_alloc[k][i]].quant];
		    fraction[k][j][i] *= c[(*alloc)[i][bit_alloc[k][i]].quant];
		} else
		    fraction[k][j][i] = 0.0;

    for (i = sblimit; i < SBLIMIT; i++)
	for (j = 0; j < 3; j++)
	    for (k = 0; k < stereo; k++)
		fraction[k][j][i] = 0.0;

#endif /* OPTIMIZE */
}
#endif /* ASM_OPTIMIZE */


/***************************** Layer I stuff ***********************/

void I_dequantize_sample(
    unsigned int FAR sample[2][3][SBLIMIT],
    REAL FAR fraction[2][3][SBLIMIT],
    unsigned int bit_alloc[2][SBLIMIT],
    frame_params *fr_ps)
{
    int i, nb, k;
    int stereo = fr_ps->stereo;
    /*int sblimit = fr_ps->sblimit;*/

    for (i=0; i<SBLIMIT; i++)
	for (k=0; k<stereo; k++)
	    if (bit_alloc[k][i]) {
		nb = bit_alloc[k][i] + 1;
		if (((((sample[k][0][i]) >> nb) - 1) & 1) == 1)
		    fraction[k][0][i] = 0.0;
		else
		    fraction[k][0][i] = -1.0;

		fraction[k][0][i] += (sample[k][0][i] & (((1 << nb) - 1) - 1)) /
				     ((1L << nb) - 1);

		fraction[k][0][i] =
			    (fraction[k][0][i] + 1.0 / ((1L << nb) - 1)) *
				(1L<<nb) / ((1L<<nb)-1);
	    } else
		fraction[k][0][i] = 0.0;
}

/************************************************************
 *
 *   Restore the original value of the sample ie multiply
 *    the fraction value by its scalefactor.
 *
 ************************************************************/

/************************* Layer II Stuff **********************/

void II_denormalize_sample(
    REAL FAR		 fraction[2][3][SBLIMIT],
    unsigned int	 scale_index[2][3][SBLIMIT],
    frame_params	*fr_ps,
    int			 x)
{
    int		i, j;
    int		stereo = fr_ps->stereo;
    int		sblimit = fr_ps->sblimit;

#ifdef OPTIMIZE

    for (i = 0; i < sblimit; i++)
	for (j = 0; j < stereo; j++) {
	    REAL m = multiple[scale_index[j][x][i]];

	    fraction[j][0][i] *= m;
	    fraction[j][1][i] *= m;
	    fraction[j][2][i] *= m;
	}

#else /* original code */

    for (i = 0; i < sblimit; i++)
	for (j = 0; j < stereo; j++) {
	    fraction[j][0][i] *= multiple[scale_index[j][x][i]];
	    fraction[j][1][i] *= multiple[scale_index[j][x][i]];
	    fraction[j][2][i] *= multiple[scale_index[j][x][i]];
	}
#endif /* OPTIMIZE */
}

/**************************** Layer I stuff ******************************/

void I_denormalize_sample(
    REAL FAR fraction[2][3][SBLIMIT],
    unsigned int scale_index[2][3][SBLIMIT],
    frame_params *fr_ps)
{
    int i, j;
    int stereo = fr_ps->stereo;

    for (i=0;i<SBLIMIT;i++) for (j=0;j<stereo;j++)
        fraction[j][0][i] *= multiple[scale_index[j][0][i]];
}

/*****************************************************************
 *
 * The following are the subband synthesis routines. They apply
 * to both layer I and layer II stereo or mono. The user has to
 * decide what parameters are to be passed to the routines.
 *
 ***************************************************************/

/*************************************************************
 *
 *   Pass the subband sample through the synthesis window
 *
 **************************************************************/

/* create in synthesis filter */

void create_syn_filter(REAL FAR filter[64][SBLIMIT])
{
    int i, k;
#ifdef NO_MODFF
    double temp;
#endif

    for (i = 0; i < 64; i++)
	for (k = 0; k < 32; k++) {
#ifdef FSINGLE
	    filter[i][k] = 1e9 * cos((PI64*i+PI4)*(2.0*k+1.0));
	    if (filter[i][k] >= 0) {
		/*modff(filter[i][k]+0.5, &filter[i][k]);*/
		filter[i][k] = floor(filter[i][k] + 0.5);
	    } else {
		/*modff(filter[i][k]-0.5, &filter[i][k]);*/
		filter[i][k] = floor(filter[i][k] - 0.5);
	    }
#else /* FSINGLE */
	    filter[i][k] = 1e9*cos((PI64*i+PI4)*(2*k+1));
	    if (filter[i][k] >= 0)
		/*modf(filter[i][k]+0.5, &filter[i][k]);*/
		filter[i][k] = floor(filter[i][k] + 0.5);
	    else
		/*modf(filter[i][k]-0.5, &filter[i][k]);*/
		filter[i][k] = floor(filter[i][k] - 0.5);
#endif
	    filter[i][k] *= 1e-9;
	}
	/*
	 * filter[][] is now filled with values between -1 and 1.
	 */
}


#ifndef BUILTIN_TABLES

/***************************************************************
 *
 *   Window the restored sample
 *
 ***************************************************************/

/* read in synthesis window */

void read_syn_window(REAL FAR window[HAN_SIZE])
{
    int	  i, j[4];
    FILE *fp;
    REAL  f[4];
    char  t[150];

    if (!(fp = OpenTableFile("dewindow") )) {
        fprintf(stderr, "Please check synthesis window table 'dewindow'\n");
        exit(1);
    }
    for (i=0; i<512; i+=4) {
        fgets(t, 150, fp);
#ifdef FSINGLE
	sscanf(t,"D[%d] = %f D[%d] = %f D[%d] = %f D[%d] = %f\n",
		j, f, j+1, f+1, j+2, f+2, j+3, f+3);
#else
        sscanf(t,"D[%d] = %lf D[%d] = %lf D[%d] = %lf D[%d] = %lf\n",
		j, f, j+1, f+1, j+2, f+2, j+3, f+3);
#endif
        if (i == j[0]) {
            window[i] = f[0];
            window[i+1] = f[1];
            window[i+2] = f[2];
            window[i+3] = f[3];
        }
        else {
            fprintf(stderr, "Check index in synthesis window table\n");
            exit(1);
        }
        fgets(t, 150, fp);
    }
    fclose(fp);
}

#endif /* BUILTIN_TABLES */


#ifndef INT_MATH

/* Set to 1 for the optimization level from version 0.9.3 and earlier */
#define OPTIMIZE_093	0
/* Set to 1 for the "fuzzy zero detection" optimization in version 0.9.5 */
#define OPTIMIZE_095	0

int SubBandSynthesis(
    REAL	*bandPtr,
    int		 channel,
    short	*samples)
{
    int			 i, j;
    REAL		*bufOffsetPtr, sum;
    static int		 init = 1;
    typedef REAL	 NN[64][32];
    static NN FAR	*filter;
    typedef REAL	 BB[2][2*HAN_SIZE];
    static BB FAR	*buf;
    static int		 bufOffset[2] = {64,64};
    static REAL FAR	*window;
    int			 clip = 0;   /* count & return # samples clipped */
    int			 offset;
    long		 foo;
    int			 k;
#ifdef OPTIMIZE
    REAL		*filterp;
    REAL		*bandp;
# if OPTIMIZE_095
    int			 nzbcount, nzbands[32];	/* non-zero band detection */
# endif
#endif

    if (init == 1) {
        buf = (BB FAR *) mem_alloc(sizeof(BB),"BB");
        filter = (NN FAR *) mem_alloc(sizeof(NN), "NN");
        create_syn_filter(*filter);
        window = (REAL FAR *) mem_alloc(sizeof(REAL) * HAN_SIZE, "WIN");
        read_syn_window(window);
	init = 0;
    }

    bufOffset[channel] = (bufOffset[channel] - 64) & 0x3ff;
    bufOffsetPtr = &((*buf)[channel][bufOffset[channel]]);

#ifdef OPTIMIZE

# if OPTIMIZE_093

    filterp = (REAL *)((*filter)[0]);
    bandp = bandPtr;

    i = 64;
    while (i--) {

	*bufOffsetPtr++ =
	    bandp[0] * filterp[0] +
	    bandp[1] * filterp[1] +
	    bandp[2] * filterp[2] +
	    bandp[3] * filterp[3] +
	    bandp[4] * filterp[4] +
	    bandp[5] * filterp[5] +
	    bandp[6] * filterp[6] +
	    bandp[7] * filterp[7] +
	    bandp[8] * filterp[8] +
	    bandp[9] * filterp[9] +
	    bandp[10] * filterp[10] +
	    bandp[11] * filterp[11] +
	    bandp[12] * filterp[12] +
	    bandp[13] * filterp[13] +
	    bandp[14] * filterp[14] +
	    bandp[15] * filterp[15] +
	    bandp[16] * filterp[16] +
	    bandp[17] * filterp[17] +
	    bandp[18] * filterp[18] +
	    bandp[19] * filterp[19] +
	    bandp[20] * filterp[20] +
	    bandp[21] * filterp[21] +
	    bandp[22] * filterp[22] +
	    bandp[23] * filterp[23] +
	    bandp[24] * filterp[24] +
	    bandp[25] * filterp[25] +
	    bandp[26] * filterp[26] +
	    bandp[27] * filterp[27] +
	    bandp[28] * filterp[28] +
	    bandp[29] * filterp[29] +
	    bandp[30] * filterp[30] +
	    bandp[31] * filterp[31];

	filterp += 32;
    }

# elif OPTIMIZE_095

    filterp = (REAL *)((*filter)[0]);
    bandp = bandPtr;

    /* Analyze input */
    for (i = 0, nzbcount = 0; i < 32; i++)
	/* Fuzzy zero detection */
	if (bandp[i] < -0.00035 || bandp[i] > 0.00035)
	    nzbands[nzbcount++] = i;

    for (i = 0; i < 64; i++, filterp += 32) {

	/* Only add non-zero bands */
	for (j = 0, sum = 0.0; j < nzbcount; j++)
	    sum += bandp[nzbands[j]] * filterp[nzbands[j]];

	*bufOffsetPtr++ = sum;
    }

# else /*optimize using a fast cosine transform */

#define cos1_64  0.500602998235196
#define cos3_64  0.505470959897544
#define cos5_64  0.515447309922625
#define cos7_64  0.531042591089784
#define cos9_64  0.553103896034445
#define cos11_64 0.582934968206134
#define cos13_64 0.622504123035665
#define cos15_64 0.674808341455006
#define cos17_64 0.744536271002299
#define cos19_64 0.839349645415527
#define cos21_64 0.972568237861961
#define cos23_64 1.169439933432885
#define cos25_64 1.484164616314166
#define cos27_64 2.057781009953411
#define cos29_64 3.407608418468719
#define cos31_64 10.190008123548033
#define cos1_32  0.502419286188156
#define cos3_32  0.522498614939689
#define cos5_32  0.566944034816358
#define cos7_32  0.646821783359990
#define cos9_32  0.788154623451250
#define cos11_32 1.060677685990347
#define cos13_32 1.722447098238334
#define cos15_32 5.101148618689155
#define cos1_16  0.509795579104159
#define cos3_16  0.601344886935045
#define cos5_16  0.899976223136416
#define cos7_16  2.562915447741505
#define cos1_8   0.541196100146197
#define cos3_8   1.306562964876376
#define cos1_4   0.707106781186547

  {
    int i;
    register REAL tmp;
    REAL p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15;
    REAL pp0,pp1,pp2,pp3,pp4,pp5,pp6,pp7,pp8,pp9,pp10,pp11,pp12,pp13,pp14,pp15;

    /* Compute new values via a fast cosine transform */
    p0  = bandPtr[0] + bandPtr[31];
    p1  = bandPtr[1] + bandPtr[30];
    p2  = bandPtr[2] + bandPtr[29];
    p3  = bandPtr[3] + bandPtr[28];
    p4  = bandPtr[4] + bandPtr[27];
    p5  = bandPtr[5] + bandPtr[26];
    p6  = bandPtr[6] + bandPtr[25];
    p7  = bandPtr[7] + bandPtr[24];
    p8  = bandPtr[8] + bandPtr[23];
    p9  = bandPtr[9] + bandPtr[22];
    p10 = bandPtr[10]+ bandPtr[21];
    p11 = bandPtr[11]+ bandPtr[20];
    p12 = bandPtr[12]+ bandPtr[19];
    p13 = bandPtr[13]+ bandPtr[18];
    p14 = bandPtr[15]+ bandPtr[17];
    p15 = bandPtr[16]+ bandPtr[16];

    pp0 = p0 + p15;
    pp1 = p1 + p14;
    pp2 = p2 + p13;
    pp3 = p3 + p12;
    pp4 = p4 + p11;
    pp5 = p5 + p10;
    pp6 = p6 + p9;
    pp7 = p7 + p8;
    pp8 = cos1_32  * (p0 - p15);
    pp9 = cos3_32  * (p1 - p14);
    pp10= cos5_32  * (p2 - p13);
    pp11= cos7_32  * (p3 - p12);
    pp12= cos9_32  * (p4 - p11);
    pp13= cos11_32 * (p5 - p10);
    pp14= cos13_32 * (p6 - p9);
    pp15= cos15_32 * (p7 - p8);

    p0 = pp0 + pp7;
    p1 = pp1 + pp6;
    p2 = pp2 + pp5;
    p3 = pp3 + pp4;
    p4 = cos1_16 * (pp0 - pp7);
    p5 = cos3_16 * (pp1 - pp6);
    p6 = cos5_16 * (pp2 - pp5);
    p7 = cos7_16 * (pp3 - pp4);
    p8 = pp8 + pp15;
    p9 = pp9 + pp14;
    p10= pp10 + pp13;
    p11= pp11 + pp12;
    p12= cos1_16 * (pp8  - pp15);
    p13= cos3_16 * (pp9  - pp14);
    p14= cos5_16 * (pp10 - pp13);
    p15= cos7_16 * (pp11 - pp12);

    pp0 = p0 + p3;
    pp1 = p1 + p2;
    pp2 = cos1_8 * (p0 - p3);
    pp3 = cos3_8 * (p1 - p2);
    pp4 = p4 + p7;
    pp5 = p5 + p6;
    pp6 = cos1_8 * (p4 - p7);
    pp7 = cos3_8 * (p5 - p6);
    pp8 = p8 + p11;
    pp9 = p9 + p10;
    pp10= cos1_8 * (p8 - p11);
    pp11= cos3_8 * (p9 - p10);
    pp12= p12 + p15;
    pp13 = p13 + p14;
    pp14= cos1_8 * (p12 - p15);
    pp15= cos3_8 * (p13 - p14);

    p0 = pp0 + pp1;
    p1 = cos1_4 * (pp0 - pp1);
    p2 = pp2 + pp3;
    p3 = cos1_4 * (pp2 - pp3);
    p4 = pp4 + pp5;
    p5 = cos1_4 * (pp4 - pp5);
    p6 = pp6 + pp7;
    p7 = cos1_4 * (pp6 - pp7);
    p8 = pp8 + pp9;
    p9 = cos1_4 * (pp8 - pp9);
    p10= pp10 + pp11;
    p11= cos1_4 * (pp10 - pp11);
    p12= pp12 + pp13;
    p13= cos1_4 * (pp12 - pp13);
    p14= pp14 + pp15;
    p15= cos1_4 * (pp14 - pp15);

    tmp              = p6 + p7;
    bufOffsetPtr[36] = -(p5 + tmp);
    bufOffsetPtr[44] = -(p4 + tmp);
    tmp              = p11 + p15;
    bufOffsetPtr[10] = tmp;
    bufOffsetPtr[6]  = p13 + tmp;
    tmp              = p14 + p15;
    bufOffsetPtr[46] = -(p8  + p12 + tmp);
    bufOffsetPtr[34] = -(p9  + p13 + tmp);
    tmp             += p10 + p11;
    bufOffsetPtr[38] = -(p13 + tmp);
    bufOffsetPtr[42] = -(p12 + tmp);
    bufOffsetPtr[2]  = p9 + p13 + p15;
    bufOffsetPtr[4]  = p5 + p7;
    bufOffsetPtr[48] = -p0;
    bufOffsetPtr[0]  = p1;
    bufOffsetPtr[8]  = p3;
    bufOffsetPtr[12] = p7;
    bufOffsetPtr[14] = p15;
    bufOffsetPtr[40] = -(p2  + p3);

    p0  = cos1_64  * (bandPtr[0] - bandPtr[31]);
    p1  = cos3_64  * (bandPtr[1] - bandPtr[30]);
    p2  = cos5_64  * (bandPtr[2] - bandPtr[29]);
    p3  = cos7_64  * (bandPtr[3] - bandPtr[28]);
    p4  = cos9_64  * (bandPtr[4] - bandPtr[27]);
    p5  = cos11_64 * (bandPtr[5] - bandPtr[26]);
    p6  = cos13_64 * (bandPtr[6] - bandPtr[25]);
    p7  = cos15_64 * (bandPtr[7] - bandPtr[24]);
    p8  = cos17_64 * (bandPtr[8] - bandPtr[23]);
    p9  = cos19_64 * (bandPtr[9] - bandPtr[22]);
    p10 = cos21_64 * (bandPtr[10]- bandPtr[21]);
    p11 = cos23_64 * (bandPtr[11]- bandPtr[20]);
    p12 = cos25_64 * (bandPtr[12]- bandPtr[19]);
    p13 = cos27_64 * (bandPtr[13]- bandPtr[18]);
    p14 = cos29_64 * (bandPtr[14]- bandPtr[17]);
    p15 = cos31_64 * (bandPtr[15]- bandPtr[16]);

    pp0 = p0 + p15;
    pp1 = p1 + p14;
    pp2 = p2 + p13;
    pp3 = p3 + p12;
    pp4 = p4 + p11;
    pp5 = p5 + p10;
    pp6 = p6 + p9;
    pp7 = p7 + p8;
    pp8 = cos1_32  * (p0 - p15);
    pp9 = cos3_32  * (p1 - p14);
    pp10= cos5_32  * (p2 - p13);
    pp11= cos7_32  * (p3 - p12);
    pp12= cos9_32  * (p4 - p11);
    pp13= cos11_32 * (p5 - p10);
    pp14= cos13_32 * (p6 - p9);
    pp15= cos15_32 * (p7 - p8);

    p0 = pp0 + pp7;
    p1 = pp1 + pp6;
    p2 = pp2 + pp5;
    p3 = pp3 + pp4;
    p4 = cos1_16 * (pp0 - pp7);
    p5 = cos3_16 * (pp1 - pp6);
    p6 = cos5_16 * (pp2 - pp5);
    p7 = cos7_16 * (pp3 - pp4);
    p8 = pp8  + pp15;
    p9 = pp9  + pp14;
    p10= pp10 + pp13;
    p11= pp11 + pp12;
    p12= cos1_16 * (pp8  - pp15);
    p13= cos3_16 * (pp9  - pp14);
    p14= cos5_16 * (pp10 - pp13);
    p15= cos7_16 * (pp11 - pp12);

    pp0 = p0 + p3;
    pp1 = p1 + p2;
    pp2 = cos1_8 * (p0 - p3);
    pp3 = cos3_8 * (p1 - p2);
    pp4 = p4 + p7;
    pp5 = p5 + p6;
    pp6 = cos1_8 * (p4 - p7);
    pp7 = cos3_8 * (p5 - p6);
    pp8 = p8 + p11;
    pp9 = p9 + p10;
    pp10= cos1_8 * (p8 - p11);
    pp11= cos3_8 * (p9 - p10);
    pp12= p12 + p15;
    pp13= p13 + p14;
    pp14= cos1_8 * (p12 - p15);
    pp15= cos3_8 * (p13 - p14);

    p0 = pp0 + pp1;
    p1 = cos1_4 * (pp0 - pp1);
    p2 = pp2 + pp3;
    p3 = cos1_4 * (pp2 - pp3);
    p4 = pp4 + pp5;
    p5 = cos1_4 * (pp4 - pp5);
    p6 = pp6 + pp7;
    p7 = cos1_4 * (pp6 - pp7);
    p8 = pp8 + pp9;
    p9 = cos1_4 * (pp8 - pp9);
    p10= pp10 + pp11;
    p11= cos1_4 * (pp10 - pp11);
    p12= pp12 + pp13;
    p13= cos1_4 * (pp12 - pp13);
    p14= pp14 + pp15;
    p15= cos1_4 * (pp14 - pp15);

    tmp              = p13 + p15;
    bufOffsetPtr[1]  = p1 + p9 + tmp;
    bufOffsetPtr[5]  = p5 + p7 + p11 + tmp;
    tmp             += p9;
    bufOffsetPtr[33] = -(p1 + p14 + tmp);
    tmp             += p5 + p7;
    bufOffsetPtr[3]  = tmp;
    bufOffsetPtr[35] = -(p6 + p14 + tmp);
    tmp              = p10 + p11 + p12 + p13 + p14 + p15;
    bufOffsetPtr[39] = -(p2 + p3 + tmp - p12);
    bufOffsetPtr[43] = -(p4 + p6 + p7 + tmp - p13);
    bufOffsetPtr[37] = -(p5 + p6 + p7 + tmp - p12);
    bufOffsetPtr[41] = -(p2 + p3 + tmp - p13);
    tmp              = p8 + p12 + p14 + p15;
    bufOffsetPtr[47] = -(p0 + tmp);
    bufOffsetPtr[45] = -(p4 + p6 + p7 + tmp);
    tmp              = p11 + p15;
    bufOffsetPtr[11] = p7  + tmp;
    tmp             += p3;
    bufOffsetPtr[9]  = tmp;
    bufOffsetPtr[7]  = p13 + tmp;
    bufOffsetPtr[13] = p7 + p15;
    bufOffsetPtr[15] = p15;

    bufOffsetPtr[16] = 0.0;
    for (i = 0; i < 16; i++) {
	bufOffsetPtr[32-i] = -bufOffsetPtr[i];
	bufOffsetPtr[63-i] = bufOffsetPtr[33+i];
    }
  }

# endif /* OPTIMIZE_093 & OPTIMIZE_095 */

#else /* original code */

    for (i = 0; i < 64; i++) {
	sum = 0.0;
	for (k = 0; k < 32; k++)
	    sum += bandPtr[k] * (*filter)[i][k];
	bufOffsetPtr[i] = sum;
    }
#endif /* OPTIMIZE */


# if OPTIMIZE_095 /* code in version 0.9.5 and earlier */

#  ifdef DETECT_CLIP
    /* Detects and "smooths" clipping
     *
     *  S(i,j) = D(j+32i) * U(j+32i+((i+1)>>1)*64)
     *  samples(i,j) = MWindow(j+32i) * bufPtr(j+32i+((i+1)>>1)*64)
     */

    for (j = 0; j < 32; j++) {	/* for each subband... */
	offset = bufOffset[channel] + j;
	/*
	 * -1.144989014 <= window[x] <= 1.144989014  for all x
	 */
	sum = window[j] * (*buf)[channel][offset] +
	    window[j + 32] * (*buf)[channel][(96 + offset) & 0x3ff] +
	    window[j + 64] * (*buf)[channel][(128 + offset) & 0x3ff] +
	    window[j + 96] * (*buf)[channel][(224 + offset) & 0x3ff] +
	    window[j + 128] * (*buf)[channel][(256 + offset) & 0x3ff] +
	    window[j + 160] * (*buf)[channel][(352 + offset) & 0x3ff] +
	    window[j + 192] * (*buf)[channel][(384 + offset) & 0x3ff] +
	    window[j + 224] * (*buf)[channel][(480 + offset) & 0x3ff] +
	    window[j + 256] * (*buf)[channel][(512 + offset) & 0x3ff] +
	    window[j + 288] * (*buf)[channel][(608 + offset) & 0x3ff] +
	    window[j + 320] * (*buf)[channel][(640 + offset) & 0x3ff] +
	    window[j + 352] * (*buf)[channel][(736 + offset) & 0x3ff] +
	    window[j + 384] * (*buf)[channel][(768 + offset) & 0x3ff] +
	    window[j + 416] * (*buf)[channel][(864 + offset) & 0x3ff] +
	    window[j + 448] * (*buf)[channel][(896 + offset) & 0x3ff] +
	    window[j + 480] * (*buf)[channel][(992 + offset) & 0x3ff];

	/*
	 * At this point we hope we have -1 <= sum <= 1 otherwise, it'll
	 * have to be clipped anyway, so it's no big deal to clip it earlier
	 * or something.
	 */

	/*
	 * Casting truncates towards zero for both positive and negative
	 * numbers, the result is cross-over distortion,  1995-07-12 shn
	 */

	if (sum > 0) {
	    foo = (long)(sum * SCALE + 0.5);
	} else {
	    foo = (long)(sum * SCALE - 0.5);
	}

	if (foo >= (long)SCALE) {
	    samples[j] = (short)(SCALE-1);
	    ++clip;
	} else if (foo < (long)-SCALE) {
	    samples[j] = (short)(-SCALE);
	    ++clip;
	} else
	    samples[j] = (short)foo;
    }

#  else /* !DETECT_CLIP */

    /* Faster, but does not detect clipping
     *
     *  S(i,j) = D(j+32i) * U(j+32i+((i+1)>>1)*64)
     *  samples(i,j) = MWindow(j+32i) * bufPtr(j+32i+((i+1)>>1)*64)
     */
    for (j = 0; j < 32; j++) {
	sum = 0.0;

	for (i = 0; i < 16; i++) {
	    k = j + (i << 5);
	    sum += window[k] * (*buf)[channel]
		    [((k + (((i+1) >> 1) << 6)) + bufOffset[channel]) & 0x3ff];
	}
	samples[j] = sum * SCALE;
    }

#  endif /* DETECT_CLIP */

# else /* most optimized */

  {
    REAL* dt[16];
    for (i = 0; i < 16; i++) {
	k = (i << 5);
	dt[i]=(*buf)[channel] + ( ((k + (((i+1) >> 1) << 6)) +
				bufOffset[channel]) & 0x3ff);
    }

    for (j = 0; j < 32; j++) {
	/* Unroll loop
	 * Here we multiply every subband by the appropriate scale
	 * factor and add 'em all up, so we get one sample.
	 */
	sum =
	    window[j] * dt[0][j]+
	    window[j + 32] * dt[1][j]+
	    window[j + 64] * dt[2][j]+
	    window[j + 96] * dt[3][j]+
	    window[j + 128] * dt[4][j]+
	    window[j + 160] * dt[5][j]+
	    window[j + 192] * dt[6][j]+
	    window[j + 224] * dt[7][j]+
	    window[j + 256] * dt[8][j]+
	    window[j + 288] * dt[9][j]+
	    window[j + 320] * dt[10][j]+
	    window[j + 352] * dt[11][j]+
	    window[j + 384] * dt[12][j]+
	    window[j + 416] * dt[13][j]+
	    window[j + 448] * dt[14][j]+
	    window[j + 480] * dt[15][j];

	/*foo = floor(sum * SCALE);*/
	if (sum > 0) {
	    foo = (long)(sum * SCALE + 0.5);
	} else {
	    foo = (long)(sum * SCALE - 0.5);
	}

#  ifdef DETECT_CLIP
	if (foo >= (long)SCALE) {
	    samples[j] = (short)(SCALE-1);
	    ++clip;
	} else if (foo < (long)-SCALE) {
	    samples[j] = (short)(-SCALE);
	    ++clip;
	} else
#  endif
	  samples[j] = foo;
      }
    }

# endif /* OPTIMIZE_095 */

    return clip;
}

#else /* INT_MATH */

int SubBandSynthesis(
    REAL	*bandPtr,
    int		 channel,
    short	*samples)
{
    int			 i, j, k;
    int			 sum;
    int			*bufOffsetPtr;
    static int		 init = 1;
    typedef REAL	 RNN[64][32];
    static RNN FAR	*Rfilter;
    typedef int		 NN[64][32];
    static NN FAR	*filter;
    typedef int		 BB[2][2*HAN_SIZE];
    static BB FAR	*buf;
    static int		 bufOffset[2] = {64,64};
    static int		*window;
    static REAL FAR	*realWindow;
    int			 clip = 0;   /* count & return # samples clipped */
    long		 foo;
    int			 offset;
    int			*filterp;
    REAL		*bandp;

    if (init == 1) {
	buf = (BB FAR *) mem_alloc(sizeof(BB),"BB");

	filter = (NN FAR *) mem_alloc(sizeof(NN), "NN");
	Rfilter = (RNN FAR *) mem_alloc(sizeof(RNN), "RNN");
	create_syn_filter(*Rfilter);
	for (i = 0; i < 64; i++) {
	    for (j = 0; j < 32; j++) {
		(*filter)[i][j] = (*Rfilter)[i][j] * 32768;
	    }
	}

	window = (int *) mem_alloc(sizeof(int) * HAN_SIZE, "WIN");
	realWindow = (REAL FAR *) mem_alloc(sizeof(REAL FAR) * HAN_SIZE, "WIN");
	read_syn_window(realWindow);
	/*
	 * Let's now convert those nasty REALs to nice 'n fast ints.
	 * This should really be done in read_syn_window() but for now this
	 * at least keeps all the bugs in one place.
	 */
	for (i = 0; i < HAN_SIZE; i++) {
	    window[i] = realWindow[i] * 32768;
	}

	init = 0;
    }

    bufOffset[channel] = (bufOffset[channel] - 64) & 0x3ff;
    bufOffsetPtr = &((*buf)[channel][bufOffset[channel]]);

    filterp = (int *)((*filter)[0]);
    bandp = bandPtr;

    i = 64;

    while (i--) {
	/*
	 * -1 <= filter <= 1
	 */
	*bufOffsetPtr++ =
	    bandp[0] * filterp[0] +
	    bandp[1] * filterp[1] +
	    bandp[2] * filterp[2] +
	    bandp[3] * filterp[3] +
	    bandp[4] * filterp[4] +
	    bandp[5] * filterp[5] +
	    bandp[6] * filterp[6] +
	    bandp[7] * filterp[7] +
	    bandp[8] * filterp[8] +
	    bandp[9] * filterp[9] +
	    bandp[10] * filterp[10] +
	    bandp[11] * filterp[11] +
	    bandp[12] * filterp[12] +
	    bandp[13] * filterp[13] +
	    bandp[14] * filterp[14] +
	    bandp[15] * filterp[15] +
	    bandp[16] * filterp[16] +
	    bandp[17] * filterp[17] +
	    bandp[18] * filterp[18] +
	    bandp[19] * filterp[19] +
	    bandp[20] * filterp[20] +
	    bandp[21] * filterp[21] +
	    bandp[22] * filterp[22] +
	    bandp[23] * filterp[23] +
	    bandp[24] * filterp[24] +
	    bandp[25] * filterp[25] +
	    bandp[26] * filterp[26] +
	    bandp[27] * filterp[27] +
	    bandp[28] * filterp[28] +
	    bandp[29] * filterp[29] +
	    bandp[30] * filterp[30] +
	    bandp[31] * filterp[31];
	filterp += 32;
    }

    /* Detects and "smooths" clipping
     *
     *  S(i,j) = D(j+32i) * U(j+32i+((i+1)>>1)*64)
     *  samples(i,j) = MWindow(j+32i) * bufPtr(j+32i+((i+1)>>1)*64)
     */

    for (j = 0; j < 32; j++) {	/* for each subband... */
	offset = bufOffset[channel] + j;
	sum =
	   (window[j] * (*buf)[channel][offset] +
	    window[j + 32] * (*buf)[channel][(96 + offset) & 0x3ff] +
	    window[j + 64] * (*buf)[channel][(128 + offset) & 0x3ff] +
	    window[j + 96] * (*buf)[channel][(224 + offset) & 0x3ff] +
	    window[j + 128] * (*buf)[channel][(256 + offset) & 0x3ff] +
	    window[j + 160] * (*buf)[channel][(352 + offset) & 0x3ff] +
	    window[j + 192] * (*buf)[channel][(384 + offset) & 0x3ff] +
	    window[j + 224] * (*buf)[channel][(480 + offset) & 0x3ff] +
	    window[j + 256] * (*buf)[channel][(512 + offset) & 0x3ff] +
	    window[j + 288] * (*buf)[channel][(608 + offset) & 0x3ff] +
	    window[j + 320] * (*buf)[channel][(640 + offset) & 0x3ff] +
	    window[j + 352] * (*buf)[channel][(736 + offset) & 0x3ff] +
	    window[j + 384] * (*buf)[channel][(768 + offset) & 0x3ff] +
	    window[j + 416] * (*buf)[channel][(864 + offset) & 0x3ff] +
	    window[j + 448] * (*buf)[channel][(896 + offset) & 0x3ff] +
	    window[j + 480] * (*buf)[channel][(992 + offset) & 0x3ff]) / 32768;

	/*
	 * Casting truncates towards zero for both positive and negative
	 * numbers, the result is cross-over distortion,  1995-07-12 shn
	 */

	if (sum >= (long)SCALE) {
	    samples[j] = (short)(SCALE-1);
	    ++clip;
	} else if (sum < (long)-SCALE) {
	    samples[j] = (short)(-SCALE);
	    ++clip;
	} else
	    samples[j] = (short)sum;
    }

    return clip;
}

#endif /* INT_MATH */


/* Shift left and right channels for AIFF output */
static void swapWordsInLong(short *loc, int words)
{
    int			 i;
    unsigned short	 dst0, dst1;
    unsigned short	*src = (unsigned short*)loc;

    for (i = 0; i < words; i += 2) {
	dst0 = src[0];
	dst1 = src[1];
	*src++ = dst1;
	*src++ = dst0;
    }
}


void out_fifo(
    short FAR		 pcm_sample[2][SSLIMIT][SBLIMIT],
    int			 num,
    frame_params	*fr_ps,
    int			 done,
    FILE		*outFile,
    unsigned long	*psampFrames)
{
#   define		BUFFERSIZE 8192		/* x 2 bytes */
    int			i, j, l;
    int			stereo = fr_ps->stereo;
    /*int		sblimit = fr_ps->sblimit;*/
    static short int	outsamp[BUFFERSIZE];
    static long		k = 0;

    if (!done) {
        for (i = 0; i < num; i++)
	    for (j = 0; j < SBLIMIT; j++) {	/* SBLIMIT = 32 */
		(*psampFrames)++;
		for (l = 0; l < stereo; l++) {
		    if (!(k & (BUFFERSIZE-1)) && k) {
#ifndef SOLARIS_SPARC
			/*
			 * Samples are big-endian. If this is a little-endian
			 * machine we must swap
			 */
			if ( NativeByteOrder == order_unknown ) {
			    NativeByteOrder = DetermineByteOrder();
			    if ( NativeByteOrder == order_unknown ) {
				fprintf(stderr, "byte order not determined\n" );
				exit(1);
			    }
			}
			if ( NativeByteOrder == order_littleEndian )
			    SwapBytesInWords(outsamp, BUFFERSIZE);
#endif
			if (Arguments.write_to_file) {
			    if (stereo == 2) {
				/* Swap channels for AIFF file */
				swapWordsInLong(outsamp, BUFFERSIZE);
			    }
			    fwrite(outsamp, 2, BUFFERSIZE, outFile);
			} else
			    sound_write(audiofd, outsamp, BUFFERSIZE * 2);
			k = 0;
		    }
		    outsamp[k++] = pcm_sample[l][i][j];
		}
	    }

    } else { /* if done */

	if (Arguments.write_to_file) {
	    if (stereo == 2) {
		/* Swap channels for AIFF file */
		swapWordsInLong(outsamp, (int)k);
	    }
	    fwrite(outsamp, 2, (int)k, outFile);
	} else
	    sound_write(audiofd, outsamp, (int)k * 2);
	k = 0;
    }
}


void buffer_CRC(Bit_stream_struc *bs, unsigned int *old_crc)
{
    *old_crc = getbits(bs, 16);
}


void recover_CRC_error(
    short FAR	   pcm_sample[2][SSLIMIT][SBLIMIT],
    int		   error_count,
    frame_params  *fr_ps,
    FILE	  *outFile,
    unsigned long *psampFrames)
{
    int		 stereo = fr_ps->stereo;
    int		 num, done, i;
    int		 samplesPerFrame, samplesPerSlot;
    layer	*hdr = fr_ps->header;
    long	 offset;
    short	*temp;

    num = 3;
    if (hdr->lay == 1)
	num = 1;

    samplesPerSlot = SBLIMIT * num * stereo;
    samplesPerFrame = samplesPerSlot * 32;

    if (error_count == 1) {     /* replicate previous error_free frame */
        done = 1;
        /* Flush out fifo */
        out_fifo(pcm_sample, num, fr_ps, done, outFile, psampFrames);
        /* Go back to the beginning of the previous frame */
        offset = sizeof(short int) * samplesPerFrame;
        fseek(outFile, -offset, SEEK_CUR);
        done = 0;
        for (i = 0; i < SCALE_BLOCK; i++) {
            fread(pcm_sample, 2, samplesPerSlot, outFile);
            out_fifo(pcm_sample, num, fr_ps, done, outFile, psampFrames);
        }

    } else {                       /* mute the frame */

        temp = (short*)pcm_sample;
        done = 0;
        for (i = 0; i < 2*3*SBLIMIT; i++)
            *temp++ = MUTE;     /* MUTE value is in decoder.h */
        for (i = 0; i < SCALE_BLOCK; i++)
            out_fifo(pcm_sample, num, fr_ps, done, outFile, psampFrames);
    }
}

/************************* Layer III routines **********************/

void III_get_side_info(
    Bit_stream_struc *bs,
    III_side_info_t *si,
    frame_params *fr_ps)
{
   int	ch, gr, i;
   int	stereo = fr_ps->stereo;

   if (fr_ps->header->version != MPEG_PHASE2_LSF) {
	si->main_data_begin = getbits(bs, 9);
	if (stereo == 1)
	    si->private_bits = getbits(bs,5);
	else
	    si->private_bits = getbits(bs,3);

       for (ch = 0; ch < stereo; ch++)
	    for (i = 0; i < 4; i++)
		si->ch[ch].scfsi[i] = get1bit(bs);

	for (gr = 0; gr < 2 ; gr++) {
	    for (ch = 0; ch < stereo; ch++) {
		si->ch[ch].gr[gr].part2_3_length = getbits(bs, 12);
		si->ch[ch].gr[gr].big_values = getbits(bs, 9);
		si->ch[ch].gr[gr].global_gain = getbits(bs, 8);
		si->ch[ch].gr[gr].scalefac_compress = getbits(bs, 4);
		si->ch[ch].gr[gr].window_switching_flag = get1bit(bs);
		if (si->ch[ch].gr[gr].window_switching_flag) {
		    si->ch[ch].gr[gr].block_type = getbits(bs, 2);
		    si->ch[ch].gr[gr].mixed_block_flag = get1bit(bs);
		    for (i = 0; i < 2; i++)
			si->ch[ch].gr[gr].table_select[i] = getbits(bs, 5);
		    for (i = 0; i < 3; i++)
			si->ch[ch].gr[gr].subblock_gain[i] = getbits(bs, 3);
		    /*
		     * Set region_count parameters since they are
		     * implicit in this case.
		     */
		    if (si->ch[ch].gr[gr].block_type == 0) {
			fprintf(stderr, "Side info bad: block_type 0 in split block\n");
			exit(0);
		    } else if (si->ch[ch].gr[gr].block_type == 2
			    && si->ch[ch].gr[gr].mixed_block_flag == 0)
			si->ch[ch].gr[gr].region0_count = 8; /* MI 9; */
		    else
			si->ch[ch].gr[gr].region0_count = 7; /* MI 8; */
		    si->ch[ch].gr[gr].region1_count =
				20 - si->ch[ch].gr[gr].region0_count;
		} else {
		    for (i = 0; i < 3; i++)
			si->ch[ch].gr[gr].table_select[i] = getbits(bs, 5);
		    si->ch[ch].gr[gr].region0_count = getbits(bs, 4);
		    si->ch[ch].gr[gr].region1_count = getbits(bs, 3);
		    si->ch[ch].gr[gr].block_type = 0;
		}
		si->ch[ch].gr[gr].preflag = get1bit(bs);
		si->ch[ch].gr[gr].scalefac_scale = get1bit(bs);
		si->ch[ch].gr[gr].count1table_select = get1bit(bs);
	    }
	}

    } else {	/* Layer 3 LSF */

	si->main_data_begin = getbits(bs, 8);
	if (stereo == 1)
	     si->private_bits = getbits(bs,1);
	else
	    si->private_bits = getbits(bs,2);

	for (gr = 0; gr < 1 ; gr++) {
	    for (ch = 0; ch < stereo; ch++) {
		si->ch[ch].gr[gr].part2_3_length = getbits(bs, 12);
		si->ch[ch].gr[gr].big_values = getbits(bs, 9);
		si->ch[ch].gr[gr].global_gain = getbits(bs, 8);
		si->ch[ch].gr[gr].scalefac_compress = getbits(bs, 9);
		si->ch[ch].gr[gr].window_switching_flag = get1bit(bs);
		if (si->ch[ch].gr[gr].window_switching_flag) {
		    si->ch[ch].gr[gr].block_type = getbits(bs, 2);
		    si->ch[ch].gr[gr].mixed_block_flag = get1bit(bs);
		    for (i = 0; i < 2; i++)
			si->ch[ch].gr[gr].table_select[i] = getbits(bs, 5);
		    for (i = 0; i < 3; i++)
			si->ch[ch].gr[gr].subblock_gain[i] = getbits(bs, 3);
		    /*
		     * Set region_count parameters since they are
		     * implicit in this case.
		     */
		    if (si->ch[ch].gr[gr].block_type == 0) {
			fprintf(stderr, "Side info bad: block_type 0 in split block\n");
			exit(0);
		    } else if (si->ch[ch].gr[gr].block_type == 2
			    && si->ch[ch].gr[gr].mixed_block_flag == 0)
			si->ch[ch].gr[gr].region0_count = 8; /* MI 9; */
		    else
			si->ch[ch].gr[gr].region0_count = 7; /* MI 8; */
		    si->ch[ch].gr[gr].region1_count =
				20 - si->ch[ch].gr[gr].region0_count;
		} else {
		    for (i = 0; i < 3; i++)
			si->ch[ch].gr[gr].table_select[i] = getbits(bs, 5);
		    si->ch[ch].gr[gr].region0_count = getbits(bs, 4);
		    si->ch[ch].gr[gr].region1_count = getbits(bs, 3);
		    si->ch[ch].gr[gr].block_type = 0;
		}

		si->ch[ch].gr[gr].scalefac_scale = get1bit(bs);
		si->ch[ch].gr[gr].count1table_select = get1bit(bs);
	    }
	}
    }
}


struct {
   int l[5];
   int s[3];} sfbtable = {{0, 6, 11, 16, 21},
                          {0, 6, 12}};

int slen[2][16] = {{0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4},
                   {0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3}};
struct  {
   int l[23];
   int s[14];} sfBandIndex[6] =
    {{{0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576},
     {0,4,8,12,18,24,32,42,56,74,100,132,174,192}},
    {{0,6,12,18,24,30,36,44,54,66,80,96,114,136,162,194,232,278,330,394,464,540,576},
     {0,4,8,12,18,26,36,48,62,80,104,136,180,192}},
    {{0,6,12,18,24,30,36,44,54,66,80,96,116,140,168,200,238,284,336,396,464,522,576},
     {0,4,8,12,18,26,36,48,62,80,104,134,174,192}},

    {{0,4,8,12,16,20,24,30,36,44,52,62,74,90,110,134,162,196,238,288,342,418,576},
     {0,4,8,12,16,22,30,40,52,66,84,106,136,192}},
    {{0,4,8,12,16,20,24,30,36,42,50,60,72,88,106,128,156,190,230,276,330,384,576},
     {0,4,8,12,16,22,28,38,50,64,80,100,126,192}},
    {{0,4,8,12,16,20,24,30,36,44,54,66,82,102,126,156,194,240,296,364,448,550,576},
     {0,4,8,12,16,22,30,42,58,78,104,138,180,192}}};



void III_get_scale_factors(
    III_scalefac_t	*scalefac,
    III_side_info_t	*si,
    int			 gr,
    int			 ch,
    frame_params	*fr_ps)
{
    int			 sfb, i, window;
    struct gr_info_s	*gr_info = &(si->ch[ch].gr[gr]);

    if (gr_info->window_switching_flag && (gr_info->block_type == 2)) {

	if (gr_info->mixed_block_flag) {	/* MIXED */ /* NEW - ag 11/25 */
	    for (sfb = 0; sfb < 8; sfb++)
		(*scalefac)[ch].l[sfb] =
			hgetbits(slen[0][gr_info->scalefac_compress]);
	    for (sfb = 3; sfb < 6; sfb++)
		for (window=0; window<3; window++)
		    (*scalefac)[ch].s[window][sfb] =
			hgetbits(slen[0][gr_info->scalefac_compress]);
	    for (sfb = 6; sfb < 12; sfb++)
		for (window=0; window<3; window++)
		    (*scalefac)[ch].s[window][sfb] =
			hgetbits(slen[1][gr_info->scalefac_compress]);
	    for (sfb=12,window=0; window<3; window++)
		(*scalefac)[ch].s[window][sfb] = 0;

	} else {				/* SHORT*/

	    for (i=0; i<2; i++)
		for (sfb = sfbtable.s[i]; sfb < sfbtable.s[i+1]; sfb++)
		    for (window=0; window<3; window++)
			(*scalefac)[ch].s[window][sfb] =
				hgetbits(slen[i][gr_info->scalefac_compress]);
	    for (sfb=12,window=0; window<3; window++)
		(*scalefac)[ch].s[window][sfb] = 0;
	}

    } else {   /* LONG types 0,1,3 */

	for (i=0; i<4; i++) {
	    if ((si->ch[ch].scfsi[i] == 0) || (gr == 0))
		for (sfb = sfbtable.l[i]; sfb < sfbtable.l[i+1]; sfb++)
		    (*scalefac)[ch].l[sfb] =
			  hgetbits(slen[(i<2)?0:1][gr_info->scalefac_compress]);
	}
	(*scalefac)[ch].l[22] = 0;
    }
}

/****************** new MPEG2 stuf  ***********/

static unsigned nr_of_sfb_block[6][3][4] = {{{6, 5, 5, 5},{ 9, 9, 9, 9 },{6, 9, 9, 9}},
                                         {{6, 5, 7, 3},{ 9, 9, 12, 6},{6, 9, 12, 6}},
                                         {{11, 10, 0, 0},{ 18, 18, 0, 0},{15,18,0,0 }},
                                         {{7, 7, 7, 0},{ 12, 12, 12, 0},{6, 15, 12, 0}},
                                         {{6, 6, 6, 3},{12, 9, 9, 6},{6, 12, 9, 6}},
                                         {{8, 8, 5, 0},{15,12,9,0},{6,18,9,0}}};
static unsigned scalefac_buffer[54];

void III_get_LSF_scale_data(
    III_scalefac_t *scalefac,
    III_side_info_t *si,
    int gr, int ch,
    frame_params *fr_ps)
{
short i, j, k;
short blocktypenumber, blocknumber = 0;

struct gr_info_s *gr_info = &(si->ch[ch].gr[gr]);
unsigned scalefac_comp, int_scalefac_comp, new_slen[4];

layer *hdr = fr_ps->header;
scalefac_comp =  gr_info->scalefac_compress;

    blocktypenumber = 0;
    if ((gr_info->block_type == 2) && (gr_info->mixed_block_flag == 0))
                                                        blocktypenumber = 1;

   if ((gr_info->block_type == 2) && (gr_info->mixed_block_flag == 1))
                                                        blocktypenumber = 2;

    if(!((( hdr->mode_ext == 1) || (hdr->mode_ext == 3)) && (ch == 1)))
    {
	if(scalefac_comp < 400)
        {
		new_slen[0] = (scalefac_comp >> 4) / 5 ;
		new_slen[1] = (scalefac_comp >> 4) % 5 ;
		new_slen[2] = (scalefac_comp % 16) >> 2 ;
		new_slen[3] = (scalefac_comp % 4);
                si->ch[ch].gr[gr].preflag = 0;

                blocknumber = 0;
         }

	else if( scalefac_comp  < 500)
        {
		new_slen[0] = ((scalefac_comp - 400 )  >> 2) / 5 ;
		new_slen[1] = ((scalefac_comp - 400) >> 2) % 5 ;
		new_slen[2] = (scalefac_comp - 400 ) % 4 ;
		new_slen[3] = 0;
                si->ch[ch].gr[gr].preflag = 0;
                blocknumber = 1;

        }

	else if( scalefac_comp  < 512)
        {
		new_slen[0] = (scalefac_comp - 500 ) / 3 ;
		new_slen[1] = (scalefac_comp - 500)  % 3 ;
		new_slen[2] = 0 ;
		new_slen[3] = 0;
                si->ch[ch].gr[gr].preflag = 1;
                blocknumber = 2;

        }
     }

    if((((hdr->mode_ext == 1) || (hdr->mode_ext == 3)) && (ch == 1)))
    {
      /*   intensity_scale = scalefac_comp %2; */
         int_scalefac_comp = scalefac_comp >> 1;

        if(int_scalefac_comp  < 180)
        {
		new_slen[0] = int_scalefac_comp  / 36 ;
		new_slen[1] = (int_scalefac_comp % 36 ) / 6 ;
		new_slen[2] = (int_scalefac_comp % 36) % 6;
		new_slen[3] = 0;
                si->ch[ch].gr[gr].preflag = 0;
                blocknumber = 3;

         }

	else if( int_scalefac_comp  < 244)
        {
		new_slen[0] = ((int_scalefac_comp - 180 )  % 64 ) >> 4 ;
		new_slen[1] = ((int_scalefac_comp - 180) % 16) >> 2 ;
		new_slen[2] = (int_scalefac_comp - 180 ) % 4 ;
		new_slen[3] = 0;
                si->ch[ch].gr[gr].preflag = 0;
                blocknumber = 4;

        }

	else if( int_scalefac_comp  < 255)
        {
		new_slen[0] = (int_scalefac_comp - 244 ) / 3 ;
		new_slen[1] = (int_scalefac_comp - 244 )  % 3 ;
		new_slen[2] = 0 ;
		new_slen[3] = 0;
                si->ch[ch].gr[gr].preflag = 0;
                blocknumber = 5;

        }
     }

     for (i=0;i< 45;i++) scalefac_buffer[i] = 0;

     k = 0;
     for (i = 0;i < 4;i++)
     {
      	for(j = 0; j < nr_of_sfb_block[blocknumber][blocktypenumber][i]; j++)
        {
           if(new_slen[i] == 0)
           {
	        scalefac_buffer[k] = 0;
           }
           else
           {
     	       scalefac_buffer[k] =  hgetbits(new_slen[i]);
           }
           k++;

        }
     }

}


void III_get_LSF_scale_factors(
    III_scalefac_t	*scalefac,
    III_side_info_t	*si,
    int			 gr,
    int			 ch,
    frame_params	*fr_ps)
{
    int			 sfb, /*i,*/k = 0, window;
    struct gr_info_s	*gr_info = &(si->ch[ch].gr[gr]);

    III_get_LSF_scale_data(scalefac, si, gr, ch, fr_ps);

    if (gr_info->window_switching_flag && (gr_info->block_type == 2)) {

	if (gr_info->mixed_block_flag) {	/* MIXED */ /* NEW - ag 11/25 */
	    for (sfb = 0; sfb < 8; sfb++) {
		(*scalefac)[ch].l[sfb] = scalefac_buffer[k];
		k++;
	    }
	    for (sfb = 3; sfb < 12; sfb++)
		for (window=0; window<3; window++) {
		    (*scalefac)[ch].s[window][sfb] = scalefac_buffer[k];
		    k++;
		}
	    for (sfb=12,window=0; window<3; window++)
		(*scalefac)[ch].s[window][sfb] = 0;

	} else {				/* SHORT*/

	    for (sfb = 0; sfb < 12; sfb++)
		for (window=0; window<3; window++) {
		    (*scalefac)[ch].s[window][sfb] = scalefac_buffer[k];
		    k++;
		}
	    for (sfb=12,window=0; window<3; window++)
		(*scalefac)[ch].s[window][sfb] = 0;
	}

    } else {					/* LONG types 0,1,3 */

	for (sfb = 0; sfb < 21; sfb++) {
	    (*scalefac)[ch].l[sfb] = scalefac_buffer[k];
	    k++;
	}
	(*scalefac)[ch].l[22] = 0;
    }
}


#ifndef BUILTIN_TABLES

void initialize_huffman() {
    FILE	*fi;
    static int	 huffman_initialized = FALSE;

    if (huffman_initialized)
	return;
    if (!(fi = OpenTableFile("huffdec") )) {
	fprintf(stderr, "Please check huffman table 'huffdec'\n");
	exit(1);
    }

    if (fi == NULL) {
	fprintf(stderr,"decoder table open error\n");
	exit(3);
    }

    if (read_decoder_table(fi) != HTN) {
	fprintf(stderr,"decoder table read error\n");
	exit(4);
    }
    fclose(fi);
    huffman_initialized = TRUE;
}
#endif


/*#define OPTIMIZETHIS*/

void III_hufman_decode(
    long int		 is[SBLIMIT][SSLIMIT],
    III_side_info_t	*si,
    int			 ch,
    int			 gr,
    int			 part2_start,
    frame_params	*fr_ps)
{
    int			 i, x, y;
    int			 v, w;
    struct huffcodetab	*h;
    int			 region1Start;
    int			 region2Start;
    int			 sfreq;
    int			 currentBit, grBits;
    my_gr_info		*gi;
#ifdef OPTIMIZETHIS
    long int		*isptr;
#endif
    /*int		 bt = (*si).ch[ch].gr[gr].window_switching_flag &&
			     ((*si).ch[ch].gr[gr].block_type == 2);*/

    gi = (my_gr_info *) &(*si).ch[ch].gr[gr];
    sfreq = fr_ps->header->sampling_frequency + (fr_ps->header->version * 3);
    initialize_huffman();

    /* Find region boundary for short block case */

    if ( ((*si).ch[ch].gr[gr].window_switching_flag) &&
	((*si).ch[ch].gr[gr].block_type == 2) ) {

	/* Region2 */
	region1Start = 36;	/* sfb[9/3]*3=36 */
	region2Start = 576;	/* No Region2 for short block case */

    } else {	   /* Find region boundary for long block case */

	region1Start = sfBandIndex[sfreq]
			.l[(*si).ch[ch].gr[gr].region0_count + 1]; /* MI */
	region2Start = sfBandIndex[sfreq]
			.l[(*si).ch[ch].gr[gr].region0_count +
			(*si).ch[ch].gr[gr].region1_count + 2]; /* MI */
    }

    grBits     = part2_start + (*si).ch[ch].gr[gr].part2_3_length;
    currentBit = hsstell();

    /* Read bigvalues area */
    for (i = 0; i < (*si).ch[ch].gr[gr].big_values * 2; i += 2) {
	if (i < region1Start)
	    h = &ht[(*si).ch[ch].gr[gr].table_select[0]];
	else if (i<region2Start)
	    h = &ht[(*si).ch[ch].gr[gr].table_select[1]];
	else
	    h = &ht[(*si).ch[ch].gr[gr].table_select[2]];
	huffman_decoder(h, &x, &y, &v, &w);
	is[i / SSLIMIT][i % SSLIMIT] = x;
	is[(i+1) / SSLIMIT][(i+1) % SSLIMIT] = y;
    }

    grBits     = part2_start + (*si).ch[ch].gr[gr].part2_3_length;
    currentBit = hsstell();

    /* Read count1 area */
    h = &ht[(*si).ch[ch].gr[gr].count1table_select + 32];
#ifdef OPTIMIZETHIS
    isptr = &is[i / SSLIMIT][i % SSLIMIT];
    while ((hsstell() < part2_start + (*si).ch[ch].gr[gr].part2_3_length ) &&
		(isptr < &is[SBLIMIT-1][SSLIMIT-1]) ) {
	huffman_decoder(h, (int *)&isptr[2], (int *)&isptr[3],
			   (int *)&isptr[0], (int *)&isptr[1]);
	isptr += 4;
    }
#else
    while ((hsstell() < part2_start + (*si).ch[ch].gr[gr].part2_3_length ) &&
		( i < SSLIMIT*SBLIMIT )) {
	huffman_decoder(h, &x, &y, &v, &w);
	is[i / SSLIMIT][i % SSLIMIT] = v;
	is[(i+1) / SSLIMIT][(i+1) % SSLIMIT] = w;
	is[(i+2) / SSLIMIT][(i+2) % SSLIMIT] = x;
	is[(i+3) / SSLIMIT][(i+3) % SSLIMIT] = y;
	i += 4;
    }
#endif

    grBits     = part2_start + (*si).ch[ch].gr[gr].part2_3_length;
    currentBit = hsstell();

    if (hsstell() > part2_start + (*si).ch[ch].gr[gr].part2_3_length) {
	i -=4;
	rewindNbits(hsstell()-part2_start - (*si).ch[ch].gr[gr].part2_3_length);
    }

    /* Dismiss stuffing Bits */
    grBits     = part2_start + (*si).ch[ch].gr[gr].part2_3_length;
    currentBit = hsstell();
    if ( currentBit < grBits )
	hgetbits( grBits - currentBit );

    /* Zero out rest */
    for (; i < SSLIMIT * SBLIMIT; i++)
	is[i / SSLIMIT][i % SSLIMIT] = 0;
}


static int pretab[22] = {0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,2,2,3,3,3,2,0};

void III_dequantize_sample(
    long int		 is[SBLIMIT][SSLIMIT],
    REAL		 xr[SBLIMIT][SSLIMIT],
    III_scalefac_t	*scalefac,
    struct gr_info_s	*gr_info,
    int			 ch,
    frame_params	*fr_ps)
{
    int			ss, sb, cb = 0, sfreq;
    int			next_cb_boundary, cb_begin = 0, cb_width = 0;
#if 0
/*#ifdef OPTIMIZE*/
    static REAL		globalGain;
    static int		lastGlobalGain = -1;

/* A pow(2, n) cache, where n = [-16..5] step 0.25 */
#define POW2CACHENEG (16*4)		/* # of cached numbers for n < 0   */
#define POW2CACHEPOS (5*4)		/* # of cached numbers for n > 0   */
static REAL pow2[POW2CACHENEG+POW2CACHEPOS+1] = { /* n < 0 + n > 0 + n = 0 */
/* <<n>>    <<pow(2,n)>> */
/*-16.00*/  0.00001526,  0.00001815,  0.00002158,  0.00002566,  0.00003052,
/*-14.75*/  0.00003629,  0.00004316,  0.00005132,  0.00006104,  0.00007258,
/*-13.50*/  0.00008632,  0.00010265,  0.00012207,  0.00014517,  0.00017263,
/*-12.25*/  0.00020530,  0.00024414,  0.00029033,  0.00034527,  0.00041059,
/*-11.00*/  0.00048828,  0.00058067,  0.00069053,  0.00082119,  0.00097656,
/* -9.75*/  0.00116134,  0.00138107,  0.00164238,  0.00195312,  0.00232267,
/* -8.50*/  0.00276214,  0.00328475,  0.00390625,  0.00464534,  0.00552427,
/* -7.25*/  0.00656950,  0.00781250,  0.00929068,  0.01104854,  0.01313901,
/* -6.00*/  0.01562500,  0.01858136,  0.02209709,  0.02627801,  0.03125000,
/* -4.75*/  0.03716272,  0.04419417,  0.05255603,  0.06250000,  0.07432544,
/* -3.50*/  0.08838835,  0.10511205,  0.12500000,  0.14865089,  0.17677670,
/* -2.25*/  0.21022410,  0.25000000,  0.29730178,  0.35355339,  0.42044821,
/* -1.00*/  0.50000000,  0.59460356,  0.70710678,  0.84089642,  1.00000000,
/*  0.25*/  1.18920712,  1.41421356,  1.68179283,  2.00000000,  2.37841423,
/*  1.50*/  2.82842712,  3.36358566,  4.00000000,  4.75682846,  5.65685425,
/*  2.75*/  6.72717132,  8.00000000,  9.51365692, 11.31370850, 13.45434264,
/*  4.00*/ 16.00000000, 19.02731384, 22.62741700, 26.90868529, 32.00000000
};

    /* Skip calculation unless value has changed */
    if (gr_info->global_gain != lastGlobalGain) {

	/* Compute overall (global) scaling */

	/* All vars are integers -> required accuracy = 0.25 */
	int n = gr_info->global_gain - 210;
	if (n >= -POW2CACHENEG && n <= POW2CACHEPOS)
	    globalGain = pow2[n + POW2CACHENEG];      /* pow2[60] = 2^0 = 1 */
	else {
	    globalGain = pow(2.0, (float)n * 0.25);
	    /*fprintf(stderr, "pow(2,n)[1] miss, n=%4.2f\n", (float)n * 0.25);*/
	}
	/* Original calculation */
	/* globalGain = pow(2.0, 0.25 * (gr_info->global_gain - 210.0)); */

	lastGlobalGain = gr_info->global_gain;
    }

    sfreq = fr_ps->header->sampling_frequency + (fr_ps->header->version * 3);

    /* Choose correct scalefactor band per block type, initalize boundary */

    if (gr_info->window_switching_flag && (gr_info->block_type == 2))
	if (gr_info->mixed_block_flag)
	    next_cb_boundary=sfBandIndex[sfreq].l[1];	/* LONG blocks: 0,1,3 */
	else {
	    next_cb_boundary=sfBandIndex[sfreq].s[1]*3;	/* Pure SHORT block */
	    cb_width = sfBandIndex[sfreq].s[1];
	    cb_begin = 0;
	} else
	    next_cb_boundary=sfBandIndex[sfreq].l[1];	/* LONG blocks: 0,1,3 */

    /* Apply formula per block type */

    for (sb = 0 ; sb < SBLIMIT ; sb++) {	/* 32 */
	for (ss = 0 ; ss < SSLIMIT ; ss++) {	/* 18 */

	    int		insample;
	    REAL	outsample;

	    if ((sb * 18) + ss == next_cb_boundary) {
		/* Adjust critical band boundary */

		if (gr_info->window_switching_flag &&
					(gr_info->block_type == 2)) {
		    if (gr_info->mixed_block_flag)  {
			if (((sb * 18) + ss) == sfBandIndex[sfreq].l[8]) {
			    next_cb_boundary=sfBandIndex[sfreq].s[4] * 3;
			    cb = 3;
			    cb_width = sfBandIndex[sfreq].s[cb+1] -
					sfBandIndex[sfreq].s[cb];
			    cb_begin = sfBandIndex[sfreq].s[cb] * 3;
			} else if (((sb * 18) + ss) < sfBandIndex[sfreq].l[8])
			    next_cb_boundary = sfBandIndex[sfreq].l[(++cb) + 1];
			else {
			    next_cb_boundary =
					sfBandIndex[sfreq].s[(++cb) + 1] * 3;
			    cb_width = sfBandIndex[sfreq].s[cb + 1] -
					sfBandIndex[sfreq].s[cb];
			    cb_begin = sfBandIndex[sfreq].s[cb] * 3;
			}
		    } else {
			next_cb_boundary = sfBandIndex[sfreq].s[(++cb) + 1] * 3;
			cb_width = sfBandIndex[sfreq].s[cb+1] -
					sfBandIndex[sfreq].s[cb];
			cb_begin = sfBandIndex[sfreq].s[cb]*3;
		    }

		} else /* long blocks */

		    next_cb_boundary = sfBandIndex[sfreq].l[(++cb) + 1];
	    }

	    /* Do long/short dependent scaling operations */

	    /* Initialize to overall (global) scaling */
	    outsample = globalGain;

	    if (gr_info->window_switching_flag && (
			((gr_info->block_type == 2) &&
			(gr_info->mixed_block_flag == 0)) ||
			((gr_info->block_type == 2) &&
			gr_info->mixed_block_flag && (sb >= 2)) )) {

		/* All vars are integers -> required accuracy = 2 */
		/* Ending expression = 1 should return pow(2, -2) */
		int n = 4 * (-2 * gr_info->subblock_gain[(((sb * 18) + ss) -
				cb_begin) / cb_width]);

		if (n >= -POW2CACHENEG && n <= POW2CACHEPOS)
		    outsample *= pow2[n + POW2CACHENEG];
		else {
		    outsample *= pow(2.0, (float)n * 0.25);
		    /*fprintf(stderr, "pow(2,n)[2] miss, n=%4.2f\n", n*0.25);*/
		}

		/* All vars are integers -> required accuracy = 0.5 */
		/* Ending expression = 1 should return pow(2, -0.5) */
		n = (int)-2 * ((1.0 + gr_info->scalefac_scale) *
		     (*scalefac)[ch].s[(((sb * 18) + ss) -
		     cb_begin) / cb_width][cb]);

		if (n >= -POW2CACHENEG && n <= POW2CACHEPOS)
		    outsample *= pow2[n + POW2CACHENEG];
		else {
		    outsample *= pow(2.0, (float)n * 0.25);
		    /*fprintf(stderr, "pow(2,n)[3] miss, n=%4.2f\n", n*0.25);*/
		}

	    } else {	/* LONG block types 0,1,3 & */
			/* 1st 2 subbands of switched blocks */

		/* All vars are integers -> required accuracy = 0.5 */
		/* Ending expression = 1 should return pow(2, -0.5) */
		int n;
		n = (int)-2 * ((1.0 + gr_info->scalefac_scale) *
		     ((*scalefac)[ch].l[cb] +
		     gr_info->preflag * pretab[cb]));

		if (n >= -POW2CACHENEG && n <= POW2CACHEPOS)
		    outsample *= pow2[n + POW2CACHENEG];
		else {
		    if (n > -1000) {
			outsample *= pow(2.0, (float)n * 0.5);
			/* fprintf(stderr, "pow(2,n)[4] miss, n=%4.2f\n", */
			/*	(float)n  -0.5); */
		    } else
			/* Bug in the implementation here? x sometimes */
			/* underflows like -2009044012. Then mute. */
			outsample = 0.0;
		}
	    }

	    /* Scale quantized value */
	    insample = is[sb][ss];		/* read input array once */
	    if (insample == 0)
		outsample = 0.0;		/* optimize no input */
	    else {

/* A pow(n, 4/3) cache, where n = [1..100] step 1 */
#define POWCACHED 100	/* biggest number that is cached */
static REAL powc[POWCACHED] = {
/* 1.00*/   1.00000000,   2.51984210,   4.32674871,   6.34960421,   8.54987973,
/* 6.00*/  10.90272356,  13.39051828,  16.00000000,  18.72075441,  21.54434690,
/*11.00*/  24.46378100,  27.47314182,  30.56735094,  33.74199170,  36.99318111,
/*16.00*/  40.31747360,  43.71178704,  47.17334510,  50.69963133,  54.28835233,
/*21.00*/  57.93740770,  61.64486527,  65.40894054,  69.22797937,  73.10044346,
/*26.00*/  77.02489778,  81.00000000,  85.02449121,  89.09718794,  93.21697518,
/*31.00*/  97.38280022, 101.59366733, 105.84863289, 110.14680124, 114.48732086,
/*36.00*/ 118.86938096, 123.29220851, 127.75506546, 132.25724628, 136.79807573,
/*41.00*/ 141.37690686, 145.99311909, 150.64611660, 155.33532675, 160.06019870,
/*46.00*/ 164.82020207, 169.61482577, 174.44357691, 179.30597979, 184.20157493,
/*51.00*/ 189.12991823, 194.09058015, 199.08314497, 204.10721008, 209.16238534,
/*56.00*/ 214.24829247, 219.36456448, 224.51084516, 229.68678854, 234.89205847,
/*61.00*/ 240.12632817, 245.38927980, 250.68060410, 256.00000000, 261.34717431,
/*66.00*/ 266.72184136, 272.12372273, 277.55254693, 283.00804915, 288.48997099,
/*71.00*/ 293.99806021, 299.53207052, 305.09176134, 310.67689758, 316.28724949,
/*76.00*/ 321.92259240, 327.58270661, 333.26737717, 338.97639374, 344.70955041,
/*81.00*/ 350.46664558, 356.24748183, 362.05186573, 367.87960775, 373.73052213,
/*86.00*/ 379.60442677, 385.50114309, 391.42049594, 397.36231351, 403.32642719,
/*91.00*/ 409.31267152, 415.32088406, 421.35090534, 427.40257871, 433.47575036,
/*96.00*/ 439.57026914, 445.68598654, 451.82275662, 457.98043591, 464.15888336
};
		int absSample = abs(insample);

		if (absSample <= POWCACHED)
		    outsample *= powc[absSample - 1];
		else
		    outsample *= pow(absSample, 4.0/3.0);
		if (insample < 0)
		    outsample = -outsample;	/* change the sign */
	    }
	    xr[sb][ss] = outsample;		/* touch output array once */
	}
    }

#else
    /* Original code */

    int		sign;

    sfreq = fr_ps->header->sampling_frequency + (fr_ps->header->version * 3);

    /* Choose correct scalefactor band per block type, initalize boundary */

    if (gr_info->window_switching_flag && (gr_info->block_type == 2))
	if (gr_info->mixed_block_flag)
	    next_cb_boundary=sfBandIndex[sfreq].l[1];	/* LONG blocks: 0,1,3 */
	else {
	    next_cb_boundary=sfBandIndex[sfreq].s[1]*3;	/* Pure SHORT block */
	    cb_width = sfBandIndex[sfreq].s[1];
	    cb_begin = 0;
	} else
	    next_cb_boundary=sfBandIndex[sfreq].l[1];	/* LONG blocks: 0,1,3 */

    /* Apply formula per block type */

    for (sb = 0 ; sb < SBLIMIT ; sb++) {
	for (ss = 0 ; ss < SSLIMIT ; ss++) {
	    if ((sb*18) + ss == next_cb_boundary) {
		/* Adjust critical band boundary */

		if (gr_info->window_switching_flag &&
					(gr_info->block_type == 2)) {
		    if (gr_info->mixed_block_flag)  {
			if (((sb*18)+ss) == sfBandIndex[sfreq].l[8]) {
			    next_cb_boundary=sfBandIndex[sfreq].s[4]*3;
			    cb = 3;
			    cb_width = sfBandIndex[sfreq].s[cb+1] -
					sfBandIndex[sfreq].s[cb];
			    cb_begin = sfBandIndex[sfreq].s[cb]*3;
			} else if (((sb*18)+ss) < sfBandIndex[sfreq].l[8])
			    next_cb_boundary = sfBandIndex[sfreq].l[(++cb)+1];
			else {
			    next_cb_boundary = sfBandIndex[sfreq].s[(++cb)+1]*3;
			    cb_width = sfBandIndex[sfreq].s[cb+1] -
					sfBandIndex[sfreq].s[cb];
			    cb_begin = sfBandIndex[sfreq].s[cb]*3;
			}
		    } else {
			next_cb_boundary = sfBandIndex[sfreq].s[(++cb)+1]*3;
			cb_width = sfBandIndex[sfreq].s[cb+1] -
					sfBandIndex[sfreq].s[cb];
			cb_begin = sfBandIndex[sfreq].s[cb]*3;
		    }

		} else /* long blocks */

		    next_cb_boundary = sfBandIndex[sfreq].l[(++cb)+1];
	    }

	    /* Compute overall (global) scaling */

	    xr[sb][ss] = pow(2.0, (0.25 * (gr_info->global_gain - 210.0)));

	    /* Do long/short dependent scaling operations */

	    if (gr_info->window_switching_flag && (
			((gr_info->block_type == 2) &&
			(gr_info->mixed_block_flag == 0)) ||
			((gr_info->block_type == 2) &&
			gr_info->mixed_block_flag && (sb >= 2)) )) {

		xr[sb][ss] *= pow(2.0, 0.25 * -8.0 * gr_info->subblock_gain[
				(((sb*18)+ss) - cb_begin)/cb_width]);

		xr[sb][ss] *= pow(2.0, 0.25 * -2.0 *
			(1.0+gr_info->scalefac_scale) *
			(*scalefac)[ch].s[(((sb*18)+ss) -
			cb_begin)/cb_width][cb]);

	    } else {	/* LONG block types 0,1,3 & */
			/* 1st 2 subbands of switched blocks */

		xr[sb][ss] *= pow(2.0, -0.5 * (1.0+gr_info->scalefac_scale)
					* ((*scalefac)[ch].l[cb]
					+ gr_info->preflag * pretab[cb]));
	    }

	    /* Scale quantized value */
	    sign = (is[sb][ss]<0) ? 1 : 0;
	    xr[sb][ss] *= pow(abs(is[sb][ss]), 4.0/3.0);
	    if (sign)
		xr[sb][ss] = -xr[sb][ss];
	}
    }
#endif /* OPTIMIZE */
}


void III_reorder (
    REAL		 xr[SBLIMIT][SSLIMIT],
    REAL		 ro[SBLIMIT][SSLIMIT],
    struct gr_info_s	*gr_info,
    frame_params	*fr_ps)
{
    int		sfreq;
    int		sfb, sfb_start, sfb_lines;
    int		sb, ss, window, freq, src_line, des_line;

    sfreq=fr_ps->header->sampling_frequency + (fr_ps->header->version * 3);

#ifdef OPTIMIZE

    if (gr_info->window_switching_flag && (gr_info->block_type == 2)) {

	/* Clear the whole array */
	memset((void *)&ro[0][0], 0, sizeof(REAL)*SBLIMIT*SSLIMIT);

	if (gr_info->mixed_block_flag) {
	    /* No reorder for low 2 subbands */
	    for (sb=0 ; sb < 2 ; sb++)
		for (ss=0 ; ss < SSLIMIT ; ss++) {
		    ro[sb][ss] = xr[sb][ss];
		}
	    /* Reordering for rest switched short */
	    for (sfb = 3, sfb_start = sfBandIndex[sfreq].s[3],
			sfb_lines=sfBandIndex[sfreq].s[4] - sfb_start;
			sfb < 13; sfb++,sfb_start=sfBandIndex[sfreq].s[sfb],
			(sfb_lines=sfBandIndex[sfreq].s[sfb+1] - sfb_start))
		for(window=0; window<3; window++)
		    for(freq=0;freq<sfb_lines;freq++) {
			src_line = sfb_start*3 + window*sfb_lines + freq;
			des_line = (sfb_start*3) + window + (freq*3);
			ro[des_line/SSLIMIT][des_line%SSLIMIT] =
				xr[src_line/SSLIMIT][src_line%SSLIMIT];
		    }

	} else {	/* pure short */

	    for (sfb=0,sfb_start=0,sfb_lines=sfBandIndex[sfreq].s[1];
			sfb < 13; sfb++,sfb_start=sfBandIndex[sfreq].s[sfb],
			(sfb_lines=sfBandIndex[sfreq].s[sfb+1] - sfb_start))
		for (window=0; window<3; window++)
		    for(freq=0;freq<sfb_lines;freq++) {
			src_line = sfb_start*3 + window*sfb_lines + freq;
			des_line = (sfb_start*3) + window + (freq*3);
			ro[des_line/SSLIMIT][des_line%SSLIMIT] =
				xr[src_line/SSLIMIT][src_line%SSLIMIT];
		    }
	}

    } else {	/* long blocks */

	/* Copy array xr[] to ro[] */
	memcpy((void *)&ro[0][0], (void *)&xr[0][0],
		sizeof(REAL)*SBLIMIT*SSLIMIT);
    }

#else
    /* Original code */

    for (sb=0; sb<SBLIMIT; sb++)
	for (ss=0; ss<SSLIMIT; ss++)
	    ro[sb][ss] = 0.0;

    if (gr_info->window_switching_flag && (gr_info->block_type == 2)) {
	if (gr_info->mixed_block_flag) {
	    /* No reorder for low 2 subbands */
	    for (sb=0 ; sb < 2 ; sb++)
		for (ss=0 ; ss < SSLIMIT ; ss++) {
		    ro[sb][ss] = xr[sb][ss];
		}
	    /* Reordering for rest switched short */
	    for (sfb = 3, sfb_start = sfBandIndex[sfreq].s[3],
			sfb_lines=sfBandIndex[sfreq].s[4] - sfb_start;
			sfb < 13; sfb++,sfb_start=sfBandIndex[sfreq].s[sfb],
			(sfb_lines=sfBandIndex[sfreq].s[sfb+1] - sfb_start))
		for(window=0; window<3; window++)
		    for(freq=0;freq<sfb_lines;freq++) {
			src_line = sfb_start*3 + window*sfb_lines + freq;
			des_line = (sfb_start*3) + window + (freq*3);
			ro[des_line/SSLIMIT][des_line%SSLIMIT] =
				xr[src_line/SSLIMIT][src_line%SSLIMIT];
		    }

	} else {	/* pure short */

	    for (sfb=0,sfb_start=0,sfb_lines=sfBandIndex[sfreq].s[1];
			sfb < 13; sfb++,sfb_start=sfBandIndex[sfreq].s[sfb],
			(sfb_lines=sfBandIndex[sfreq].s[sfb+1] - sfb_start))
		for (window=0; window<3; window++)
		    for(freq=0;freq<sfb_lines;freq++) {
			src_line = sfb_start*3 + window*sfb_lines + freq;
			des_line = (sfb_start*3) + window + (freq*3);
			ro[des_line/SSLIMIT][des_line%SSLIMIT] =
				xr[src_line/SSLIMIT][src_line%SSLIMIT];
		    }
	}

    } else {	/* long blocks */

	for (sb=0 ; sb < SBLIMIT ; sb++)
	    for (ss=0 ; ss < SSLIMIT ; ss++)
		ro[sb][ss] = xr[sb][ss];
    }
#endif /* OPTIMIZE */
}


static void III_i_stereo_k_values(
    int		is_pos,
    REAL	io,
    int		i,
    REAL FAR	k[2][576])
{
#ifdef OPTIMIZE
    if (is_pos == 0) {
	k[0][i] = k[1][i] = 1.0;
    } else if (is_pos & 1) {
	k[1][i] = 1.0;
	if (is_pos == 1)
	    k[0][i] = io;		/* pow(n,1) */
	else if (is_pos == 3)
	    k[0][i] = io * io;		/* pow(n,2) */
	else if (is_pos == 5)
	    k[0][i] = io * io * io;	/* pow(n,3) */
	else {
	    k[0][i] = pow(io, (float)(is_pos + 1)/2.0);
	    /*printf("pow(%4.2f, %4.2f) ", io, (float)(is_pos + 1)/2.0);
	    fflush(stdout);*/
	}
    } else {
	k[0][i] = 1.0;
	if (is_pos == 2)
	    k[1][i] = io;		/* pow(n,1) */
	else if (is_pos == 4)
	    k[1][i] = io * io;		/* pow(n,2) */
	else if (is_pos == 6)
	    k[1][i] = io * io * io;	/* pow(n,3) */
	else {
	    k[1][i] = pow(io, (float)is_pos/2.0);
	    /*printf("pow(%4.2f, %4.2f) ", io, (float)is_pos/2.0);
	    fflush(stdout);*/
	}
    }

#else /* the orginal code */

    if (is_pos == 0) {
	k[0][i] = 1.0;
	k[1][i] = 1.0;
    } else if ((is_pos % 2) == 1) {
	k[0][i] = pow(io, (float)(is_pos + 1)/2.0);
	k[1][i] = 1.0;
    } else {
	k[0][i] = 1.0;
	k[1][i] = pow(io, (float)is_pos/2.0);
    }
#endif /* OPTIMIZE */
}


void III_stereo(
    REAL		 xr[2][SBLIMIT][SSLIMIT],
    REAL		 lr[2][SBLIMIT][SSLIMIT],
    III_scalefac_t	*scalefac,
    struct gr_info_s	*gr_info,
    frame_params	*fr_ps)
{
    int			sfreq;
    int			stereo = fr_ps->stereo;
    int			ms_stereo =
				(fr_ps->header->mode == MPG_MD_JOINT_STEREO) &&
				(fr_ps->header->mode_ext & 0x2);
    int			i_stereo =
				(fr_ps->header->mode == MPG_MD_JOINT_STEREO) &&
				(fr_ps->header->mode_ext & 0x1);
    int			sfb;
    int			i, j, sb, ss, ch;
    REAL		io;
#ifdef OPTIMIZE
    static short	is_pos[SBLIMIT*SSLIMIT];
    static REAL		is_ratio[SBLIMIT*SSLIMIT];
    static REAL FAR	k[2][SBLIMIT*SSLIMIT];

#define QUICKTAN(dest, src) \
	if (((src)) == 1) \
	    (dest) = 0.267949192431; \
	else if (((src)) == 2) \
	    (dest) = 0.577350269189; \
	else if (((src)) == 3) \
	    (dest) = 1.0; \
	else if (((src)) == 4) \
	    (dest) = 1.73205080758; \
	else { \
	    (dest)= tan((float)(src) * (PI / 12)); \
	    /*printf("tan(%d) ", (src)); fflush(stdout);*/ \
	}
#else
    short		is_pos[SBLIMIT*SSLIMIT];
    REAL		is_ratio[SBLIMIT*SSLIMIT];
    REAL FAR		k[2][SBLIMIT*SSLIMIT];
#endif

    int lsf = (fr_ps->header->version == MPEG_PHASE2_LSF);

    if ((gr_info->scalefac_compress % 2) == 1)
	io = 0.707106781188;
    else
	io = 0.840896415256;

    sfreq = fr_ps->header->sampling_frequency + (fr_ps->header->version * 3);

    /* Intialization */
    for (i = 0; i < SBLIMIT*SSLIMIT; i++ )
	is_pos[i] = 7;

    if ((stereo == 2) && i_stereo ) {
	if (gr_info->window_switching_flag && (gr_info->block_type == 2)) {
	    if( gr_info->mixed_block_flag ) {
		int max_sfb = 0;

		for ( j=0; j<3; j++ ) {
		    int sfbcnt;
		    sfbcnt = 2;
		    for (sfb=12; sfb >=3; sfb--) {
			int lines;
			lines = sfBandIndex[sfreq].s[sfb+1] -
				sfBandIndex[sfreq].s[sfb];
			i = 3*sfBandIndex[sfreq].s[sfb] + (j+1) * lines - 1;
			while (lines > 0) {
			    if (xr[1][i/SSLIMIT][i%SSLIMIT] != 0.0) {
				sfbcnt = sfb;
				sfb = -10;
				lines = -10;
			    }
			    lines--;
			    i--;
			}
		    }
		    sfb = sfbcnt + 1;

		    if ( sfb > max_sfb )
			max_sfb = sfb;

		    while (sfb<12) {
			sb = sfBandIndex[sfreq].s[sfb+1] -
				sfBandIndex[sfreq].s[sfb];
			i = 3*sfBandIndex[sfreq].s[sfb] + j * sb;
			for ( ; sb > 0; sb--) {
			    is_pos[i] = (*scalefac)[1].s[j][sfb];
			    if ( is_pos[i] != 7 )
				if (lsf) {
				    III_i_stereo_k_values(is_pos[i],io,i,k);
				} else {
#ifdef OPTIMIZE
				    QUICKTAN(is_ratio[i], is_pos[i])
#else
				    is_ratio[i] = tan((float)is_pos[i] *
						(PI / 12));
#endif
				}
			    i++;
			}
			sfb++;
		    }

		    sb = sfBandIndex[sfreq].s[12]-sfBandIndex[sfreq].s[11];
		    sfb = 3*sfBandIndex[sfreq].s[11] + j * sb;
		    sb = sfBandIndex[sfreq].s[13]-sfBandIndex[sfreq].s[12];

		    i = 3*sfBandIndex[sfreq].s[11] + j * sb;
		    for ( ; sb > 0; sb-- ) {
			is_pos[i] = is_pos[sfb];
			is_ratio[i] = is_ratio[sfb];
			k[0][i] = k[0][sfb];
			k[1][i] = k[1][sfb];
			i++;
		    }
		}
		if ( max_sfb <= 3 ) {
		    i = 2;
		    ss = 17;
		    sb = -1;
		    while ( i >= 0 ) {
			if ( xr[1][i][ss] != 0.0 ) {
			    sb = i*18+ss;
			    i = -1;
			} else {
			    ss--;
			    if ( ss < 0 ) {
				i--;
				ss = 17;
			    }
			}
		    }
		    i = 0;
		    while ( sfBandIndex[sfreq].l[i] <= sb )
			i++;
		    sfb = i;
		    i = sfBandIndex[sfreq].l[i];
		    for ( ; sfb<8; sfb++ ) {
			sb = sfBandIndex[sfreq].l[sfb+1]-sfBandIndex[sfreq].l[sfb];
			for ( ; sb > 0; sb--) {
			    is_pos[i] = (*scalefac)[1].l[sfb];
			    if ( is_pos[i] != 7 )
				if ( lsf ) {
				    III_i_stereo_k_values(is_pos[i],io,i,k);
				} else {
#ifdef OPTIMIZE
				    QUICKTAN(is_ratio[i], is_pos[i])
#else
				    is_ratio[i] = tan((float)is_pos[i] *
						(PI / 12));
#endif
				}
			    i++;
			}
		    }
		}

	    } else {

		for ( j=0; j<3; j++ ) {
		    int sfbcnt;
		    sfbcnt = -1;
		    for( sfb=12; sfb >=0; sfb-- ) {
			int lines;
			lines = sfBandIndex[sfreq].s[sfb+1]-sfBandIndex[sfreq].s[sfb];
			i = 3*sfBandIndex[sfreq].s[sfb] + (j+1) * lines - 1;
			while ( lines > 0 ) {
			    if ( xr[1][i/SSLIMIT][i%SSLIMIT] != 0.0 ) {
				sfbcnt = sfb;
				sfb = -10;
				lines = -10;
			    }
			    lines--;
			    i--;
			}
		    }
		    sfb = sfbcnt + 1;
		    while ( sfb<12 ) {
			sb = sfBandIndex[sfreq].s[sfb+1]-sfBandIndex[sfreq].s[sfb];
			i = 3*sfBandIndex[sfreq].s[sfb] + j * sb;
			for ( ; sb > 0; sb--) {
			    is_pos[i] = (*scalefac)[1].s[j][sfb];
			    if ( is_pos[i] != 7 )
				if( lsf ) {
				    III_i_stereo_k_values(is_pos[i],io,i,k);
				} else {
#ifdef OPTIMIZE
				    QUICKTAN(is_ratio[i], is_pos[i])
#else
				    is_ratio[i] = tan((float)is_pos[i] *
							(PI / 12));
#endif
				}
			    i++;
			}
			sfb++;
		    }

		    sb = sfBandIndex[sfreq].s[12]-sfBandIndex[sfreq].s[11];
		    sfb = 3*sfBandIndex[sfreq].s[11] + j * sb;
		    sb = sfBandIndex[sfreq].s[13]-sfBandIndex[sfreq].s[12];

		    i = 3*sfBandIndex[sfreq].s[11] + j * sb;
		    for ( ; sb > 0; sb-- ) {
			is_pos[i] = is_pos[sfb];
			is_ratio[i] = is_ratio[sfb];
			k[0][i] = k[0][sfb];
			k[1][i] = k[1][sfb];
			i++;
		    }
		}
	    }
	} else {
	    i = 31;
	    ss = 17;
	    sb = 0;
	    while ( i >= 0 ) {
		if ( xr[1][i][ss] != 0.0 ) {
		    sb = i*18+ss;
		    i = -1;
		} else {
		    ss--;
		    if ( ss < 0 ) {
			i--;
			ss = 17;
		    }
		}
	    }
	    i = 0;
	    while ( sfBandIndex[sfreq].l[i] <= sb )
		i++;
	    sfb = i;
	    i = sfBandIndex[sfreq].l[i];
	    for ( ; sfb<21; sfb++ ) {
		sb = sfBandIndex[sfreq].l[sfb+1] - sfBandIndex[sfreq].l[sfb];
		for ( ; sb > 0; sb--) {
		    is_pos[i] = (*scalefac)[1].l[sfb];
		    if ( is_pos[i] != 7 )
			if( lsf ) {
			    III_i_stereo_k_values(is_pos[i],io,i,k);
			} else {
#ifdef OPTIMIZE
			    QUICKTAN(is_ratio[i], is_pos[i])
#else
			    is_ratio[i] = tan((float)is_pos[i] * (PI / 12));
#endif
			}
		    i++;
		}
	    }
	    sfb = sfBandIndex[sfreq].l[20];
	    for ( sb = 576 - sfBandIndex[sfreq].l[21]; sb > 0; sb-- ) {
		is_pos[i] = is_pos[sfb];
		is_ratio[i] = is_ratio[sfb];
		k[0][i] = k[0][sfb];
		k[1][i] = k[1][sfb];
		i++;
	    }
	}
    }

    for (ch = 0; ch < 2; ch++)
	for (sb = 0; sb < SBLIMIT; sb++)
	    for (ss = 0; ss < SSLIMIT; ss++)
		lr[ch][sb][ss] = 0;

    if (stereo == 2)
	for(sb = 0; sb < SBLIMIT; sb++)
	    for(ss = 0; ss < SSLIMIT; ss++) {
		i = (sb*18)+ss;
		if ( is_pos[i] == 7 ) {
		    if ( ms_stereo ) {
			lr[0][sb][ss] = (xr[0][sb][ss]+xr[1][sb][ss])/1.41421356;
			lr[1][sb][ss] = (xr[0][sb][ss]-xr[1][sb][ss])/1.41421356;
		    } else {
			lr[0][sb][ss] = xr[0][sb][ss];
			lr[1][sb][ss] = xr[1][sb][ss];
		    }
		} else if (i_stereo ) {
		    if ( lsf ) {
			lr[0][sb][ss] = xr[0][sb][ss] * k[0][i];
			lr[1][sb][ss] = xr[0][sb][ss] * k[1][i];
		    } else {
			lr[0][sb][ss] = xr[0][sb][ss] * (is_ratio[i]/(1+is_ratio[i]));
			lr[1][sb][ss] = xr[0][sb][ss] * (1/(1+is_ratio[i]));
		    }
		} else {
		    fprintf(stderr, "Error in stereo processing\n");
		}
	    }

    else	/* mono , bypass xr[0][][] to lr[0][][]*/

	for (sb = 0; sb < SBLIMIT; sb++)
	    for(ss = 0; ss < SSLIMIT; ss++)
		lr[0][sb][ss] = xr[0][sb][ss];
}

REAL Ci[8] = {-0.6,-0.535,-0.33,-0.185,-0.095,-0.041,-0.0142,-0.0037};


void III_antialias(
    REAL		 xr[SBLIMIT][SSLIMIT],
    REAL		 hybridIn[SBLIMIT][SSLIMIT],
    struct gr_info_s	*gr_info,
    frame_params	*fr_ps)
{
    static int	  init = 1;
    static REAL	  ca[8], cs[8];
    REAL	  bu, bd;	/* upper and lower butterfly inputs */
    int 	  ss, sb, sblim;

    if (init == 1) {
	int	i;
	REAL	sq;
	for (i = 0; i < 8; i++) {
	    sq = sqrt(1.0 + Ci[i] * Ci[i]);
	    cs[i] = 1.0 / sq;
	    ca[i] = Ci[i] / sq;
	}
	init = 0;
    }

    /* Clear all inputs */
    for(sb=0;sb<SBLIMIT;sb++)
	for(ss=0;ss<SSLIMIT;ss++)
	    hybridIn[sb][ss] = xr[sb][ss];

    if  (gr_info->window_switching_flag && (gr_info->block_type == 2) &&
			!gr_info->mixed_block_flag )
	return;

    if ( gr_info->window_switching_flag && gr_info->mixed_block_flag &&
			(gr_info->block_type == 2))
	sblim = 1;
    else
	sblim = SBLIMIT-1;

    /*
     * 31 alias-reduction operations between each pair of sub-bands
     * with 8 butterflies between each pair
     */
    for (sb = 0; sb < sblim; sb++)
	for (ss = 0; ss < 8; ss++) {
	    bu = xr[sb][17-ss];
	    bd = xr[sb+1][ss];
	    hybridIn[sb][17-ss] = (bu * cs[ss]) - (bd * ca[ss]);
	    hybridIn[sb+1][ss] = (bd * cs[ss]) + (bu * ca[ss]);
	}
}


/*------------------------------------------------------------------*
 *								    *
 *    Function: Calculation of the inverse MDCT 		    *
 *    In the case of short blocks the 3 output vectors are already  *
 *    overlapped and added in this module.			    *
 *								    *
 *    New layer3                                                    *
 *								    *
 *------------------------------------------------------------------*/
void inv_mdct(
    REAL		in[18],
    REAL		out[36],
    int			block_type)
{
    int			i, m, N, p;
    static REAL		win[4][36];
    static int		init = 1;
    REAL		sum;
#ifdef OPTIMIZE
    static REAL		const1[12][6];
    static REAL		const3[36][18];
#else
    REAL		tmp[12];
    static REAL		COS[4 * 36];
#endif

    if (init == 1) {

	/* Type 0 */
	for (i = 0; i < 36; i++)
	    win[0][i] = sin(PI36 * (i + 0.5));

	/* Type 1 */
	for (i = 0; i < 18; i++)
	    win[1][i] = sin(PI36 * (i + 0.5));
	for (i = 18; i < 24; i++)
	    win[1][i] = 1.0;
	for (i = 24; i < 30; i++)
	    win[1][i] = sin(PI12 * (i + 0.5 - 18));
	for (i = 30; i < 36; i++)
	    win[1][i] = 0.0;

	/* Type 3 */
	for (i = 0; i < 6; i++)
	    win[3][i] = 0.0;
	for (i = 6; i < 12; i++)
	    win[3][i] = sin(PI12 * (i + 0.5 - 6));
	for (i = 12; i < 18; i++)
	    win[3][i] = 1.0;
	for (i = 18; i < 36; i++)
	    win[3][i] = sin(PI36 * (i + 0.5));

	/* Type 2 */
	for (i = 0; i < 12; i++)
	    win[2][i] = sin(PI12 * (i + 0.5)) ;
	for (i = 12; i < 36; i++)
	    win[2][i] = 0.0 ;

#ifdef OPTIMIZE

	/* Pre-calculate cosine array */
	N = 12;
	for (p = 0; p < N; p++)
	    for (m = 0; m < N/2; m++)
		const1[p][m] = cos((PI/(2*N)) * (2*p+1+N/2) * (2*m+1));

	/*
	 *  Create an optimized cosine lookup table
	 *  for the block_type != 2 case
	 */
	for (p = 0; p < 36; p++) {
	    int pconst = (p * 2) + 19;

	    for (m = 0; m < 18; m++)
		/* Calculate cosine value for this position */
		const3[p][m] = cos(PI/(2*36) *
			      ((pconst * ((m << 1) + 1)) % 144));
	}
	init = 0;
    }

    if (block_type == 2) {

	/* Clear the out[] array */
	memset((void *)&out[0], 0, sizeof(REAL) * 36);

	/*
	 * Optimization I
	 *
	 * for (i = 0; i < 3; i++) {
	 *    for (p = 0; p < 12; p++) {
	 *	sum = in[i] * const1[p][0] +
	 *	    in[i + 3] * const1[p][1] +
	 *	    in[i + 6] * const1[p][2] +
	 *	    in[i + 9] * const1[p][3] +
	 *	    in[i + 12] * const1[p][4] +
	 *	    in[i + 15] * const1[p][5];
	 *	out[6 * i + p + 6] += sum * win[2][p];
	 *    }
	 * }
	 */

	/*
	 * Optimization II
	 */
	for (p = 0; p < 12; p++) {
	    sum = in[0] * const1[p][0] +
		in[3] * const1[p][1] +
		in[6] * const1[p][2] +
		in[9] * const1[p][3] +
		in[12] * const1[p][4] +
		in[15] * const1[p][5];
	    out[p + 6] += sum * win[2][p];
	}
	for (p = 0; p < 12; p++) {
	    sum = in[1] * const1[p][0] +
		in[4] * const1[p][1] +
		in[7] * const1[p][2] +
		in[10] * const1[p][3] +
		in[13] * const1[p][4] +
		in[16] * const1[p][5];
	    out[p + 12] += sum * win[2][p];
	}
	for (p = 0; p < 12; p++) {
	    sum = in[2] * const1[p][0] +
		in[5] * const1[p][1] +
		in[8] * const1[p][2] +
		in[11] * const1[p][3] +
		in[14] * const1[p][4] +
		in[17] * const1[p][5];
	    out[p + 18] += sum * win[2][p];
	}

    } else { /* block_type != 2 */

	/*
	 * Optimization I
	 *
	 * for (p = 0; p < 36; p++) {
	 *    sum = 0.0;
	 *
	 *    for (m = 0; m < 18; m++)
	 *	 sum += in[m] * const3[p][m];
	 *    out[p] = sum * win[block_type][p];
	 * }
	 */

	/*
	 * Optimization II
	 */
	int nzcount, nzin[18];

	for (p = 0, nzcount = 0; p < 18; p++)
	    if (in[p] < -0.0004 || in[p] > 0.0004)	/* analyze input */
		nzin[nzcount++] = p;

	for (p = 0; p < 36; p++) {
	    sum = 0.0;

	    for (m = 0; m < nzcount; m++)
		sum += in[nzin[m]] * const3[p][nzin[m]];
	    out[p] = sum * win[block_type][p];
	}
    }

#else /* original code */

	for (i = 0; i < 4*36; i++)
	    COS[i] = cos(PI/(2*36) * i);

	init = 0;
    }

    for (i = 0; i < 36; i++)
	out[i]=0;

    if (block_type == 2){
	N = 12;
	for (i=0; i<3; i++){
	    for (p = 0; p<N; p++){
		sum = 0.0;
		for(m=0; m<N/2; m++)
		    sum += in[i+3*m] * cos( PI/(2*N)*(2*p+1+N/2)*(2*m+1) );
		tmp[p] = sum * win[block_type][p] ;
	    }
	    for (p=0; p<N; p++)
		out[6*i+p+6] += tmp[p];
       }
    } else {
	N = 36;
	for (p=0; p<N; p++){
	    sum = 0.0;
	    for (m=0; m < N/2; m++)
		sum += in[m] * COS[((2*p+1+N/2)*(2*m+1))%(4*36)];
	    out[p] = sum * win[block_type][p];
	}
    }
#endif /* OPTIMIZE */
}



void III_hybrid(
    REAL		 fsIn[SSLIMIT],   /* freq samples per subband in */
    REAL		 tsOut[SSLIMIT],  /* time samples per subband out */
    int			 sb,
    int			 ch,
    struct gr_info_s	*gr_info,
    frame_params	*fr_ps)
{
    int		 ss;
#ifdef OPTIMIZE
    static REAL	 rawout[36];
#else
    REAL	 rawout[36];
#endif
    static REAL	 prevblck[2][SBLIMIT][SSLIMIT];
    static int	 init = 1;
    int		 bt;

    if (init == 1) {
	int i, j, k;

	for (i = 0; i < 2; i++)
	    for (j = 0; j < SBLIMIT; j++)
		for (k = 0; k < SSLIMIT; k++)
		    prevblck[i][j][k] = 0.0;
	init = 0;
    }

    bt = (gr_info->window_switching_flag && gr_info->mixed_block_flag &&
		(sb < 2)) ? 0 : gr_info->block_type;

    inv_mdct(fsIn, rawout, bt);

    /* Overlap addition */
    for (ss = 0; ss < SSLIMIT; ss++) {		/* 18 */
	tsOut[ss] = rawout[ss] + prevblck[ch][sb][ss];
	prevblck[ch][sb][ss] = rawout[ss+18];
    }
}


/* Return the number of slots for main data of current frame */

int main_data_slots(frame_params fr_ps)
{
    int nSlots =
	(144 * bitrate[fr_ps.header->version][2][fr_ps.header->bitrate_index]) /
	s_freq[fr_ps.header->version][fr_ps.header->sampling_frequency];

    if (fr_ps.header->version != MPEG_PHASE2_LSF) {
	if (fr_ps.stereo == 1)
	    nSlots -= 17;
	else
	    nSlots -=32;
    } else {
	nSlots = nSlots / 2;
	if (fr_ps.stereo == 1)
	    nSlots -= 9;
	else
	    nSlots -=17;
    }

    if (fr_ps.header->padding)
	nSlots++;
    nSlots -= 4;
    if (fr_ps.header->error_protection)
	nSlots -= 2;

    return nSlots;
}
