/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2015             *
 * by the Xiph.Org Foundation http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

 function: PCM data vector blocking, windowing and dis/reassembly
 last mod: $Id: block.c 19457 2015-03-03 00:15:29Z giles $

 Handle windowing, overlap-add, etc of the PCM vectors.  This is made
 more amusing by Vorbis' current two allowed block sizes.

 ********************************************************************/

#include <string.h>
#include "os.h"

#include "window.h"
#include "mdct.h"
#include "registry.h"
#include "misc.h"

/* Count the bits needed to represent (v - 1); for v >= 1 this is the
   ceiling of log2(v).  Inputs 0 and 1 both give 0. */
int32_t ilog2(uint32_t v)
{
  int32_t bits;
  uint32_t rem = (v != 0) ? v - 1 : 0;

  /* one iteration per significant bit of (v - 1) */
  for (bits = 0; rem != 0; rem >>= 1)
    bits++;

  return bits;
}

/* pcm accumulator examples (not exhaustive):

 <-------------- lW ---------------->
                   <--------------- W ---------------->
:            .....|.....       _______________         |
:        .'''     |     '''_---      |       |\        |
:.....'''         |_____--- '''......|       | \_______|
:.................|__________________|_______|__|______|
                  |<------ Sl ------>|      > Sr <     |endW
                  |beginSl           |endSl  |  |endSr
                  |beginW            |endlW  |beginSr


                      |< lW >|
                   <--------------- W ---------------->
                  |   |  ..  ______________            |
                  |   | '  `/        |     ---_        |
                  |___.'___/`.       |         ---_____|
                  |_______|__|_______|_________________|
                  |      >|Sl|<      |<------ Sr ----->|endW
                  |       |  |endSl  |beginSr          |endSr
                  |beginW |  |endlW
                  mult[0] |beginSl                     mult[n]

 <-------------- lW ----------------->
                          |<--W-->|
:            ..............  ___  |   |
:        .'''             |`/   \ |   |
:.....'''                 |/`....\|...|
:.........................|___|___|___|
                          |Sl |Sr |endW
                          |   |   |endSr
                          |   |beginSr
                          |   |endSl
                          |beginSl
                          |beginW
*/

/* block abstraction setup *********************************************/

/* Granularity, in bytes, to which _vorbis_block_alloc rounds every
   request.  A definition supplied before this point takes precedence.
   The rounding is done with a bit mask, so the value has to be a power
   of two. */
#ifndef WORD_ALIGN
#define WORD_ALIGN 8
#endif

/* Reset *vb to an empty block bound to the DSP state v.  No local
   storage is allocated here.  Always returns 0. */
int vorbis_block_init(vorbis_dsp_state *v, vorbis_block *vb){
  /* start from an all-zero structure */
  memset(vb,0,sizeof(*vb));

  /* spell out the empty storage pool explicitly, then attach the owner */
  vb->localstore=NULL;
  vb->localalloc=0;
  vb->vd=v;

  return 0;
}

/* Carve `bytes` bytes (rounded up to a multiple of WORD_ALIGN) out of
   the block-local storage pool of vb.

   When the current pool cannot hold the request, a new pool of exactly
   the rounded size is allocated.  The previous pool cannot be resized
   or freed because pointers handed out earlier still refer to it, so it
   is parked on vb->reap and its used size is added to vb->totaluse.

   Returns a pointer into the pool, or NULL when memory could not be
   obtained.  After a failure vb is still consistent: a parked pool is
   owned by the reap chain only, and vb simply has no current pool. */
void* _vorbis_block_alloc(vorbis_block *vb,int32_t bytes){
  void* ret = NULL;
  bytes=(bytes+(WORD_ALIGN-1)) & ~(WORD_ALIGN-1);
  if(bytes+vb->localtop>vb->localalloc){
    /* can't just _ogg_realloc... there are outstanding pointers */
    if(vb->localstore){
      struct alloc_chain *link=_ogg_malloc(sizeof(*link));
      if (!link) return NULL;
      vb->totaluse+=vb->localtop;
      link->next=vb->reap;
      link->ptr=vb->localstore;
      vb->reap=link;

      /* The reap chain owns the old pool from here on.  Drop our
         reference immediately: if the allocation below fails, a stale
         localstore would be freed once through the chain and then
         reallocated/freed again as the current pool. */
      vb->localstore=NULL;
      vb->localalloc=0;
      vb->localtop=0;
    }
    /* highly conservative */
    ret = _ogg_malloc(bytes);
    if (!ret) return NULL;
    vb->localalloc=bytes;
    vb->localstore=ret;
    vb->localtop=0;
  }
  ret=(void *)(((char *)vb->localstore)+vb->localtop);
  vb->localtop+=bytes;
  return ret;
}

/* reap the chain, pull the ripcord

   Frees every retired pool on vb->reap, tries to grow the current pool
   by the number of bytes those pools had in use (vb->totaluse), and
   rewinds the pool so it is empty again.  If the pool cannot be grown
   it is kept at its present size. */
void _vorbis_block_ripcord(vorbis_block *vb){
  /* reap the chain */
  struct alloc_chain *reap=vb->reap;
  while(reap){
    struct alloc_chain *next=reap->next;
    _ogg_free(reap->ptr);
    _ogg_free(reap);
    reap=next;
  }
  /* consolidate storage */
  if(vb->totaluse){
    /* realloc into a temporary: assigning straight to vb->localstore
       would lose (leak) the existing pool when the call fails */
    void *grown=_ogg_realloc(vb->localstore,vb->totaluse+vb->localalloc);
    if(grown){
      vb->localstore=grown;
      vb->localalloc+=vb->totaluse;
    }
    /* on failure the old pool and its size are still valid; keep them */
    vb->totaluse=0;
  }

  /* pull the ripcord */
  vb->localtop=0;
  vb->reap=NULL;
}

/* Release all storage held by vb and leave the structure zeroed.
   Always returns 0. */
int vorbis_block_clear(vorbis_block *vb){
  /* free the retired pools and rewind the current one */
  _vorbis_block_ripcord(vb);

  /* then give back the current pool itself, if there is one */
  if(vb->localstore!=NULL){
    _ogg_free(vb->localstore);
  }

  memset(vb,0,sizeof(*vb));
  return 0;
}

/* Analysis side code, but directly related to blocking.  Thus it's
   here and not in analysis.c (which is for analysis transforms only).
   The init is here because some of it is shared */

/* Prepare the DSP state v for decoding the stream described by vi.

   Returns 0 on success.  Returns 1, without touching v, when the codec
   setup is missing or has implausible mode/blocksize values.  Returns
   OV_EMEMORY when an allocation fails, and the result of
   vorbis_book_init_decode (or 1 for a missing static book) when the
   codebooks cannot be finished.

   On the later failure paths whatever was already allocated stays
   attached to v; nothing is released here.
   NOTE(review): callers presumably run vorbis_dsp_clear(v) after a
   failure -- confirm. */
int vorbis_synthesis_init(vorbis_dsp_state *v,vorbis_info *vi){
  int i, rc = 1;
  codec_setup_info *ci=vi->codec_setup;
  private_state *b=NULL;

  if(ci==NULL||
     ci->modes<=0||
     ci->blocksizes[0]<64||
     ci->blocksizes[1]<ci->blocksizes[0]){
    return 1;
  }
  memset(v,0,sizeof(*v));
  b=v->backend_state=_ogg_calloc(1,sizeof(*b));
  if(!b) return OV_EMEMORY;

  v->vi=vi;
  b->modebits=ilog2(ci->modes);

  b->transform[0]=_ogg_calloc(VI_TRANSFORMB,sizeof(*b->transform[0]));
  if(!b->transform[0]) return OV_EMEMORY;
  b->transform[1]=_ogg_calloc(VI_TRANSFORMB,sizeof(*b->transform[1]));
  /* check the table that was just allocated, not transform[0] again */
  if(!b->transform[1]) return OV_EMEMORY;

  /* MDCT is transform 0 */

  b->transform[0][0]=_ogg_calloc(1,sizeof(mdct_lookup));
  if(!b->transform[0][0]) return OV_EMEMORY;
  b->transform[1][0]=_ogg_calloc(1,sizeof(mdct_lookup));
  if(!b->transform[1][0]) return OV_EMEMORY;
  mdct_init(b->transform[0][0],ci->blocksizes[0]);
  mdct_init(b->transform[1][0],ci->blocksizes[1]);

  /* Vorbis I uses only window type 0 */
  b->window[0]=_vorbis_window(0,ci->blocksizes[0]/2);
  b->window[1]=_vorbis_window(0,ci->blocksizes[1]/2);

  /* finish the codebooks */
  if (!ci->fullbooks){
    ci->fullbooks=_ogg_calloc(ci->books,sizeof(*ci->fullbooks));
    if(!ci->fullbooks) return OV_EMEMORY;
  }
  for(i=0;i<ci->books;i++){
    if(ci->book_param[i]==NULL)
      goto abort_books;
    if((rc=vorbis_book_init_decode(ci->fullbooks+i,ci->book_param[i]))!=0)
      goto abort_books;
    /* decode codebooks are now standalone after init */
    vorbis_staticbook_destroy(ci->book_param[i]);
    ci->book_param[i]=NULL;
  }

  /* initialize the storage vectors. blocksize[1] is small for encode,
     but the correct size for decode */
  v->pcm_storage=ci->blocksizes[1];
  /* The pointer tables are zero-filled so that, if a later allocation
     fails, teardown only ever sees NULL or valid channel pointers
     rather than uninitialised ones. */
  v->pcm=_ogg_calloc(vi->channels,sizeof(*v->pcm));
  if(!v->pcm) return OV_EMEMORY;
  v->pcmret=_ogg_calloc(vi->channels,sizeof(*v->pcmret));
  if(!v->pcmret) return OV_EMEMORY;
  for(i=0;i<vi->channels;i++)
  {
    v->pcm[i]=_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i]));
    if(!v->pcm[i]) return OV_EMEMORY;
  }

  /* all 1 (large block) or 0 (small block) */
  /* explicitly set for the sake of clarity */
  v->lW=0; /* previous window size */
  v->W=0;  /* current window size */

  /* all vector indexes */
  v->centerW=ci->blocksizes[1]/2;

  v->pcm_current=v->centerW;

  /* initialize all the backend lookups */
  b->flr=_ogg_calloc(ci->floors,sizeof(*b->flr));
  if(!b->flr) return OV_EMEMORY;
  b->residue=_ogg_calloc(ci->residues,sizeof(*b->residue));
  if(!b->residue) return OV_EMEMORY;

  for(i=0;i<ci->floors;i++)
    b->flr[i]=_floor_P[ci->floor_type[i]]->
      look(v,ci->floor_param[i]);

  for(i=0;i<ci->residues;i++)
    b->residue[i]=_residue_P[ci->residue_type[i]]->
      look(v,ci->residue_param[i]);

  /* -1 marks "nothing decoded yet" for the position trackers */
  v->pcm_returned=-1;
  v->granulepos=-1;
  v->sequence=-1;
  v->eofflag=0;
  ((private_state *)(v->backend_state))->sample_count=-1;

  return 0;
abort_books:
  /* drop every static book that has not been consumed yet */
  for(i=0;i<ci->books;i++){
    if(ci->book_param[i]!=NULL){
      vorbis_staticbook_destroy(ci->book_param[i]);
      ci->book_param[i]=NULL;
    }
  }
  return rc;
}

/* Release everything hanging off the DSP state v and zero the
   structure.  A NULL v is ignored.  Sub-allocations are checked one by
   one before they are released, so a state whose initialisation stopped
   part-way can be passed in as well. */
void vorbis_dsp_clear(vorbis_dsp_state *v){
  int i;
  if(v){
    vorbis_info *vi=v->vi;
    codec_setup_info *ci=(vi?vi->codec_setup:NULL);
    private_state *b=v->backend_state;

    if(b){
      if(b->transform[0]){
        /* the lookup itself may be missing after a failed init */
        if(b->transform[0][0]){
          mdct_clear(b->transform[0][0]);
          _ogg_free(b->transform[0][0]);
        }
        _ogg_free(b->transform[0]);
      }
      if(b->transform[1]){
        if(b->transform[1][0]){
          mdct_clear(b->transform[1][0]);
          _ogg_free(b->transform[1][0]);
        }
        _ogg_free(b->transform[1]);
      }

      if(b->flr){
        if(ci){
          for(i=0;i<ci->floors;i++){
            /* entries stay NULL when init failed before the look pass */
            if(b->flr[i])
              _floor_P[ci->floor_type[i]]->
                free_look(b->flr[i]);
          }
        }
        _ogg_free(b->flr);
      }
      if(b->residue){
        if(ci){
          for(i=0;i<ci->residues;i++){
            if(b->residue[i])
              _residue_P[ci->residue_type[i]]->
                free_look(b->residue[i]);
          }
        }
        _ogg_free(b->residue);
      }
    }

    if(v->pcm){
      /* the channel count lives in vi; without it only the tables
         themselves can be released */
      if(vi){
        for(i=0;i<vi->channels;i++){
          if(v->pcm[i])_ogg_free(v->pcm[i]);
        }
      }
      _ogg_free(v->pcm);
      if(v->pcmret)_ogg_free(v->pcmret);
    }

    if(b){
      _ogg_free(b);
    }

    memset(v,0,sizeof(*v));
  }
}

/* Unlike in analysis, the window is only partially applied for each
   block.  The time domain envelope is not yet handled at the point of
   calling (as it relies on the previous block). */

/* Sample-vector helpers whose definitions are not in the part of the
   file shown here.  vorbis_synthesis_blockin calls them in place of its
   disabled (#if 0) reference loops: pcmadd where the loop reads
   dst[i]+=src[i], pcmcpy where it reads dst[i]=src[i], each over n
   samples.  NOTE(review): semantics inferred from those loops only --
   confirm against the implementations. */
extern void pcmadd(int32_t* dst, int32_t* src, int n);
extern void pcmcpy(int32_t* dst, int32_t* src, int n);

/* Fold the decoded block vb into the synthesis state v.

   For every channel the first part of vb->pcm is overlap-added onto the
   data kept from the previous block and the second half of vb->pcm is
   stored for the next call.  The function then updates the window
   history (lW/W), the sequence number, the running sample count and the
   granule position, trimming the readable range when the stream's
   granule position says the data is shorter than what was decoded.

   Returns OV_EINVAL when vb is NULL or when samples from an earlier
   block are still waiting to be read; returns 0 otherwise.

   The #if 0 sections are disabled reference loops for the pcmadd/pcmcpy
   calls next to them.  They use an index `i` that this function does
   not declare, so they are documentation only. */
int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
  vorbis_info *vi=v->vi;
  codec_setup_info *ci=vi->codec_setup;
  private_state *b=v->backend_state;
  int j;

  if(!vb)return(OV_EINVAL);
  /* unread output pending (pcm_returned==-1 is the initial state and
     does not count) */
  if(v->pcm_current>v->pcm_returned  && v->pcm_returned!=-1)return(OV_EINVAL);

  /* shift the window history: current becomes previous */
  v->lW=v->W;
  v->W=vb->W;
  v->nW=-1;

  if((v->sequence==-1)||
     (v->sequence+1 != vb->sequence)){
    v->granulepos=-1; /* out of sequence; lose count */
    b->sample_count=-1;
  }

  v->sequence=vb->sequence;

  if(vb->pcm){  /* no pcm to process if vorbis_synthesis_trackonly
       was called on block */
    /* half sizes: n for the current block, n0/n1 for short/long */
    int n=ci->blocksizes[v->W]/2;
    int n0=ci->blocksizes[0]/2;
    int n1=ci->blocksizes[1]/2;

    int thisCenter;
    int prevCenter;

    /* centerW is either 0 or n1; the two halves of each channel
       buffer swap roles on every call */
    if(v->centerW){
      thisCenter=n1;
      prevCenter=0;
    }else{
      thisCenter=0;
      prevCenter=n1;
    }

    /* v->pcm is now used like a two-stage double buffer.  We don't want
       to have to constantly shift *or* adjust memory usage.  Don't
       accept a new block until the old is shifted out */

    for(j=0;j<vi->channels;j++){
      /* the overlap/add section */
      if(v->lW){
        if(v->W){
          /* large/large */
#if 0
          int32_t *pcm=v->pcm[j]+prevCenter;
          int32_t *p=vb->pcm[j];
          for(i=0;i<n1;i++)
            pcm[i]+=p[i];
#else
          pcmadd(v->pcm[j]+prevCenter, vb->pcm[j], n1);
#endif
        }else{
#if 0
          /* large/small */
          int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2;
          int32_t *p=vb->pcm[j];
          for(i=0;i<n0;i++)
            pcm[i]+=p[i];
#else
          /* large/small: the short block lands n1/2-n0/2 samples into
             the previous half */
          pcmadd(v->pcm[j]+prevCenter+n1/2-n0/2, vb->pcm[j], n0);
#endif
        }
      }else{
        if(v->W){
          /* small/large */
          int32_t *pcm=v->pcm[j]+prevCenter;
          int32_t *p=vb->pcm[j]+n1/2-n0/2;
#if 0
          for(i=0;i<n0;i++)
            pcm[i]+=p[i];
          for(;i<n1/2+n0/2;i++)
            pcm[i]=p[i];
#else
          /* add the n0 overlapping samples, then copy the following
             n1/2-n0/2 samples of the long block */
          pcmadd(pcm, p, n0);
          pcmcpy(pcm+n0,p+n0, n1/2-n0/2);
#endif
        }else{
#if 0
          /* small/small */
          int32_t *pcm=v->pcm[j]+prevCenter;
          int32_t *p=vb->pcm[j];
          for(i=0;i<n0;i++)
            pcm[i]+=p[i];
#else
          /* small/small */
          pcmadd(v->pcm[j]+prevCenter, vb->pcm[j], n0);
#endif
        }
      }

      /* the copy section */
      {
#if 0
        int32_t *pcm=v->pcm[j]+thisCenter;
        int32_t *p=vb->pcm[j]+n;
        for(i=0;i<n;i++)
          pcm[i]=p[i];
#else
        /* keep the second half of this block for the next overlap */
        pcmcpy(v->pcm[j]+thisCenter,vb->pcm[j]+n, n);
#endif
      }
    }

    /* flip the buffer halves for the next call */
    if(v->centerW)
      v->centerW=0;
    else
      v->centerW=n1;

    /* deal with initial packet state; we do this using the explicit
       pcm_returned==-1 flag otherwise we're sensitive to first block
       being short or long */

    if(v->pcm_returned==-1){
      /* first block: empty readable range */
      v->pcm_returned=thisCenter;
      v->pcm_current=thisCenter;
    }else{
      /* readable range: a quarter of the previous block size plus a
         quarter of the current one, starting at prevCenter */
      v->pcm_returned=prevCenter;
      v->pcm_current=prevCenter+
      ci->blocksizes[v->lW]/4+
      ci->blocksizes[v->W]/4;
    }
  }

  /* track the frame number... This is for convenience, but also
     making sure our last packet doesn't end with added padding.  If
     the last packet is partial, the number of samples we'll have to
     return will be past the vb->granulepos.

     This is not foolproof!  It will be confused if we begin
     decoding at the last page after a seek or hole.  In that case,
     we don't have a starting point to judge where the last frame
     is.  For this reason, vorbisfile will always try to make sure
     it reads the last two marked pages in proper sequence */

  if(b->sample_count==-1){
    b->sample_count=0;
  }else{
    b->sample_count+=(int32_t)(ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4);
  }

  if(v->granulepos==-1){
    if(vb->granulepos!=-1){ /* only set if we have a position to set to */

      v->granulepos=vb->granulepos;

      /* is this a short page? */
      if(b->sample_count>v->granulepos){
        /* corner case; if this is both the first and last audio page,
           then spec says the end is cut, not beginning */
        /* NOTE(review): the difference is stored in an int32_t without
           a cast here, unlike the matching computation further down --
           confirm the operand widths make this safe */
        int32_t extra=b->sample_count-vb->granulepos;

        /* we use ogg_int64_t for granule positions because a
           uint64 isn't universally available.  Unfortunately,
           that means granposes can be 'negative' and result in
           extra being negative */
        if(extra<0)
          extra=0;

        if(vb->eofflag){
          /* trim the end */
          /* no preceding granulepos; assume we started at zero (we'd
             have to in a short single-page stream) */
          /* granulepos could be -1 due to a seek, but that would result
             in a long count, not short count */

          /* Guard against corrupt/malicious frames that set EOP and
             a backdated granpos; don't rewind more samples than we
             actually have */
          if(extra > v->pcm_current - v->pcm_returned)
            extra = v->pcm_current - v->pcm_returned;

          v->pcm_current-=extra;
        }else{
          /* trim the beginning */
          v->pcm_returned+=extra;
          if(v->pcm_returned>v->pcm_current)
            v->pcm_returned=v->pcm_current;
        }
      }
    }
  }else{
    /* position already known: advance it by the samples this block
       contributes, then reconcile with the block's own position */
    v->granulepos+=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4;
    if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){

      if(v->granulepos>vb->granulepos){
        int32_t extra=(int32_t)(v->granulepos-vb->granulepos);

        if (extra)
          if(vb->eofflag){
            /* partial last frame.  Strip the extra samples off */

            /* Guard against corrupt/malicious frames that set EOP and
               a backdated granpos; don't rewind more samples than we
               actually have */
            if(extra > (v->pcm_current - v->pcm_returned))
              extra = (v->pcm_current - v->pcm_returned);

            /* we use ogg_int64_t for granule positions because a
               uint64 isn't universally available.  Unfortunately,
               that means granposes can be 'negative' and result in
               extra being negative */
            if(extra<0)
              extra=0;

            v->pcm_current-=extra;
          } /* else {Shouldn't happen *unless* the bitstream is out of
               spec.  Either way, believe the bitstream } */
      }/* else{ Shouldn't happen *unless* the bitstream is out of
                spec.  Either way, believe the bitstream } */
      v->granulepos=vb->granulepos;
    }
  }

  /* Update, cleanup */

  if(vb->eofflag)v->eofflag=1;

  return(0);
}

/* Report how many decoded samples per channel are ready to be read.

   When pcm is non-NULL, *pcm is pointed at v->pcmret, a per-channel
   table of pointers to the first unread sample.  pcm==NULL indicates we
   just want the pending sample count, no more.  Returns 0 when nothing
   is available. */
int vorbis_synthesis_pcmout(vorbis_dsp_state *v,xint ***pcm){
  vorbis_info *vi=v->vi;
  int ch;

  /* nothing decoded yet, or everything already consumed */
  if(v->pcm_returned<=-1 || v->pcm_returned>=v->pcm_current)
    return 0;

  if(pcm!=NULL){
    for(ch=0;ch<vi->channels;ch++)
      v->pcmret[ch]=v->pcm[ch]+v->pcm_returned;
    *pcm=v->pcmret;
  }

  return v->pcm_current-v->pcm_returned;
}

/* Advance the read position of v by n samples.  Returns OV_EINVAL,
   leaving the position unchanged, when a non-zero n would move it past
   the last available sample; returns 0 otherwise.
   NOTE(review): a negative n is not rejected and moves the position
   backwards -- confirm callers never pass one. */
int vorbis_synthesis_read(vorbis_dsp_state *v,int n){
  if(n!=0){
    if(v->pcm_returned+n>v->pcm_current)
      return OV_EINVAL;
  }

  v->pcm_returned+=n;
  return 0;
}
