/* Copyright 2001 Matt Flax <flatmax@ieee.org>
   This file is part of MFFM Time Scale Modification for Audio.

   MFFM Time Scale Modification for Audio is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.
   
   MFFM Time Scale Modification for Audio is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You have received a copy of the GNU General Public License
   along with MFFM Time Scale Modification for Audio
 */
#ifndef WSOLA_H_
#define WSOLA_H_

///Uncomment to enable the general debug traces (enter/exit messages) in this header
//#define W_DEBUG

///Version string of this WSOLA implementation
#define VERSION_NUM "4.3"

//Rounding helper used throughout this header; maps onto rint(), which rounds
//according to the current floating point rounding mode.
//It may be overridden by defining roundD before including this header.
#ifndef roundD
//Previous hand rolled variant, kept for reference :
//#define roundD(a) ((a-(int)a>=0.5) ? ceil(a) : floor(a))
#define roundD(a) rint(a)
#endif

//values.h is expected to supply MAXDOUBLE (MAXINT is also used further down);
//fall back to a literal when MAXDOUBLE is not defined
#include <values.h>
#ifndef MAXDOUBLE
#define MAXDOUBLE 1.79769e+308
#endif

//Real and imaginary part accessors for a complex number stored as a two element array
#define c_re(c) ((c)[0])
#define c_im(c) ((c)[1])

#include <string.h>
#include <iostream>
using namespace std;
#include "hanning.H"

//From the MFFM FFTw C++ wrapper available at sourceforge.net
#include <mffm/complexFFT.H>
#include <complex>

//From the MFFM_TimeCode library available at sourceforge.net
#include <mffm/timeCode.H>

///The audio sample type; may be overridden by defining ATYPE before including this header
#ifndef ATYPE
#define ATYPE short int
#endif

#ifndef SAMPLE_FREQUENCY
#define SAMPLE_FREQUENCY 44100 ///The default sample frequency
#endif

///The hanning window duration in s
#define HANNING_DURATION 0.085
///The proportional hanning overlap
#define HANNING_OVERLAP 0.5
///The sample length of the hanning window for the given sample frequency.
///The argument and the whole expansion are parenthesised so that expressions
///such as HANNING_LENGTH(a+b) are evaluated as a single value.
#define HANNING_LENGTH(sampFreq) ((int)roundD(HANNING_DURATION*(double)(sampFreq)))

///The hanning window length is divided by this value to obtain the maximum
///search offset (deltaMax) used during matching
#define DELTA_DIVISOR 6.0

/* reference :
   An Overlap Add Technique Based on Waveform Similarity (WSOLA)
   For High Quality Time-Scale Modification of Speech
   Werner Verhelst and Marc Roelands
   IEEE publication
*/

///The time code structures used within
#define MASTERCOUNTERTYPE_W MasterCounter<field, 1>
#define TIMECODETYPE_W TimeCode<MASTERCOUNTERTYPE_W, ATYPE>

class WSOLA : public Hanning {
  TIMECODETYPE_W sFrame, dFrame; //Source and desired frames
  //, nextSFrame;//src and dest and next src frames
  ATYPE *output; // The timescale modified audio
  ATYPE *input; // The input data
  //int count;

  int N; //The total size of the fft data structures
  ///The fft structure used to implement the convolution
  complexFFT *fft;
  ///The fft data structures
  //complexFFTData *sFrameFFTData, *dFrameFFTData, *hanningFFTData;
  complexFFTData *sFrameFFTData, *dFrameFFTData, *hanningFFTData;

  int hanningLength, deltaMax, sampleFrequency;

  ///Inits the fft structures
  void initFFTStructures(int count){
    if (sFrameFFTData)
      if (sFrameFFTData->getSize()!=count)
	deInitFFTStructures();
      else
	return;
	
    //Set up the fft structures
    if (!(sFrameFFTData=new complexFFTData(count)))
      cerr << "WSOLA::initFFTStructures : couldn't malloc sFrameFFTData of size "<<count<<endl;
    if (!(dFrameFFTData=new complexFFTData(count)))
      cerr << "WSOLA::initFFTStructures : couldn't malloc dFrameFFTData of size "<<count<<endl;
    if (!(hanningFFTData=new complexFFTData(count)))
      cerr << "WSOLA::initFFTStructures : couldn't malloc hanningFFTData of size "<<count<<endl;
    if (!(fft=new complexFFT(hanningFFTData)))
      cerr << "WSOLA::initFFTStructures : couldn't malloc fft"<<endl;
    //set the hanning data here
    for (int i=0;i<hanningLength;i++){
      c_re(hanningFFTData->in[i])=wnd[i];
      c_im(hanningFFTData->in[i])=0.0;
    }
    for (int i=hanningLength;i<count;i++)
      c_re(hanningFFTData->in[i])=c_im(hanningFFTData->in[i])=0.0;
    fft->fwdTransform();
    return;
  }

  /// De-Inits the FFT structures
  void deInitFFTStructures(void){
    if (fft) delete fft; fft=NULL;
    if (hanningFFTData) delete hanningFFTData; hanningFFTData=NULL;
    if (sFrameFFTData) delete sFrameFFTData; sFrameFFTData=NULL;
    if (dFrameFFTData) delete dFrameFFTData; dFrameFFTData=NULL;
  }

  /* call this to init structures which aren't FFT based */
  int newInit(void){
#ifdef W_DEBUG
    cout<<"WSOLA::newInit : enter "<<endl;
#endif
    if (output) delete [] output;
    if (!(output=new ATYPE[hanningLength])){
      cerr<<"WSOLA::process : output audio array malloc failure"<<endl;
      return -1;
    }
    if (input) delete [] input;
    if (!(input=new ATYPE[hanningLength+deltaMax])){
      cerr<<"WSOLA::process : output audio array malloc failure"<<endl;
      return -2;
    }
    bzero(output, hanningLength*sizeof(ATYPE));
    bzero(input, (hanningLength+deltaMax)*sizeof(ATYPE));
    
    //Set up the time codes - default to max int size
    sFrame.init(0,hanningLength); // Dumy inits ... set maximum now
    //sFrame.setMaxCount(MAXINT-1);
    sFrame.setFinish(MAXINT-1);
    sFrame.setEnd(MAXINT-2); // Make sure you don't try to set the current count to the same as the finish count or it will loop !

    dFrame.init(0,hanningLength); // Dumy inits ... set maximum now
    dFrame.setFinish(MAXINT-1);
    dFrame.setEnd(MAXINT-2);

    //    cout<<"maximal sound file size = "<<MAXINT<<" frames"<<endl;
    //Ensure the array frame and window sizes are correct
    (*sFrame.window)=hanningLength;
    //(*nextSFrame.window)=hanningLength;
    (*dFrame.window)=hanningLength;
    sFrame.window->setFrameSize(sizeof(ATYPE));
    //nextSFrame.window->setFrameSize(sizeof(ATYPE));
    dFrame.window->setFrameSize(sizeof(ATYPE));

    //Process input frame by input frame ....
    sFrame=0;
    dFrame=0;

    cout<<"sFrame"<<endl;
    cout<<sFrame<<endl;
    cout<<"dFrame"<<endl;
    cout<<dFrame<<endl;

    //    exit(-1);

#ifdef W_DEBUG
    cout<<"WSOLA::newInit : exit "<<endl;
#endif

    return 0;
  }

  ///This function returns the maximum similarity location
  int findSimilarityFFT(){
    //time data must be preloaded 
    fft->switchData(sFrameFFTData);
    fft->fwdTransform();
    fft->switchData(dFrameFFTData);
    fft->fwdTransform();

    // apply hanning to next source frame in this domain ...
    //multiply the outputs
      for (int i=0;i<N;i++){
	std::complex<fftw_real> res=(std::complex<fftw_real>(c_re(sFrameFFTData->out[i]), c_im(sFrameFFTData->out[i]))*
				     std::complex<fftw_real>(c_re(hanningFFTData->out[i]), -c_im(hanningFFTData->out[i]))-
				     std::complex<fftw_real>(c_re(dFrameFFTData->out[i]), c_im(dFrameFFTData->out[i])));
	c_re(sFrameFFTData->out[i])=res.real();
	c_im(sFrameFFTData->out[i])=res.imag();
      }
      //Make sure we point to the correct output buffer
      fft->switchData(sFrameFFTData);
      //inverse transform ...
      fft->invTransform();

      //Scan for the maximum
      double maximum=-MAXDOUBLE, tempD;
      int bestLocation=0;
      for (int i=0;i<deltaMax;i+=channels){
	tempD=c_re(sFrameFFTData->in[i]);
	for (int j=1;j<channels;j++)
	  tempD+=c_re(sFrameFFTData->in[i+j]);
	//for (int i=0;i<deltaMax;i++){
	//	if ((tempD=c_re(sFrameFFTData->in[i]))>maximum){
	if (tempD>maximum){
	  bestLocation=i;
	  maximum=tempD;
	}
      }
      return bestLocation;
  }

  int processFrameFFT(void){
#ifdef W_DEBUG
    cout<<"WSOLA::processFrameFFT : enter "<<endl;
#endif

    ATYPE* tempDFW=dFrame.window->getDataPtr();
    for (int i=0;i<hanningLength;i++){ //Load the desired range
      c_re(dFrameFFTData->in[i])=(int)round((double)tempDFW[i]*wnd[i]);
      c_im(dFrameFFTData->in[i])=0.0;
    }
    for (int i=hanningLength;i<hanningLength+deltaMax;i++)
      c_im(dFrameFFTData->in[i])=c_re(dFrameFFTData->in[i])=0.0;

    for (int i=0;i<hanningLength+deltaMax;i++){ //Load the search range
      c_re(sFrameFFTData->in[i])=(double)input[i];
      c_im(sFrameFFTData->in[i])=0.0;
    }

    //Search through for the closest match
    int bestLocation=0;
    bestLocation=findSimilarityFFT();

#ifdef W_DEBUG
    cout<<"WSOLA::processFrame : exit"<<endl;
#endif
    return bestLocation;
  }

  void shiftOn(double tau){
    for (int i=0;i<hanningLength/2.0;i++)
      output[i]=output[i+(int)((double)hanningLength/2.0)];
    for (int i=0;i<hanningLength/2.0;i++)
      output[i+(int)((double)hanningLength/2.0)]=0;
    
    
    // Locate to the next desired frame to match to
    dFrame=(int)roundD(((double)dFrame.getCount()+(double)hanningLength/2.0/tau));
    // Locate to the corresponding search region
    sFrame=(int)roundD(dFrame.getCount());
    
    //old method (4.1)
    //dFrame=(int)roundD(((double)dFrame.getCount()+(double)hanningLength/2.0));
    //sFrame=(int)roundD(dFrame.getCount()/tau);
  }
  
  int findAndCopyBestMatch(void){
    int ret=0;
    int wndCnt=hanningLength, bestLocation;
    //Set the source frame to its beginning location
    if ((bestLocation=processFrameFFT())<0){
      cout<<"error ocurred during processFrameFFT function"<<endl;
      ret=PROCFFT_ERR;
    }

    //Copy the closest match over to the input audio array
    sFrame=sFrame.getCount()+bestLocation;

    /*//flatmax
    if ((ret=readNWav(sFrame.getCount()/channels, hanningLength/channels, channels, inputFile, input))!=hanningLength/channels){
      mustReturn=INPUT_READ2SF_ERR;;
      cout<<'\n';
      cout<<"error reading from input file to source frame.."<<endl;
      if (ret>0){ //Process this last frame
	cout<<"End of file reached, processing last frame "<<ret<<endl;
	//	for (int i=ret-1;i<(hanningLength+deltaMax);i++)
	//  (sFrame.window)[i]=(ATYPE)0.0;
      } else {
	cout<<"Nothing more to read, exiting with INPUT_READ2SF_ERR"<<endl;
	sf_perror(inputFile);
	return INPUT_READ2SF_ERR;
      }
      }*/

    ATYPE* tempSFW=sFrame.window->getDataPtr();
    for (int i=0;i<hanningLength;i++) // Copy over by windowing and adding
      //      output[i]+=(ATYPE)roundD((double)input[i]*wnd[i]);
      output[i]+=(ATYPE)roundD((double)tempSFW[i+bestLocation]*wnd[i]);
      //cout<<tempSFW[i+bestLocation]<<'\t'<<output[i]<<'\n';
    return ret;
  }

public:
  //Labeled errors
  typedef enum _errors {DEFAULT_ERR=-1, INPUT_READ2DF_ERR=-2, INPUT_READ2SF_ERR=-3, WRITE_ERR=-4, PROCFFT_ERR=-5} errors;
  
  int channels;
  /**Constructor ...
     This constructor takes the length of the hanning window, the sample
     frequency, the channel count (default=1)
  */
  WSOLA(int hl=HANNING_LENGTH(SAMPLE_FREQUENCY), int sf=SAMPLE_FREQUENCY, int ch=1) : Hanning(hl){
    
#ifdef W_DEBUG
    cout<<"WSOLA::WSOLA : enter"<<endl;
#endif
    channels=ch;
    cout<<"Assuming "<<channels<<" channels"<<endl;
    hanningLength=hl;
    deltaMax=(int)roundD((double)hanningLength/DELTA_DIVISOR);
    
    //Make sure we are using a valid deltaMax with respect to channel count
    while (remainder((double)(hanningLength+deltaMax)/(double)channels,floor((double)(hanningLength+deltaMax)/(double)channels))!=0.0){
      cout<<"hanning+delta remainder = "<<remainder((double)(hanningLength+deltaMax)/(double)channels,floor((double)(hanningLength+deltaMax)/(double)channels))<<endl;
      deltaMax++;
    }

    //Set up null pointers
    fft=NULL;
    //sFrameFFTData=dFrameFFTData=NULL;
    sFrameFFTData=NULL;
    N=deltaMax+hanningLength;
    initFFTStructures(N);

    sampleFrequency=sf;
    input=output=NULL;
    //count=0;

    if (newInit()<0){
      cout<<"WSOLA:: error - couldn't init the structures - out of memory ?"<<endl;
      exit(-1);
    }
#ifdef W_DEBUG
    cout<<"hanningLenght = "<<hanningLength<<" deltaMax = "<<deltaMax<<endl;
    cout<<"WSOLA::WSOLA : exit"<<endl;
#endif
  }

  ///Destructor
  ~WSOLA(void){
#ifdef W_DEBUG
    cout<<"WSOLA::~WSOLA : enter"<<endl;
#endif
    if (output) delete [] output;
    if (input) delete [] input;
    deInitFFTStructures();
#ifdef W_DEBUG
    cout<<"WSOLA::~WSOLA : exit"<<endl;
#endif
  }

  /** This function deals with the first half frame in the signal - 
      must be called first */
  int initProcess(SNDFILE* inputFile, double tau){
    //Set up the first half output frame ...

    int halfWndCnt=(int)((double)hanningLength/2.0/channels);
    cout<<"halfWndCnt "<<(double)hanningLength/2.0/channels<<endl;
    
    sFrame=0; //Reset the timecode to zero
    dFrame=0;
    int readn;
    if ((readn=readNWav(0, halfWndCnt, channels, inputFile, input))!=halfWndCnt){
      cout<<"error reading from first input file to nextSFeame... tried to read "<<halfWndCnt<<" frame but read "<<readn<<" instead"<<endl;
      sf_perror(inputFile);
      return INPUT_READ2SF_ERR;
    }
    return initProcess(input, tau);
  }
  
  int initProcess(const ATYPE*inputSrc, double tau){ 
    int halfWndCnt=(int)((double)hanningLength/2.0/channels);
    cout<<"halfWndCnt "<<(double)hanningLength/2.0/channels<<endl;
    for (int i=0;i<halfWndCnt;i++)
      output[i]+=(ATYPE)roundD((double)input[i]*wnd[i+halfWndCnt]);
    
    // Locate to the next desired frame to match to
    dFrame=(int)roundD(((double)dFrame.getCount()+(double)hanningLength/2.0/tau));
    // Locate to the corresponding search region
    sFrame=(int)roundD(dFrame.getCount());
  }

  /** Called by the user to process a frame */
  int processFrame(ATYPE *extOutput, double tau){
#ifdef W_DEBUG
    cout<<"WSOLA::processFrame(tau) : enter"<<endl;
#endif
    int ret=0;
    ret=findAndCopyBestMatch();

    for (int i=0;i<hanningLength/2;i++) //Load the output of the algorithm
      extOutput[i]=output[i];

    shiftOn(tau);
    cout<<"Start: "<<dFrame.getBeginning()<<"\tCurrent: "<<dFrame.getCount()<<"\t\tStop: "<<dFrame.getEnd()<<'\r';
    return ret;
  }

  /** Called by the user to process a frame */
  int processFrame(SNDFILE* inputFile, SNDFILE* outputFile, double tau){
#ifdef W_DEBUG
    cout<<"WSOLA::processFrame(inputFile, outputFile, tau) : enter"<<endl;
#endif

#ifdef W_DEBUG
    //    cout<<"WSOLA::processFrame : processing : "<<cnt<<" samples"<<endl;
    //    cout<<"WSOLA::processFrame : initting output array to : "<<(int)roundD(tau*(double)cnt)<<endl;
#endif
    int ret=0, mustReturn=0;

    //Load the desired src frame window we are matching to
    ATYPE* tempDFW=dFrame.window->getDataPtr();
    //cout<<"trying to read "<<dFrame.window->getCount()/channels<<" frames"<<endl;
    if ((ret=readNWav(dFrame.getCount()/channels, dFrame.window->getCount()/channels, channels, inputFile, &tempDFW[0]))!=dFrame.window->getCount()/channels){
      cout<<'\n';
      cout<<"error reading from input file to the desired frame ... "<<endl;
      mustReturn=INPUT_READ2DF_ERR;
      if (ret>0){ //Process this last frame
	cout<<"End of file reached, processing last frame: case 1 "<<ret<<'\t'<<dFrame.window->getCount()/channels<<endl;
	//for (int i=ret-1;i<dFrame.window->getCount();i++)
	//  (dFrame.window)[i]=(ATYPE)0.0;
      } else {
	cout<<"Nothing more to read, exiting with INPUT_READ2DF_ERR"<<endl;
	sf_perror(inputFile);
	return INPUT_READ2DF_ERR;
      }
    }
    
    //load the search region data
    //cout<<"trying to read "<<(hanningLength+deltaMax)/channels<<" frames"<<endl;
    //cout<<"reading sFrame from "<<sFrame.getCount()/channels<<endl;
    if (readNWav(sFrame.getCount()/channels, (hanningLength+deltaMax)/channels, channels, inputFile, sFrame.window->getDataPtr())!=(hanningLength+deltaMax)/channels){
      mustReturn=INPUT_READ2SF_ERR;
      cout<<'\n';
      cout<<"error reading from input file to source frame.."<<endl;
      if (ret>0){ //Process this last frame
	cout<<"End of file reached, processing last frame : case 2 "<<ret<<endl;
	//	for (int i=ret-1;i<(hanningLength+deltaMax);i++)
	//  (sFrame.window)[i]=(ATYPE)0.0;
      } else {
	cout<<"Nothing more to read, exiting with INPUT_READ2SF_ERR"<<endl;
	sf_perror(inputFile);
	return INPUT_READ2SF_ERR;
      }
    }

    //    for (int i=0;i<(hanningLength+deltaMax);i++)
    //  cout<<(*sFrame.window)[i]<<'\t';
    //cout<<endl;

    findAndCopyBestMatch();

    //Save the output and shift on half a frame
    int written;
    if ((written=writeNSndFile(outputFile, (int)((double)hanningLength/2.0), output))!=(int)((double)hanningLength/2.0)){
      cout<<"WSOLA::processFrame : error writing to output file. Wanted to write "<<(int)((double)hanningLength/2.0)<<" but wrote "<<written <<" instead"<<endl;
      sf_perror(outputFile);
      return WRITE_ERR;
    }
    cout<<"Start: "<<dFrame.getBeginning()<<"\tCurrent: "<<dFrame.getCount()<<"\t\tStop: "<<dFrame.getEnd()<<'\r';
  
  shiftOn(tau);
  
  if (mustReturn<0)//Return correctly on error
    return mustReturn;
  
  return ret;
  
#ifdef W_DEBUG
  cout<<"WSOLA::process : exit"<<endl;
#endif
  }

  ///This is the size of the modified (output) data
  int getCount(void){return dFrame.getEnd();}

  ///This is the index in input stream to read from
  int getInputIndex(void){return dFrame.getCount();}
  ///This is the length of elements required by the input stream to read
  int getSourceLength(void){return (hanningLength+deltaMax);}
  ///Loads an external memory source stream to correct locations in WSOLA
  void loadSourceInput(ATYPE *inSrc){
    // Copies memory location to memory location
    memcpy(sFrame.window->getDataPtr(), inSrc, (hanningLength+deltaMax)*sizeof(ATYPE));
    // Copies memory location to memory location
    memcpy(dFrame.window->getDataPtr(), inSrc, hanningLength*sizeof(ATYPE));
  }

  ///This is the length of elements required by the desired frame
  int getDesiredLength(void){return dFrame.window->getCount();}

  ///This is for indexing the modified (output) data
  ATYPE& operator[](int i){
    return output[i];
  }
};
#endif //WSOLA_H_
