 /************************************************************************/
 /*                                                                      */
 /*                Centre for Speech Technology Research                 */
 /*                     University of Edinburgh, UK                      */
 /*                       Copyright (c) 1996,1997                        */
 /*                        All Rights Reserved.                          */
 /*                                                                      */
 /*  Permission to use, copy, modify, distribute this software and its   */
 /*  documentation for research, educational and individual use only, is */
 /*  hereby granted without fee, subject to the following conditions:    */
 /*   1. The code must retain the above copyright notice, this list of   */
 /*      conditions and the following disclaimer.                        */
 /*   2. Any modifications must be clearly marked as such.               */
 /*   3. Original authors' names are not deleted.                        */
 /*  This software may not be used for commercial purposes without       */
 /*  specific prior written permission from the authors.                 */
 /*                                                                      */
 /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK       */
 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING     */
 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT  */
 /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE    */
 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES   */
 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN  */
 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,         */
 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF      */
 /*  THIS SOFTWARE.                                                      */
 /*                                                                      */
 /*************************************************************************/
 /*                                                                       */
 /*                 Author: Richard Caley (rjc@cstr.ed.ac.uk)             */
 /*                   Date: Fri Jul 25 1997                               */
 /* --------------------------------------------------------------------- */
 /* A simple imposition module which changes the durations and            */
 /* pitch.                                                                */
 /*                                                                       */
 /*************************************************************************/

#include "../WaveSynthesis/FramesUnit.h"
#include "simple_impose.h"
#include "frames_imposition_support.h"
#include "TargetTracker.h"
#include "module_support.h"

/*
 * Descriptive record for this module; it is handed to proclaim_module()
 * and init_module_subr() by simple_impose_init().
 *
 * NOTE(review): ModuleDescription is declared elsewhere, so the meaning
 * attached to each group below is inferred from how simple_impose()
 * unpacks its arguments -- confirm against module_support.h.
 */
static ModuleDescription description =
{
  // Module name and version, then organisation and author.
  "simple_impose", 1.0,
  "CSTR",
  "Richard Caley <rjc@cstr.ed.ac.uk>",
  {
    // Free-text description lines, NULL terminated.
    "Simple prosody imposition which does linear selection of frames.",
     NULL
  },
  {
    // Streams which simple_impose() unpacks with sat_existing.
    { "Segment",	"Segments of the utterance."},
    { "Target",		"Intonation targets." },
    { "Unit",		"Units to be synthesised"},
    {NULL,NULL}
  },
  {
    // Stream which simple_impose() unpacks with sat_as_is.
    { "Join",		"How to join overlapping units."},
    {NULL,NULL}
  },
  {
    // Stream which simple_impose() unpacks with sat_replace.
    { "ModifiedUnit",	"New units with frames selected and sized." },
    {NULL,NULL}
  },
  {
    // Parameters: name, type, default value, description.
    // "ImposeVerbose" is read with bool_parameter_get() in simple_impose().
    { "ImposeVerbose",	mpt_bool,	"nil",	"Print trace of imposition process." },
    {NULL,NULL,NULL,NULL}
  }
};

/* Round to an integer by adding one half and truncating toward zero. */
static inline int irint(float f)
{
    const double shifted = f + 0.5;
    return static_cast<int>(shifted);
}
/* Smaller of two floats; when neither compares less, the second is returned. */
static inline float min(float a, float b)
{
    if (a < b)
        return a;
    return b;
}

/*
 * Impose a target duration and pitch contour on one chunk.
 *
 * Walks linearly through the target time span, and for each output frame
 * picks the source frame lying at the proportionally equivalent position
 * in the original material.  Voiced frames are given a length of one
 * pitch period (sample_rate/f0, with f0 read from the tracker at the
 * current time); other frames keep their original length, shrunk when the
 * chunk is being shortened.  The selected frames are then written out as
 * one modification track per bit of the chunk.
 *
 *   chunk       the chunk whose bits supply the source frames
 *   target_dur  wanted duration; multiplied by the sample rate to get samples
 *   tracker     supplies the f0 value at a given time
 *   start_time  time at which this chunk starts
 *   verbose     print a trace to cout
 *
 * Returns the duration actually produced (at_time - start_time), or -1 if
 * no bit provides a usable sample rate or the first bit has no LPC track.
 */
static float do_impose(FramesUnit::Chunk *chunk, 
		       float target_dur,
		       TargetTracker &tracker,
		       float start_time,
		       bool verbose)
{
    int orig_length = 0;
    int sample_rate = -1;
    int bit_start[MAX_UNITS_IN_CHUNK+1];

    for (int i = 0; i < chunk->n; i++)
    {
        // Test the unit before using it; previously lpc() was called on
        // the unit ahead of the null check.
        if (!chunk->bit[i].unit)
            continue;

        EST_Track *lpc = chunk->bit[i].unit->lpc();
        if (lpc)
            orig_length += sum_lengths(*lpc,
                                       chunk->bit[i].start_frame,
                                       chunk->bit[i].end_frame);
        if (sample_rate < 0)
            sample_rate = chunk->bit[i].unit->sample_rate();
    }

    if (sample_rate <= 0)
        return -1;

    // Frame selection starts in the first bit, so it must have frames.
    if (!chunk->bit[0].unit)
        return -1;
    EST_Track *frames = chunk->bit[0].unit->lpc();
    if (!frames)
        return -1;

    int target_length = irint(target_dur*sample_rate);

    if (verbose)
        cout << "impose " << chunk->bit[0].unit->name() << " orig " << orig_length << " target " << target_length << "\n";

    // With a non-positive target the selection loop never runs, so the
    // scale is unused; avoid dividing by zero to compute it.
    float scale = 0.0;
    if (target_length > 0)
        scale = (float)orig_length/(float)target_length;

    int at = 0;
    float at_time = start_time;
    int bit = 0, target_i = 0;

    bit_start[bit] = 0;

    int orig_i = chunk->bit[bit].start_frame;
    int orig_i_start = 0;
    int orig_i_length = irint(frames->a(orig_i, channel_length));

    while (at < target_length)
    {
        int orig_at = irint(at*scale);

        // Advance through the source until the current frame covers orig_at.
        while (orig_i_start + orig_i_length < orig_at)
        {
            bool bit_done = (orig_i + 1 >= chunk->bit[bit].end_frame);
            EST_Track *next_frames = frames;

            if (bit_done)
            {
                // Rounding can ask for a position past the last source
                // frame; stay on the final usable frame rather than
                // stepping off the end of the chunk.
                if (bit + 1 >= chunk->n || !chunk->bit[bit+1].unit)
                    break;
                next_frames = chunk->bit[bit+1].unit->lpc();
                if (!next_frames)
                    break;
            }

            orig_i_start += orig_i_length;
            if (bit_done)
            {
                bit++;
                bit_start[bit] = target_i;
                frames = next_frames;
                orig_i = chunk->bit[bit].start_frame;
            }
            else
                orig_i++;
            orig_i_length = irint(frames->a(orig_i, channel_length));
        }

        if (verbose)
            cout << " orig " << orig_at << "[" << orig_i << "] target " << at << "[" << target_i << "]\n";

        float f0 = tracker.val_at(at_time);
        int l;

        // A non-positive f0 cannot give a pitch period, so such frames
        // are sized like unvoiced ones.
        if (frames->a(orig_i, channel_voiced) > 0.5 && f0 > 0)
            l = irint(sample_rate/f0);
        else
            l = irint(frames->a(orig_i, channel_length) * min(1.0,1.0/scale));

        // Every frame must move `at' forward or this loop cannot end.
        if (l < 1)
            l = 1;

        if (at + l/2 > target_length)
            break;

        ImpositionBuffer::ensure(target_i);

        ImpositionBuffer::b()[target_i].bit = bit;
        ImpositionBuffer::b()[target_i].frame = orig_i;
        ImpositionBuffer::b()[target_i].length = l;
        ImpositionBuffer::b()[target_i].power = frames->a(orig_i, channel_power);

        at += l;
        at_time += (float)l/(float)sample_rate;
        target_i++;
    }

    // Close the current bit and give every bit which was never reached an
    // empty range, so that no uninitialised boundary is read below.
    for (int b = bit + 1; b <= chunk->n; b++)
        bit_start[b] = target_i;

    int check_length = 0;

    for (int ii = 0; ii < chunk->n; ii++)
    {
        if (verbose)
            cout << " build track " << ii << " start " << bit_start[ii] << " end " << bit_start[ii+1] << "\n";

        // Nothing to attach the modifications to.
        if (!chunk->bit[ii].unit)
            continue;

        EST_Track *t = new EST_Track();

        t->assign_map(ImpositionTrackMap);
        t->resize(bit_start[ii+1]-bit_start[ii],
                  ImpositionTrackMap.last_channel()+1);

        for (int j = bit_start[ii], k = 0; j < bit_start[ii+1]; j++, k++)
        {
            if (ImpositionBuffer::b()[j].bit != ii)
                cerr << "wrong bit!\n";

            t->a(k, channel_frame) = ImpositionBuffer::b()[j].frame;
            t->a(k, channel_length) = ImpositionBuffer::b()[j].length;
            t->a(k, channel_power) = ImpositionBuffer::b()[j].power;
        }

        check_length += sum_lengths(*t);
        // NOTE(review): the track is handed to the unit here; presumably
        // the unit takes ownership of it -- confirm in FramesUnit.
        chunk->bit[ii].unit->add_modifications(t);
    }

    if (verbose)
        cout << check_length/(float)sample_rate << "\n";

    return at_time-start_time;
}

/*
 * Entry point of the module.
 *
 * Unpacks the utterance and stream arguments, splits the units into
 * chunks, creates the modified-unit stream and then calls do_impose()
 * once per chunk, pairing each chunk with the next item of the segment
 * stream.  Whatever a chunk over- or under-shoots its target by is
 * subtracted from the target of the following chunk.  Processing stops
 * at the first chunk for which do_impose() returns a negative value.
 * Always returns NIL.
 */
LISP simple_impose(LISP args)
{
    EST_Utterance *utt;

    EST_String segment_stream_name("Segment");
    EST_String target_stream_name("Target");
    EST_String unit_stream_name("Unit");
    EST_String join_stream_name("Join");
    EST_String modunit_stream_name("ModifiedUnit");

    EST_Stream *unit_stream = NULL;
    EST_Stream *join_stream = NULL;
    EST_Stream *target_stream = NULL;
    EST_Stream *segment_stream = NULL;
    EST_Stream *modunit_stream = NULL;

    unpack_module_args(args,
                       utt,
                       segment_stream_name, segment_stream, sat_existing,
                       target_stream_name, target_stream, sat_existing,
                       unit_stream_name, unit_stream, sat_existing,
                       join_stream_name, join_stream, sat_as_is,
                       modunit_stream_name, modunit_stream, sat_replace);

    bool verbose = bool_parameter_get("ImposeVerbose");

    FramesUnit::Chunk *chunks = FramesUnit::chunk_utterance(utt, unit_stream, join_stream);

    if (verbose)
    {
        FramesUnit::dump_chunks(cout, chunks);
    }

    create_modunits_stream(chunks, unit_stream, modunit_stream);

    EST_Stream_Item *segment = segment_stream->head();

    TargetTracker tracker(target_stream);

    float now = 0.0;       // running start time for the next chunk
    float overrun = 0.0;   // actual minus target duration of the last chunk

    // The chunk list ends at the first chunk with no bits.
    FramesUnit::Chunk *chunk = chunks;
    while (chunk->n != 0)
    {
        if (!segment)
            err("too many chunks", NIL);

        float wanted = segment->dur() - overrun;
        float got = do_impose(chunk, wanted, tracker, now, verbose);

        // A negative result means the chunk could not be imposed.
        if (got < 0)
            break;

        if (verbose)
        {
            cout << "durs: " << segment->name() << " start time " << now << " seg " << segment->dur() << " target " << wanted << " actual " << got << "\n";
        }

        segment = next(segment);
        now += got;
        overrun = got - wanted;

        chunk++;
    }

    return NIL;
}

/*
 * Module initialisation: passes the name "simple_impose" together with
 * the module description to proclaim_module(), and the same name, the
 * simple_impose() handler and the description to init_module_subr().
 * NOTE(review): presumably this makes the module callable from the
 * interpreter -- confirm against module_support.h.
 */
void simple_impose_init(void)
{
    proclaim_module("simple_impose", &description);
    init_module_subr("simple_impose", simple_impose, &description);
}
