/*************************************************************************/
/*                                                                       */
/*                Centre for Speech Technology Research                  */
/*                     University of Edinburgh, UK                       */
/*                       Copyright (c) 1996,1997                         */
/*                        All Rights Reserved.                           */
/*                                                                       */
/*  Permission to use, copy, modify, distribute this software and its    */
/*  documentation for research, educational and individual use only, is  */
/*  hereby granted without fee, subject to the following conditions:     */
/*   1. The code must retain the above copyright notice, this list of    */
/*      conditions and the following disclaimer.                         */
/*   2. Any modifications must be clearly marked as such.                */
/*   3. Original authors' names are not deleted.                         */
/*  This software may not be used for commercial purposes without        */
/*  specific prior written permission from the authors.                  */
/*                                                                       */
/*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
/*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
/*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
/*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
/*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
/*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
/*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
/*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
/*  THIS SOFTWARE.                                                       */
/*                                                                       */
/*************************************************************************/
/*                      Author :  Paul Taylor                            */
/*                      Date   :  July 1995                              */
/*-----------------------------------------------------------------------*/
/*                                                                       */
/*              Klatt Duration Rules                                     */
/*                                                                       */
/*=======================================================================*/

/*
This is an implementation of the Klatt rule system as described in
chapter 9 of "From text to speech: The MITalk system", Allen,
Hunnicutt and Klatt.

The function klatt_seg_dur() calculates a duration for each
segment in the input. It does this by calling a number
of rules (named 1 to 11) as defined in the MITalk book. Most
rules return a number which modifies the inherent duration of
each segment. The original rules are set up so as to return
a percentage; here the system returns a floating point value,
which I think is neater.
*/

#include <stdio.h>
#include "festival.h"
#include "duration.h"

// Prints a line for every rule whose factor for this segment is not 1.0.
static void klatt_dur_debug(EST_Utterance &u, EST_Stream_Item &s);

// The individual duration rules.  Each returns a multiplicative factor
// for the given segment; klatt_seg_dur() multiplies them together.
static float rule2(EST_Utterance &u, EST_Stream_Item &seg);
static float rule3(EST_Utterance &u, EST_Stream_Item &seg);
static float rule4(EST_Utterance &u, EST_Stream_Item &seg);
static float rule5(EST_Utterance &u, EST_Stream_Item &seg);
static float rule6(EST_Utterance &u, EST_Stream_Item &seg);
static float rule7(EST_Utterance &u, EST_Stream_Item &seg);
static float rule8(EST_Utterance &u, EST_Stream_Item &seg);
static float rule9(EST_Utterance &u, EST_Stream_Item &seg);
static float rule10(EST_Utterance &u, EST_Stream_Item &seg);
// rule9 is built on top of rule9a, which in turn uses sub_rule9a.
static float rule9a(EST_Utterance &u, EST_Stream_Item &seg);
static float sub_rule9a(const EST_String &ph);

// Computes one segment's duration and sets its end time.
static int klatt_seg_dur(EST_Utterance &u, EST_Stream_Item &seg);
// Per-phone values looked up by segment name in klatt_params.
static float min_dur(EST_Stream_Item &s_seg);
static float inher_dur(EST_Stream_Item &s_seg);

// Non-static: true when the segment's "onsetcoda" feature is "onset".
int onset(EST_Utterance &u, EST_Stream_Item &seg);

// Parameter list read from the Lisp variable "duration_klatt_params"
// at the start of FT_Duration_Klatt_Utt().
static LISP klatt_params = NIL;
// When non-zero, klatt_seg_dur() calls klatt_dur_debug() for each
// non-silence segment.  Nothing in this file changes it from 0.
static int debug = 0;

LISP FT_Duration_Klatt_Utt(LISP utt)
{
    // Duration module entry point: run the Klatt rules over every item
    // of the "Segment" stream, then call dur_fill_x_from_y() for each of
    // the higher-level streams.  Returns the utterance it was given.
    EST_Utterance *utterance = GETUTTVAL(utt);

    *cdebug << "Duration Klatt module\n";

    // Loaded once per utterance; min_dur()/inher_dur() read it.
    klatt_params = siod_get_lval("duration_klatt_params",
				 "no klatt duration params");

    EST_Stream_Item *seg = (utterance->stream("Segment")).head();
    while (seg != 0)
    {
	klatt_seg_dur(*utterance,*seg);
	seg = next(seg);
    }

    // Order matters: each stream is filled from one already completed.
    dur_fill_x_from_y(*utterance,"Syllable","Segment");
    dur_fill_x_from_y(*utterance,"Word","Syllable");
    dur_fill_x_from_y(*utterance,"IntEvent","Syllable");
    dur_fill_x_from_y(*utterance,"Phrase","Word");

    return utt;
}

static int klatt_seg_dur(EST_Utterance &u, EST_Stream_Item &seg)
{
    // Compute the duration of one segment and store its end time.
    //
    // The segment starts where the previous segment ends (or at 0.0 if
    // there is no previous segment).  Silences get a fixed 0.250 scaled
    // by the local stretch factor.  Every other segment gets
    //     ((inherent - min) * fact + min) / 1000.0 * stretch
    // where fact is the product of rules 2..10 and min is the phone's
    // minimum duration, halved whenever rule 7 returns something other
    // than 1.0.  (The division by 1000.0 presumably converts parameters
    // given in milliseconds to seconds -- confirm against the parameter
    // table.)
    //
    // Always returns 0.
    float duration_speed = dur_get_stretch_at_seg(u,seg);
    EST_Stream_Item *ps = prev(&seg);   // previous segment, 0 at the head
    float start = (ps == 0) ? 0.0 : ps->end();
    float dur;

    if (ph_is_silence(seg.name()))
	dur = 0.250 * duration_speed;
    else
    {
	if (debug) klatt_dur_debug(u,seg);

	// Rule 7 is needed both as a factor and to decide whether the
	// minimum duration is halved, so evaluate it only once.
	float unstressed = rule7(u, seg);

	float fact = rule2(u, seg) * rule3(u, seg) * rule4(u, seg)
	    * rule5(u, seg) * rule6(u, seg) * unstressed * rule8(u, seg)
	    * rule9(u,seg) * rule10(u, seg);

	float min = (unstressed != 1.0) ? min_dur(seg)/2 : min_dur(seg);

	dur = ((((inher_dur(seg) - min) * fact) + min) / 1000.0)
	    * duration_speed;
    }

    seg.set_end(start + dur);

    return 0;
}

static float min_dur(EST_Stream_Item &seg)
{
    // Return the third element of this segment's entry in klatt_params
    // (the entry is found by segment name).  Reports an error through
    // festival_error() when the phone has no entry.
    LISP entry = siod_assoc_str(seg.name(),klatt_params);

    if (entry == NIL)
    {
	cerr << "Klatt_Duration: no minimum duration for \"" << seg.name()
	    << "\"\n";
	festival_error();
    }

    LISP third = car(cdr(cdr(entry)));
    return get_c_float(third);
}

static float inher_dur(EST_Stream_Item &seg)
{
    // Return the second element of this segment's entry in klatt_params
    // (the entry is found by segment name); this is the phone's inherent
    // duration.  Reports an error through festival_error() when the
    // phone has no entry.
    LISP p = siod_assoc_str(seg.name(),klatt_params);

    if (p == NIL)
    {
	// The message previously said "minimum duration" (copied from
	// min_dur()), which pointed at the wrong lookup.
	cerr << "Klatt_Duration: no inherent duration for \"" << seg.name()
	    << "\"\n";
	festival_error();
    }

    return get_c_float(car(cdr(p)));
}

static int word_final(EST_Utterance &u, EST_Stream_Item &seg)
{
    // TRUE when seg's address equals that of the last segment of the
    // last syllable of the word containing seg; FALSE otherwise.
    int word_last_addr =
	ffeature(u,seg,"Syllable.Word.Syllable:last.Segment:last.addr");

    return (seg.addr() == word_last_addr) ? TRUE : FALSE;
}

#if 0
// Currently compiled out (#if 0); kept for reference.
static int phrase_final(EST_Utterance &u, EST_Stream_Item &seg)
{
    // True if this segment is the last in a phrase: its address is
    // compared with that of the last segment of the last syllable of
    // the last word of the enclosing phrase.
    int last_seg;
    
    last_seg = ffeature(u,seg,"Syllable.Word.Phrase.Word:last.Syllable:last.Segment:last.addr");

    if (seg.addr() == last_seg)
	return TRUE;
    else
	return FALSE;
}
#endif

static int syl_final(EST_Utterance &u, EST_Stream_Item &seg)
{
    // TRUE when seg's address equals that of the last segment of the
    // syllable containing seg; FALSE otherwise.
    int syl_last_addr = ffeature(u,seg,"Syllable.Segment:last.addr");

    return (seg.addr() == syl_last_addr) ? TRUE : FALSE;
}

static int word_initial(EST_Utterance &u, EST_Stream_Item &seg)
{
    // TRUE when seg's address equals that of the first segment of the
    // first syllable of the word containing seg; FALSE otherwise.
    int word_first_addr =
	ffeature(u,seg,"Syllable.Word.Syllable.Segment.addr");

    return (seg.addr() == word_first_addr) ? TRUE : FALSE;
}

static int phrase_initial(EST_Utterance &u, EST_Stream_Item &seg)
{
    // TRUE when seg's address equals that of the first segment of the
    // first syllable of the first word of the phrase containing seg;
    // FALSE otherwise.
    int phrase_first_addr =
	ffeature(u,seg,"Syllable.Word.Phrase.Word.Syllable.Segment.addr");

    return (seg.addr() == phrase_first_addr) ? TRUE : FALSE;
}

int onset(EST_Utterance &u, EST_Stream_Item &seg)
{
    // 1 when the segment's "onsetcoda" feature is "onset", else 0.
    return (ffeature(u,seg,"onsetcoda") == "onset") ? 1 : 0;
}

int coda(EST_Utterance &u, EST_Stream_Item &seg)
{
    // 1 when the segment's "onsetcoda" feature is "coda", else 0.
    return (ffeature(u,seg,"onsetcoda") == "coda") ? 1 : 0;
}

static float rule2(EST_Utterance &u, EST_Stream_Item &seg)
{   // Clause final lengthening
    // Returns 1.4 for a coda segment whose syllable has a syl_break of
    // 2 or 3, and 1.0 for every other segment.
    // (Two unused EST_Stream_Item locals that were constructed on every
    // call have been removed.)
    if (!coda(u,seg))
	return 1.0;

    int b = ffeature(u,seg,"Syllable.syl_break");
    if ((b > 1) && (b < 4))
	return 1.4;

    return 1.0;
}

static float rule3(EST_Utterance &u, EST_Stream_Item &seg)
{   // Non-phrase-final shortening
    // Returns 0.6 for a syllabic segment whose syllable has a syl_break
    // below 2, 1.4 for a liquid or nasal whose syllable has a syl_break
    // of exactly 4, and 1.0 otherwise.
    // NOTE(review): the original comment described the lengthened
    // liquids/nasals as "postvocalic"; the code does not test for that.
    const int brk = ffeature(u,seg,"Syllable.syl_break");

    if (brk < 2)
    {
	if (ph_is_syllabic(seg.name()))
	    return 0.6;
    }
    else if (brk == 4)
    {
	if (ph_is_liquid(seg.name()) || ph_is_nasal(seg.name()))
	    return 1.4;
    }

    return 1.0;
}


static float rule4(EST_Utterance &u, EST_Stream_Item &seg) 
{   // Non-word-final shortening
    // Returns 0.85 for a syllabic segment whose syllable has a syl_break
    // of 0, and 1.0 otherwise.
    const int brk = ffeature(u,seg,"Syllable.syl_break");

    if (brk != 0)
	return 1.0;

    return ph_is_syllabic(seg.name()) ? 0.85 : 1.0;
}

static float rule5(EST_Utterance &u, EST_Stream_Item &seg)
{   // Polysyllabic shortening
    // Returns 0.8 for a syllabic segment when the "Syllable:num" feature
    // of its word is greater than 1, and 1.0 otherwise.
    const int syl_count = ffeature(u,seg,"Syllable.Word.Syllable:num");

    if (syl_count <= 1)
	return 1.0;

    return ph_is_syllabic(seg.name()) ? 0.8 : 1.0;
}

static float rule6(EST_Utterance &u, EST_Stream_Item &seg)
{   // Non-initial-consonant shortening
    // Returns 0.85 for a consonant that is not the first segment of its
    // word, and 1.0 otherwise.
    if (word_initial(u,seg))
	return 1.0;

    return ph_is_consonant(seg.name()) ? 0.85 : 1.0;
}

static float rule7(EST_Utterance &u, EST_Stream_Item &seg)
{   // Unstressed shortening
    // Segments in a syllable whose stress feature is 1 are left alone
    // (1.0).  Otherwise:
    //   syllabic, first or last segment of the word -> 0.7
    //   syllabic, elsewhere in the word             -> 0.5
    //   liquid in an onset                          -> 0.1
    //   anything else                               -> 0.7
    // klatt_seg_dur() also halves the minimum duration whenever this
    // rule returns something other than 1.0.
    if (ffeature(u,seg,"Syllable.stress") == 1)
	return 1.0;

    if (ph_is_syllabic(seg.name()))
	return (word_initial(u, seg) || word_final(u, seg)) ? 0.7 : 0.5;

    // The original comment notes glides should arguably be included too.
    if (onset(u, seg) && ph_is_liquid(seg.name()))
	return 0.1;

    return 0.7;
}

// Lengthening for emphasis: returns 1.4 for a vowel in a syllable whose
// "accented" feature is 1, and 1.0 for everything else.
static float rule8(EST_Utterance &u, EST_Stream_Item &seg) 
{ 
    if (ph_is_vowel(seg.name()) &&
	(ffeature(u,seg,"Syllable.accented") == 1))
	return 1.4;

    return 1.0;
}

// This is really rule 9b, but it is easier to make it call rule 9a.

static float rule9(EST_Utterance &u, EST_Stream_Item &seg) 
{   // Postvocalic context of vowels
    // When the syllable's syl_break is greater than 1 the rule 9a
    // factor is blended as 0.7 + 0.3 * factor; otherwise the rule 9a
    // factor is returned unchanged.
    const int brk = ffeature(u,seg,"Syllable.syl_break");
    const float context = rule9a(u, seg);

    if (brk <= 1)
	return context;

    return (0.7 + (0.3 * context));
}


static float rule9a(EST_Utterance &u, EST_Stream_Item &seg)
{   // Postvocalic context of vowels
    // Vowel:
    //   syllable-final                                -> 1.2
    //   next segment is syllable-final                -> sub_rule9a(next)
    //   next is a sonorant followed by an obstruent   -> sub_rule9a(obstruent)
    // Non-vowel in an onset                           -> 1.0
    // Other sonorant:
    //   syllable-final                                -> 1.2
    //   next segment is an obstruent                  -> sub_rule9a(next)
    // Everything else                                 -> 1.0
    //
    // next() is checked for 0 before use so that a segment at the end of
    // the stream yields the neutral factor 1.0 instead of dereferencing
    // a null pointer.
    EST_Stream_Item *following;

    if (ph_is_vowel(seg.name()))
    {
	if (syl_final(u,seg))
	    return 1.2;

	following = next(&seg);
	if (following == 0)
	    return 1.0;

	if (syl_final(u,*following))
	    return sub_rule9a(following->name());

	if (ph_is_sonorant(following->name()))
	{
	    EST_Stream_Item *after = next(following);
	    if ((after != 0) && ph_is_obstruent(after->name()))
		return sub_rule9a(after->name());
	}
	return 1.0;
    }

    if (onset(u,seg))
	return 1.0;

    if (ph_is_sonorant(seg.name()))
    {
	if (syl_final(u,seg))
	    return 1.2;

	following = next(&seg);
	if ((following != 0) && ph_is_obstruent(following->name()))
	    return sub_rule9a(following->name());
    }

    return 1.0;
}

// Sub rule of 9a: the factor depends only on the phone ph, not on where
// the segment sits.
//   unvoiced stop     -> 0.7
//   voiced fricative  -> 1.6
//   voiced stop       -> 1.2
//   voiced nasal      -> 0.85
//   anything else     -> 1.0
static float sub_rule9a(const EST_String &ph)
{
    if (!ph_is_voiced(ph))
	return ph_is_stop(ph) ? 0.7 : 1.0;

    if (ph_is_fricative(ph))
	return 1.6;
    if (ph_is_stop(ph))
	return 1.2;
    if (ph_is_nasal(ph))
	return 0.85;

    return 1.0;
}

// Shortening in clusters
//
// A syllable-final segment in a syllable whose syl_break is greater than
// 1 is left alone (1.0).  Otherwise:
//   vowel followed by a vowel                      -> 1.20
//   vowel preceded by a vowel                      -> 0.70
//   non-vowel between two consonants               -> 0.5
//   non-vowel with a consonant on one side only    -> 0.7
//   anything else                                  -> 1.0
// The preceding segment is only consulted when seg is not the first
// segment of its phrase.
//
// Neighbouring segments are checked for 0 before use, so a segment at
// either end of the stream is treated as having no neighbour on that
// side instead of dereferencing a null pointer.

static float rule10(EST_Utterance &u, EST_Stream_Item &seg) 
{
    int b = ffeature(u,seg,"Syllable.syl_break");

    if (syl_final(u,seg) && (b > 1))
	return 1.0;

    EST_Stream_Item *following = next(&seg);

    if (ph_is_vowel(seg.name()))
    {
	if ((following != 0) && ph_is_vowel(following->name()))
	    return 1.20;

	if (!phrase_initial(u,seg))
	{
	    EST_Stream_Item *preceding = prev(&seg);
	    if ((preceding != 0) && ph_is_vowel(preceding->name()))
		return 0.70;
	}
	return 1.0;
    }

    int cons_after = ((following != 0) &&
		      ph_is_consonant(following->name())) ? 1 : 0;

    int cons_before = 0;
    if (!phrase_initial(u,seg))
    {
	EST_Stream_Item *preceding = prev(&seg);
	if ((preceding != 0) && ph_is_consonant(preceding->name()))
	    cons_before = 1;
    }

    if (cons_after)
	return cons_before ? 0.5 : 0.7;
    if (cons_before)
	return 0.7;

    return 1.0;
}


static void klatt_dur_debug(EST_Utterance &u, EST_Stream_Item &seg)
{
    // Debug trace: evaluate rules 2..10 in order for this segment and
    // print "Fired rule N <factor>" to cout for each one whose factor is
    // not 1.0.
    //
    // The rule 10 label previously lacked its trailing space, so the
    // factor was printed glued to the rule number ("Fired rule 100.7").
    typedef float (*klatt_rule)(EST_Utterance &, EST_Stream_Item &);
    struct rule_entry
    {
	const char *label;
	klatt_rule fn;
    };
    static const rule_entry rules[] =
    {
	{ "Fired rule  2 ", rule2 },
	{ "Fired rule  3 ", rule3 },
	{ "Fired rule  4 ", rule4 },
	{ "Fired rule  5 ", rule5 },
	{ "Fired rule  6 ", rule6 },
	{ "Fired rule  7 ", rule7 },
	{ "Fired rule  8 ", rule8 },
	{ "Fired rule  9 ", rule9 },
	{ "Fired rule 10 ", rule10 }
    };
    const unsigned int n_rules = sizeof(rules) / sizeof(rules[0]);

    for (unsigned int i = 0; i < n_rules; i++)
    {
	float f = rules[i].fn(u, seg);
	if (f != 1.0)
	    cout << rules[i].label << f << endl;
    }

    return;
}


