
/*===========================================================================*/

/*
 *  Copyright (C) 1997 Jason Hutchens
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the license or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the Gnu Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*===========================================================================*/

/*
 *		$Id: megahal.c,v 1.8 1997/12/24 03:17:01 hutch Exp hutch $
 *
 *		File:			megahal.c
 *
 *		Program:		MegaHAL v8
 *
 *		Purpose:		To simulate a natural language conversation with a psychotic
 *						computer.  This is achieved by learning from the user's
 *						input using a third-order Markov model on the word level.
 *						Words are considered to be sequences of characters separated
 *						by whitespace and punctuation.  Replies are generated
 *						randomly based on a keyword, and they are scored using
 *						measures of surprise.
 *
 *		Author:		Mr. Jason L. Hutchens
 *
 *		WWW:			http://ciips.ee.uwa.edu.au/~hutch/hal/
 *
 *		E-Mail:		hutch@ciips.ee.uwa.edu.au
 *
 *		Contact:		The Centre for Intelligent Information Processing Systems
 *						Department of Electrical and Electronic Engineering
 *						The University of Western Australia
 *						AUSTRALIA 6907
 *
 *		Phone:		+61-8-9380-3856
 *
 *		Facsimile:	+61-8-9380-1168
 *
 *		Notes:		This file is best viewed with tabstops set to three spaces.
 */

/*===========================================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <malloc.h>
#include <string.h>
#include <signal.h>
#include <math.h>
#include <time.h>
#include <ctype.h>
#include <sys/types.h>

/*===========================================================================*/

/*
 *		Tuning constants.  Their consumers are not in this part of the file.
 */
#define P_THINK 40
#define D_KEY 100000
#define V_KEY 50000
#define D_THINK 500000
#define V_THINK 250000

/*
 *		MIN is wrapped in an outer pair of parentheses so that it behaves as
 *		a single operand inside larger expressions, e.g. MIN(a,b)*2.  Both
 *		arguments may still be evaluated more than once, so avoid passing
 *		expressions with side effects.
 */
#define MIN(a,b) (((a)<(b))?(a):(b))

/*
 *		Map the BSD random number interface onto the ISO C one.
 */
#define srandom(a) srand(a)
#define random() rand()

#define COOKIE "MegaHALv8"

/*
 *		Number of entries in the global command[] table.
 */
#define COMMAND_SIZE (sizeof(command)/sizeof(command[0]))

/*
 *		Fixed-width unsigned types; main() verifies their sizes at start-up.
 */
#define BYTE1 unsigned char
#define BYTE2 unsigned short
#define BYTE4 unsigned long

/*===========================================================================*/

/*
 *		Home-grown boolean; FALSE is 0 and TRUE is 1.
 */
typedef enum { FALSE, TRUE } bool;

/*
 *		A counted string.  add_word() copies exactly `length' characters and
 *		does not append a terminator, so `word' must not be treated as a
 *		NUL-terminated C string in general.
 */
typedef struct {
	BYTE1 length;
	char *word;
} STRING;

/*
 *		A set of words.  `entry' holds the words in insertion order (the
 *		position is the word's symbol), while `index' holds the same
 *		positions ordered by wordcmp() so search_dictionary() can perform a
 *		binary search.
 */
typedef struct {
	BYTE4 size;
	STRING *entry;
	BYTE2 *index;
} DICTIONARY;

/*
 *		Parallel arrays of `size' word pairs.
 *		NOTE(review): presumably `from' is replaced by `to' when building
 *		keywords; the code that uses it is not in this part of the file.
 */
typedef struct {
	BYTE2 size;
	STRING *from;
	STRING *to;
} SWAP;

/*
 *		A node of the n-gram tree.  `tree' is an array of `branch' children
 *		kept sorted by symbol (see search_node() and add_node()).  `count' is
 *		incremented by add_symbol() for this node and `usage' for its parent
 *		at the same time; counting stops once `count' reaches 65535.
 */
typedef struct NODE {
	BYTE2 symbol;
	BYTE4 usage;
	BYTE2 count;
	BYTE2 branch;
	struct NODE **tree;
} TREE;

/*
 *		The language model: two trees, a context array of order+2 node
 *		pointers (allocated in new_model()), and the dictionary of symbols.
 */
typedef struct {
	BYTE1 order;
	TREE *forward;
	TREE *backward;
	TREE **context;
	DICTIONARY *dictionary;
} MODEL;

/*
 *		Identifiers returned by execute_command().
 */
typedef enum { UNKNOWN, QUIT, SETUP, JUDGE, DELAY } COMMAND_WORDS;

/*
 *		Associates a command word with its identifier (see command[]).
 */
typedef struct {
	STRING word;
	COMMAND_WORDS command;
} COMMAND;

/*===========================================================================*/

/*
 *		Forward declarations, in roughly alphabetical order, so that the
 *		functions below may be defined and called in any order.
 */
void add_aux(MODEL *, DICTIONARY *, STRING);
void add_key(MODEL *, DICTIONARY *, STRING);
void add_node(TREE *, TREE *, int);
void add_swap(SWAP *, char *, char *);
TREE *add_symbol(TREE *, BYTE2);
BYTE2 add_word(DICTIONARY *, STRING);
int babble(MODEL *, DICTIONARY *, DICTIONARY *);
bool boundary(char *, int);
void capitalize(char *);
void change_judge(int);
void clear_screen(void);
void delay(char *);
void die(int);
bool dissimilar(DICTIONARY *, DICTIONARY *);
bool error(char *, char *, ...);
float evaluate_reply(MODEL *, DICTIONARY *, DICTIONARY *);
COMMAND_WORDS execute_command(DICTIONARY *, bool);
void exithal(void);
TREE *find_symbol(TREE *, int);
TREE *find_symbol_add(TREE *, int);
BYTE2 find_word(DICTIONARY *, STRING);
void free_dictionary(DICTIONARY *);
char *format_output(char *);
char *generate_reply(MODEL *, DICTIONARY *);
void ignore(int);
void initialize_context(MODEL *);
void initialize_dictionary(DICTIONARY *);
bool initialize_error(char *);
DICTIONARY *initialize_list(char *);
bool initialize_status(char *);
SWAP *initialize_swap(char *);
void learn(MODEL *, DICTIONARY *);
void load_dictionary(FILE *, DICTIONARY *);
bool load_model(char *, MODEL *);
void load_tree(FILE *, TREE *);
void load_word(FILE *, DICTIONARY *);
void make_greeting(DICTIONARY *);
DICTIONARY *make_keywords(MODEL *, DICTIONARY *);
char *make_output(DICTIONARY *);
void make_words(char *, DICTIONARY *);
DICTIONARY *new_dictionary(void);
MODEL *new_model(int);
TREE *new_node(void);
SWAP *new_swap(void);
bool print_header(FILE *);
void prompt_judge(void);
char *read_input(char *);
DICTIONARY *reply(MODEL *, DICTIONARY *);
void save_dictionary(FILE *, DICTIONARY *);
void save_model(char *, MODEL *);
void save_tree(FILE *, TREE *);
void save_word(FILE *, STRING);
int search_dictionary(DICTIONARY *, STRING, bool *);
int search_node(TREE *, int, bool *);
int seed(MODEL *, DICTIONARY *);
void setup(MODEL *model);
void show_dictionary(DICTIONARY *);
bool status(char *, ...);
void train(MODEL *, char *);
void typein(char);
void update_context(MODEL *, int);
void update_model(MODEL *, int);
char *update_output(char *, char *, bool);
void upper(char *);
/*
 *		NOTE(review): usleep is declared here with an int argument; this may
 *		clash with the declaration in <unistd.h> on POSIX systems - confirm.
 */
void usleep(int);
int wordcmp(STRING, STRING);
bool word_exists(DICTIONARY *, STRING);
void write_input(char *);
void write_output(char *);

/*===========================================================================*/

/*
 *		Line width used by format_output(); write_output() and write_input()
 *		overwrite it before each call.
 */
int width=75;
/*
 *		Order passed to new_model() by main().
 */
int order=5;
int timeout=2;
/*
 *		Number of the current judge; changed through change_judge().
 */
int judge=0;
/*
 *		Toggled by the DELAY command inside setup().
 */
bool typing_delay=TRUE;
/*
 *		Word lists loaded by main() from megahal.ban, megahal.aux,
 *		megahal.grt and megahal.swp.  `fin' is not assigned in the part of
 *		the file visible here.
 */
DICTIONARY *ban=NULL;
DICTIONARY *aux=NULL;
DICTIONARY *fin=NULL;
DICTIONARY *grt=NULL;
SWAP *swp=NULL;
/*
 *		Destinations used by error() and status().
 *		NOTE(review): ISO C does not guarantee that stderr and stdout are
 *		constant expressions, so these initialisers may be rejected by some
 *		toolchains - confirm on the target compiler.
 */
FILE *errorfp=stderr;
FILE *statusfp=stdout;
bool used_key;

/*
 *		Command words recognised after the "@@" prefix by execute_command().
 */
COMMAND command[] = {
	{ { 1, "X" }, QUIT },
	{ { 1, "D" }, DELAY },
	{ { 1, "T" }, SETUP }
};

/*===========================================================================*/

/*
 *		Function:	Main
 *
 *		Purpose:		Initialise everything, and then do an infinite loop.  In
 *						the loop, we read the user's input and reply to it, and
 *						do some housekeeping task such as responding to special
 *						commands.
 */
int main(int argc, char **argv)
{
	char *input=NULL;
	char *output=NULL;
	DICTIONARY *words=NULL;
	MODEL *model=NULL;

	/*
	 *		Check that the type sizes are okay
	 */
	if(sizeof(BYTE1)!=1) {
		error("main", "BYTE1 size is %d", sizeof(BYTE1));
		return(0);
	}
	if(sizeof(BYTE2)!=2) {
		error("main", "BYTE2 size is %d", sizeof(BYTE2));
		return(0);
	}
	if(sizeof(BYTE4)!=4) {
		error("main", "BYTE4 size is %d", sizeof(BYTE4));
		return(0);
	}

	/*
	 *		Do some initialisation 
	 */
	initialize_error("megahal.log");
	srandom(time(NULL));
	ignore(0);

	/*
	 *		Create a dictionary which will be used to hold the segmented
	 *		version of the user's input.
	 */
	words=new_dictionary();

	/*
	 *		Create a language model.
	 */
	model=new_model(order);

	/*
	 *		Train the model on a text if one exists
	 */
	if(load_model("megahal.brn", model)==FALSE) train(model, "megahal.trn");

	/*
	 *		Read a dictionary containing banned keywords, auxiliary keywords,
	 *		greeting keywords and swap keywords
	 */
	ban=initialize_list("megahal.ban");
	aux=initialize_list("megahal.aux");
	grt=initialize_list("megahal.grt");
	swp=initialize_swap("megahal.swp");

	/*
	 *		Read input, formulate a reply and display it as output
	 */
	setup(model);
	while(TRUE) {
		input=read_input("> ");
		write_input(input);
		upper(input);
		make_words(input,words);
		learn(model, words);

		/*
		 *		If the input was a command, then execute it
		 */
		switch(execute_command(words, TRUE)) {
			case QUIT:
				save_model("megahal.brn", model);
				exithal();
			case SETUP:
				prompt_judge();
				make_greeting(words);
				break;
			case JUDGE:
				save_model("megahal.brn", model);
				make_greeting(words);
				break;
			default:
				break;	
		}

		output=generate_reply(model, words);
		write_output(output);
	}

	return(0);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Execute_Command
 *
 *		Purpose:		Detect whether the user has typed a command, and
 *						execute the corresponding function.
 */
COMMAND_WORDS execute_command(DICTIONARY *words, bool allow_judge)
{
	STRING prefix={ 2, "@@" };
	STRING *next;
	register int pos;
	register int k;
	int number;

	/*
	 *		A command needs at least a prefix and one following word.
	 */
	if(words->size<=1) return(UNKNOWN);

	/*
	 *		Walk over every word that has a successor.  Whenever the word is
	 *		the command prefix, the successor is compared against the command
	 *		table first, and then (if permitted) interpreted as a judge
	 *		number.  A prefix followed by neither is skipped.
	 */
	for(pos=0; pos<words->size-1; ++pos) {
		if(wordcmp(prefix, words->entry[pos])!=0) continue;

		next=&(words->entry[pos+1]);

		for(k=0; k<COMMAND_SIZE; ++k)
			if(wordcmp(command[k].word, *next)==0)
				return(command[k].command);

		if(allow_judge!=TRUE) continue;

		/*
		 *		atoi() yields zero for non-numbers too, so a zero is only
		 *		accepted when the word really starts with the digit '0'.
		 */
		number=atoi(next->word);
		if((number>0)||((number==0)&&(next->word[0]=='0'))) {
			change_judge(number);
			return(JUDGE);
		}
	}

	return(UNKNOWN);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Setup
 *
 *		Purpose:		Enter the configuration mode.
 */
void setup(MODEL *model)
{
	char buffer[1024];
	char *input=NULL;
	char *output=NULL;
	static DICTIONARY *words=NULL;
	char *filename;

	/*
	 *		Allocate a dictionary if necessary, and mark it as empty.
	 */
	if(words==NULL) words=new_dictionary();
	words->size=0;

	/*
	 *		Get the filename for logging purposes.  If nothing could be read
	 *		the buffer is made empty, so that strtok() returns NULL and the
	 *		default log file is used instead of scanning uninitialised memory.
	 */
	fprintf(stdout, "Filename: ");
	fflush(stdout);
	if(fgets(buffer, sizeof(buffer), stdin)==NULL) buffer[0]='\0';
	filename=strtok(buffer, " \n\t");
	if(initialize_status(filename)==FALSE) initialize_status("megahal.txt");

	/*
	 *		Set the judge number to zero and display a greeting message.
	 */
	change_judge(0);
	make_greeting(words);
	output=generate_reply(model, words);
	write_output(output);

	/*
	 *		Loop until the judge number becomes non-zero, which happens when
	 *		prompt_judge() is executed through the SETUP command.
	 */
	while(judge==0) {
		input=read_input("+ ");
		write_input(input);
		upper(input);
		make_words(input, words);

		switch(execute_command(words, FALSE)) {
			case DELAY:
				typing_delay=(typing_delay==TRUE)?FALSE:TRUE;
				break;
			case SETUP:
				prompt_judge();
				make_greeting(words);
				break;
			default:
				break;
		}

		output=generate_reply(model, words);
		write_output(output);
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Prompt_Judge
 *
 *		Purpose:		Read a judge number, and change the judge accordingly.
 */
void prompt_judge(void)
{
	static DICTIONARY *words=NULL;
	char *line=NULL;
	int previous;

	/*
	 *		Remember the judge number on entry so a change can be detected.
	 */
	previous=judge;

	/*
	 *		Lazily create the scratch dictionary, mark it as empty, and
	 *		clear the screen.
	 */
	if(words==NULL) words=new_dictionary();
	words->size=0;
	clear_screen();

	/*
	 *		Keep reading commands until one of them changes the judge
	 *		number.  The QUIT command terminates the program.
	 */
	while(previous==judge) {
		line=read_input("? ");
		upper(line);
		make_words(line, words);
		if(execute_command(words, TRUE)==QUIT) exithal();
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	ExitHAL
 *
 *		Purpose:		Terminate the program.
 */
void exithal(void)
{
	/*
	 *		Leave the program, reporting success to the environment.
	 */
	exit(EXIT_SUCCESS);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Clear_Screen
 *
 *		Purpose:		Clear an MS-DOS shell screen; necessary because
 *						system("CLS") doesn't work, nor does printf("\f")
 *						Bloody Microsoft!
 */
void clear_screen(void)
{
	register int remaining=25;

	/*
	 *		Scroll the old contents away by emitting 25 empty lines.
	 */
	while(remaining>0) {
		putc('\n', stdout);
		--remaining;
	}
	fflush(stdout);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Read_Input
 *
 *		Purpose:		Read an input string from the user.
 */
char *read_input(char *prompt)
{
	static char *input=NULL;
	static size_t capacity=0;
	char *tmp;
	bool finish;
	size_t length;
	size_t wanted;
	int c;

	/*
	 *		The returned string lives in a static buffer which is reused by
	 *		every call; the caller must not free it.  NULL is only returned
	 *		when the buffer could not be allocated at all.
	 *
	 *		The finish boolean variable is used to detect a double line-feed,
	 *		while length contains the number of characters in the input
	 *		string.
	 */
	finish=FALSE;
	length=0;
	if(input==NULL) {
		input=(char *)malloc(sizeof(char));
		if(input==NULL) {
			error("read_input", "Unable to allocate the input string");
			return(input);
		}
		capacity=1;
	}
	input[0]='\0';

	/*
	 *		Display the prompt to the user.  fputs() is used so that the
	 *		prompt is never interpreted as a format string.
	 */
	fputs(prompt, stdout);
	fflush(stdout);

	/*
	 *		Read characters and append them to the input string until a
	 *		double line-feed or the end of the input is seen.
	 */
	while(TRUE) {

		/*
		 *		Read a single character from stdin.  At end-of-file (or on a
		 *		read error) stop, rather than appending the EOF marker
		 *		forever.
		 */
		c=getc(stdin);
		if(c==EOF) break;

		/*
		 *		If the character is a line-feed, then set the finish variable
		 *		to TRUE.  If it already is TRUE, then this is a double line-feed,
		 *		in which case we should exit.  After a line-feed, display the
		 *		prompt again, and set the character to the space character, as
		 *		we don't permit linefeeds to appear in the input.
		 */
		if(c=='\n') {
			if(finish==TRUE) break;
			fputs(prompt, stdout);
			fflush(stdout);
			finish=TRUE;
			c=' ';
		} else {
			finish=FALSE;
		}

		/*
		 *		Make sure there is room for the new character and the
		 *		terminator, doubling the buffer when it is exhausted.
		 */
		wanted=length+2;
		if(wanted>capacity) {
			size_t grown=capacity*2;
			if(grown<wanted) grown=wanted;
			tmp=(char *)realloc((char *)input, sizeof(char)*grown);
			if(tmp==NULL) {
				error("read_input", "Unable to re-allocate the input string");
				return(input);
			}
			input=tmp;
			capacity=grown;
		}

		/*
		 *		Add the character just read to the input string.
		 */
		input[length]=(char)c;
		++length;
		input[length]='\0';
	}

	/*
	 *		Strip trailing whitespace, taking care not to step in front of
	 *		the buffer when the whole input is whitespace.
	 */
	while((length>0)&&(isspace((unsigned char)input[length-1]))) --length;
	input[length]='\0';

	/*
	 *		We have finished, so return the input string.
	 */
	return(input);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Initialize_Error
 *
 *		Purpose:		Close the current error file pointer, and open a new one.
 */
bool initialize_error(char *filename)
{
	FILE *opened;

	/*
	 *		Close the current log, and fall back to stderr straight away so
	 *		that errorfp never refers to a closed stream.
	 */
	if(errorfp!=stderr) {
		fclose(errorfp);
		errorfp=stderr;
	}

	/*
	 *		A NULL filename simply selects stderr.
	 */
	if(filename==NULL) return(TRUE);

	/*
	 *		Open the new log for appending; on failure stderr stays selected
	 *		and FALSE is returned.
	 */
	opened=fopen(filename, "a");
	if(opened==NULL) return(FALSE);
	errorfp=opened;

	return(print_header(errorfp));
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Error
 *
 *		Purpose:		Print the specified message to the error file.
 */
bool error(char *title, char *fmt, ...)
{
	va_list args;

	/*
	 *		Emit "<title>: <formatted message>.\n" on the error stream and
	 *		flush it immediately.  Always returns TRUE.
	 */
	fprintf(errorfp, "%s: ", title);

	va_start(args, fmt);
	vfprintf(errorfp, fmt, args);
	va_end(args);

	fputs(".\n", errorfp);
	fflush(errorfp);

	return(TRUE);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Initialize_Status
 *
 *		Purpose:		Close the current status file pointer, and open a new one.
 */
bool initialize_status(char *filename)
{
	FILE *opened;

	/*
	 *		Close the current status file, and fall back to stdout straight
	 *		away.  Without this, a second call made after a failed one (as
	 *		setup() does) would close the same stream twice.
	 */
	if(statusfp!=stdout) {
		fclose(statusfp);
		statusfp=stdout;
	}

	/*
	 *		A NULL filename is reported as a failure; stdout stays selected.
	 */
	if(filename==NULL) return(FALSE);

	/*
	 *		Open the new status file for appending; on failure stdout stays
	 *		selected and FALSE is returned.
	 */
	opened=fopen(filename, "a");
	if(opened==NULL) return(FALSE);
	statusfp=opened;

	return(print_header(statusfp));
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Status
 *
 *		Purpose:		Print the specified message to the status file.
 */
bool status(char *fmt, ...)
{
	va_list args;

	/*
	 *		Format the message onto the status stream and flush it so the
	 *		log is up to date.  Always returns TRUE.
	 */
	va_start(args, fmt);
	(void)vfprintf(statusfp, fmt, args);
	va_end(args);

	(void)fflush(statusfp);
	return(TRUE);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Print_Header
 *
 *		Purpose:		Display a copyright message and timestamp.
 */
bool print_header(FILE *file)
{
	time_t clock;
	char timestamp[1024];
	struct tm *local;

	/*
	 *		Build the timestamp line.  If the local time is unavailable, or
	 *		could not be formatted, the line is left empty.
	 */
	clock=time(NULL);
	local=localtime(&clock);
	if((local==NULL)||(strftime(timestamp, sizeof(timestamp),
	"Start at: [%Y/%m/%d %H:%M:%S]\n", local)==0))
		timestamp[0]='\0';

	fprintf(file, "(c)1998 Cambridge Center For Behavioral Studies all "
	"rights reserved\n");
	fprintf(file, "[MegaHALv8][Jason Hutchens]\n");

	/*
	 *		The timestamp is data, not a format, so write it with fputs().
	 */
	fputs(timestamp, file);
	fflush(file);

	return(TRUE);
}

/*---------------------------------------------------------------------------*/

/*
 *    Function:   Write_Output
 *
 *    Purpose:    Display the output string.
 */
void write_output(char *output)
{
	char stamp[1024];
	time_t now;
	struct tm *when;
	char *wrapped;
	char *line;

	/*
	 *		Tidy up the reply and build the log prefix.
	 */
	capitalize(output);
	now=time(NULL);
	when=localtime(&now);
	strftime(stamp, sizeof(stamp), "PROGRAM[%H:%M:%S]", when);

	/*
	 *		Show the reply to the user, wrapped at 75 columns.
	 */
	width=75;
	wrapped=format_output(output);
	delay(wrapped);

	/*
	 *		Re-wrap the reply at 60 columns, and log it line by line with
	 *		the prefix in front of each line.  When there are no lines at
	 *		all, a single entry is still written.
	 */
	width=60;
	wrapped=format_output(output);

	line=strtok(wrapped, "\n");
	if(line==NULL) {
		(void)status("%s%s\n", stamp, wrapped);
		return;
	}
	do {
		(void)status("%s%s\n", stamp, line);
		line=strtok(NULL, "\n");
	} while(line!=NULL);
}
 
/*---------------------------------------------------------------------------*/

/*
 *		Function:	Capitalize
 *
 *		Purpose:		Convert a string to look nice.
 */
void capitalize(char *string)
{
	size_t length;
	size_t i;
	int start=1;
	unsigned char ch;

	/*
	 *		The first letter of the string, and the first letter after
	 *		sentence punctuation followed by whitespace, is made uppercase;
	 *		every other letter is made lowercase.  The string is modified in
	 *		place.
	 *
	 *		Characters are converted to unsigned char before being handed to
	 *		the <ctype.h> functions, which are undefined for negative values
	 *		other than EOF.  The length is computed once.
	 */
	length=strlen(string);
	for(i=0; i<length; ++i) {
		ch=(unsigned char)string[i];
		if(isalpha(ch)) {
			if(start) string[i]=(char)toupper(ch);
			else string[i]=(char)tolower(ch);
			start=0;
		}
		/*
		 *		Punctuation is only honoured from the fourth character on.
		 */
		if((i>2)&&(strchr("!.?", string[i-1])!=NULL)&&(isspace(ch)))
			start=1;
	}
}
 
/*---------------------------------------------------------------------------*/

/*
 *		Function:	Upper
 *
 *		Purpose:		Convert a string to its uppercase representation.
 */
void upper(char *string)
{
	char *p;

	/*
	 *		Convert the string to uppercase in place.  Each character is
	 *		converted to unsigned char before being passed to toupper(),
	 *		which is undefined for negative values other than EOF.  Walking
	 *		the string with a pointer avoids calling strlen() on every
	 *		iteration.
	 */
	for(p=string; *p!='\0'; ++p)
		*p=(char)toupper((unsigned char)*p);
}
 
/*---------------------------------------------------------------------------*/

/*
 *    Function:   Write_Input
 *
 *    Purpose:    Log the user's input
 */
void write_input(char *input)
{
	time_t clock;
	char timestamp[1024];
	char tmp[1024];
	struct tm *local;
	char *formatted;
	char *bit;

	/*
	 *		Build the log prefix "JUDGEnn[hh:mm:ss]".  snprintf() keeps the
	 *		result within the bounds of the timestamp buffer.
	 */
	clock=time(NULL);
	local=localtime(&clock);
	strftime(tmp, sizeof(tmp), "[%H:%M:%S]", local);
	snprintf(timestamp, sizeof(timestamp), "JUDGE%02d%s", judge, tmp);

	/*
	 *		Wrap the input at 60 columns.
	 */
	width=60;
	formatted=format_output(input);

	/*
	 *		Log each wrapped line with the prefix in front of it.  The
	 *		current line (bit) must be written, not the start of the buffer,
	 *		otherwise the first line is repeated for every line of input.
	 */
	bit=strtok(formatted, "\n");
	if(bit==NULL) (void)status("%s%s\n", timestamp, formatted);
	while(bit!=NULL) {
		(void)status("%s%s\n", timestamp, bit);
		bit=strtok(NULL, "\n");
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Change_Judge
 *
 *		Purpose:		Perform the necessary functions when changing a judge.
 */
void change_judge(int num)
{
	/*
	 *		Record the new judge number, and announce it in the status log.
	 */
	judge=num;
	(void)status("*** JUDGE%02d ***\n", num);
}

/*---------------------------------------------------------------------------*/

/*
 *    Function:   Format_Output
 *
 *    Purpose:    Format a string to display nicely on a terminal of a given
 *                width.
 */
char *format_output(char *output)
{
   static char *formatted=NULL;
   register int i,j,c;
   int l;

   if(formatted==NULL) {
      formatted=(char *)malloc(sizeof(char));
      if(formatted==NULL) {
         error("format_output", "Unable to allocate formatted");
         return("ERROR");
      }
   }   

   formatted=(char *)realloc((char *)formatted, sizeof(char)*(strlen(output)+2));
   if(formatted==NULL) {
      error("format_output", "Unable to re-allocate formatted");
      return("ERROR");
   }

   l=0;
	j=0;
   for(i=0; i<(int)strlen(output); ++i) {
      if((l==0)&&(isspace(output[i]))) continue;
      formatted[j]=output[i];
      ++j;
      ++l;
      if(l>=width)
         for(c=j-1; c>0; --c)
            if(formatted[c]==' ') {
               formatted[c]='\n';
               l=j-c-1;
               break;
            }
   }
	if((j>0)&&(formatted[j-1]!='\n')) {
		formatted[j]='\n';
		++j;
	}
   formatted[j]='\0';

   return(formatted);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Add_Word
 *
 *		Purpose:		Add a word to a dictionary, and return the identifier
 *						assigned to the word.  If the word already exists in
 *						the dictionary, then return its current identifier
 *						without adding it again.
 */
BYTE2 add_word(DICTIONARY *dictionary, STRING word)
{
	BYTE4 slot;
	BYTE4 i;
	int position;
	bool found;
	BYTE2 *new_index;
	STRING *new_entry;
	char *copy;

	/*
	 *		Returns the identifier of the word, or 0 (the error word) when
	 *		memory could not be allocated.  The dictionary keeps its own copy
	 *		of the characters, so the caller retains ownership of word.word.
	 *
	 *		If the word's already in the dictionary, there is no need to add it
	 */
	position=search_dictionary(dictionary, word, &found);
	if(found==TRUE) return(dictionary->index[position]);

	/*
	 *		The new word goes in the slot just past the current end.  The
	 *		size is only increased once every allocation has succeeded, so a
	 *		failure leaves the dictionary consistent.
	 */
	slot=dictionary->size;

	/*
	 *		Allocate one more entry for the word index.  realloc() on a NULL
	 *		pointer behaves like malloc(), and the result is stored through a
	 *		temporary so the old array survives a failure.
	 */
	new_index=(BYTE2 *)realloc((BYTE2 *)(dictionary->index),
	sizeof(BYTE2)*(slot+1));
	if(new_index==NULL) {
		error("add_word", "Unable to reallocate the index.");
		return(0);
	}
	dictionary->index=new_index;

	/*
	 *		Allocate one more entry for the word array
	 */
	new_entry=(STRING *)realloc((STRING *)(dictionary->entry),
	sizeof(STRING)*(slot+1));
	if(new_entry==NULL) {
		error("add_word", "Unable to reallocate the dictionary.");
		return(0);
	}
	dictionary->entry=new_entry;

	/*
	 *		Copy the new word into the word array.  At least one byte is
	 *		requested, because malloc(0) is allowed to return NULL.
	 */
	copy=(char *)malloc(sizeof(char)*((word.length>0)?word.length:1));
	if(copy==NULL) {
		error("add_word", "Unable to allocate the word.");
		return(0);
	}
	if(word.length>0) memcpy(copy, word.word, word.length);
	dictionary->entry[slot].length=word.length;
	dictionary->entry[slot].word=copy;

	/*
	 *		Shuffle the word index to keep it sorted alphabetically
	 */
	for(i=slot; i>(BYTE4)position; --i)
		dictionary->index[i]=dictionary->index[i-1];

	/*
	 *		Copy the new symbol identifier into the word index, and only now
	 *		make the new word visible.
	 */
	dictionary->index[position]=(BYTE2)slot;
	dictionary->size=slot+1;

	return(dictionary->index[position]);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Search_Dictionary
 *
 *		Purpose:		Search the dictionary for the specified word, returning its
 *						position in the index if found, or the position where it
 *						should be inserted otherwise.
 */
int search_dictionary(DICTIONARY *dictionary, STRING word, bool *find)
{
	int lo;
	int hi;
	int mid;
	int cmp;

	/*
	 *		Nothing can be found in an empty dictionary; the word would be
	 *		inserted at the very front.
	 */
	if(dictionary->size==0) {
		*find=FALSE;
		return(0);
	}

	/*
	 *		Binary search over the sorted index.  The search leaves through
	 *		one of the return statements: either the word is matched, or the
	 *		interval cannot shrink any further and the insertion position is
	 *		reported instead.
	 */
	lo=0;
	hi=dictionary->size-1;
	for(;;) {
		mid=(lo+hi)/2;
		cmp=wordcmp(word, dictionary->entry[dictionary->index[mid]]);

		if(cmp==0) {
			*find=TRUE;
			return(mid);
		}

		if(cmp>0) {
			/*
			 *		The word sorts after the middle element.
			 */
			if(hi==mid) {
				*find=FALSE;
				return(mid+1);
			}
			lo=mid+1;
		} else {
			/*
			 *		The word sorts before the middle element.
			 */
			if(lo==mid) {
				*find=FALSE;
				return(mid);
			}
			hi=mid-1;
		}
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Find_Word
 *
 *		Purpose:		Return the symbol corresponding to the word specified.
 *						We assume that the word with index zero is equal to a
 *						NULL word, indicating an error condition.
 */
BYTE2 find_word(DICTIONARY *dictionary, STRING word)
{
	bool hit;
	int slot;

	/*
	 *		Look the word up in the sorted index; an unknown word maps to
	 *		symbol zero.
	 */
	slot=search_dictionary(dictionary, word, &hit);
	if(hit!=TRUE) return(0);

	return(dictionary->index[slot]);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Wordcmp
 *
 *		Purpose:		Compare two words, and return an integer indicating whether
 *						the first word is less than, equal to or greater than the
 *						second word.
 */
int wordcmp(STRING word1, STRING word2)
{
	register int k;
	int shorter;
	int diff;

	/*
	 *		Only the characters that both words have can be compared.
	 */
	if(word1.length<word2.length) shorter=word1.length;
	else shorter=word2.length;

	/*
	 *		The first differing character decides the order.
	 */
	for(k=0; k<shorter; ++k) {
		diff=(int)(word1.word[k]-word2.word[k]);
		if(diff!=0) return(diff);
	}

	/*
	 *		One word is a prefix of the other (or they are equal); the
	 *		shorter word sorts first.
	 */
	if(word1.length==word2.length) return(0);
	return((word1.length<word2.length)?-1:1);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Free_Dictionary
 *
 *		Purpose:		Release the memory consumed by the dictionary.
 */
void free_dictionary(DICTIONARY *dictionary)
{
	STRING *cur;
	STRING *end;

	/*
	 *		An empty dictionary owns nothing.
	 */
	if(dictionary->size==0) return;

	/*
	 *		Release every stored word, then the two arrays themselves.
	 */
	end=dictionary->entry+dictionary->size;
	for(cur=dictionary->entry; cur!=end; ++cur) free(cur->word);

	free(dictionary->index);
	free(dictionary->entry);
	dictionary->index=NULL;
	dictionary->entry=NULL;
	dictionary->size=0;

	/*
	 *		Put the dummy words back, so the dictionary is ready for reuse.
	 */
	initialize_dictionary(dictionary);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Initialize_Dictionary
 *
 *		Purpose:		Add dummy words to the dictionary.
 */
void initialize_dictionary(DICTIONARY *dictionary)
{
	STRING dummy;

	/*
	 *		The error word is added first, followed by the end marker.
	 */
	dummy.length=7;
	dummy.word="<ERROR>";
	(void)add_word(dictionary, dummy);

	dummy.length=5;
	dummy.word="<FIN>";
	(void)add_word(dictionary, dummy);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	New_Dictionary
 *
 *		Purpose:		Allocate room for a new dictionary.
 */
DICTIONARY *new_dictionary(void)
{
	DICTIONARY *fresh=(DICTIONARY *)malloc(sizeof(DICTIONARY));

	/*
	 *		Without memory there is nothing to set up.
	 */
	if(fresh==NULL) {
		error("new_dictionary", "Unable to allocate dictionary.");
		return(NULL);
	}

	/*
	 *		Start out empty, then add the dummy words.
	 */
	fresh->entry=NULL;
	fresh->index=NULL;
	fresh->size=0;
	initialize_dictionary(fresh);

	return(fresh);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Save_Dictionary
 *
 *		Purpose:		Save a dictionary to the specified file.
 */
void save_dictionary(FILE *file, DICTIONARY *dictionary)
{
	register int k=0;

	/*
	 *		The word count is written first, followed by each word in symbol
	 *		order.
	 */
	fwrite(&(dictionary->size), sizeof(BYTE4), 1, file);
	while(k<dictionary->size) {
		save_word(file, dictionary->entry[k]);
		++k;
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Load_Dictionary
 *
 *		Purpose:		Load a dictionary from the specified file.
 */
void load_dictionary(FILE *file, DICTIONARY *dictionary)
{
	BYTE4 size=0;
	BYTE4 i;

	/*
	 *		Read the word count into an object of the same type that
	 *		save_dictionary() wrote, and give up if it cannot be read rather
	 *		than looping on an indeterminate value.
	 */
	if(fread(&size, sizeof(BYTE4), 1, file)!=1) {
		error("load_dictionary", "Unable to read the dictionary size");
		return;
	}

	/*
	 *		Load the words one at a time, stopping early if the file turns
	 *		out to be shorter than the count promised.
	 */
	for(i=0; i<size; ++i) {
		if(feof(file)||ferror(file)) break;
		load_word(file, dictionary);
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Save_Word
 *
 *		Purpose:		Save a dictionary word to a file.
 */
void save_word(FILE *file, STRING word)
{
	/*
	 *		A word is stored as a one-byte length followed by that many
	 *		characters, with no terminator.
	 */
	fwrite(&(word.length), sizeof(BYTE1), 1, file);
	if(word.length>0)
		fwrite(word.word, sizeof(char), word.length, file);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Load_Word
 *
 *		Purpose:		Load a dictionary word from a file.
 */
void load_word(FILE *file, DICTIONARY *dictionary)
{
	STRING word;

	/*
	 *		Read the one-byte length of the word.
	 */
	word.length=0;
	word.word=NULL;
	if(fread(&(word.length), sizeof(BYTE1), 1, file)!=1) {
		error("load_word", "Unable to read the word length");
		return;
	}

	/*
	 *		Read the characters into a temporary buffer.  At least one byte
	 *		is requested, because malloc(0) is allowed to return NULL.
	 */
	word.word=(char *)malloc(sizeof(char)*((word.length>0)?word.length:1));
	if(word.word==NULL) {
		error("load_word", "Unable to allocate word");
		return;
	}
	if(fread(word.word, sizeof(char), word.length, file)!=word.length) {
		error("load_word", "Unable to read the word");
		free(word.word);
		return;
	}

	/*
	 *		add_word() stores its own copy of the characters, so the
	 *		temporary buffer is released afterwards.
	 */
	add_word(dictionary, word);
	free(word.word);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	New_Node
 *
 *		Purpose:		Allocate a new node for the n-gram tree, and initialise
 *						its contents to sensible values.
 */
TREE *new_node(void)
{
	TREE *fresh=(TREE *)malloc(sizeof(TREE));

	/*
	 *		Report an allocation failure, and hand back NULL.
	 */
	if(fresh==NULL) {
		error("new_node", "Unable to allocate the node.");
		return(NULL);
	}

	/*
	 *		A new node has no children and has not been counted yet.
	 */
	fresh->tree=NULL;
	fresh->branch=0;
	fresh->count=0;
	fresh->usage=0;
	fresh->symbol=0;

	return(fresh);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	New_Model
 *
 *		Purpose:		Create and initialise a new ngram model.
 */
MODEL *new_model(int order)
{
	MODEL *model=NULL;

	/*
	 *		Returns a fully initialised model, or NULL if any part of it
	 *		could not be allocated.  Nothing is leaked on failure.
	 */
	model=(MODEL *)malloc(sizeof(MODEL));
	if(model==NULL) {
		error("new_model", "Unable to allocate model.");
		return(NULL);
	}

	model->order=order;
	model->forward=new_node();
	model->backward=new_node();
	model->context=(TREE **)malloc(sizeof(TREE *)*(order+2));
	model->dictionary=NULL;

	/*
	 *		new_node() has already logged its own failure.
	 */
	if((model->forward==NULL)||(model->backward==NULL)) goto fail;
	if(model->context==NULL) {
		error("new_model", "Unable to allocate context array.");
		goto fail;
	}
	initialize_context(model);

	/*
	 *		new_dictionary() has already logged its own failure.
	 */
	model->dictionary=new_dictionary();
	if(model->dictionary==NULL) goto fail;

	return(model);

fail:
	/*
	 *		The root nodes have no children yet, so a plain free() suffices;
	 *		free(NULL) is harmless.
	 */
	free(model->context);
	free(model->backward);
	free(model->forward);
	free(model);
	return(NULL);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Update_Model
 *
 *		Purpose:		Update the model with the specified symbol.
 */
void update_model(MODEL *model, int symbol)
{
	register int depth;
	TREE *parent;

	/*
	 *		Work from the deepest context slot towards the root, so that each
	 *		slot is derived from the not yet updated slot above it.  Slots
	 *		whose parent is missing are left untouched.
	 */
	depth=model->order+1;
	while(depth>0) {
		parent=model->context[depth-1];
		if(parent!=NULL)
			model->context[depth]=add_symbol(parent, (BYTE2)symbol);
		--depth;
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Update_Context
 *
 *		Purpose:		Update the context of the model without adding the symbol.
 */
void update_context(MODEL *model, int symbol)
{
	register int depth;
	TREE *parent;

	/*
	 *		Same traversal as update_model(), except that the symbol is only
	 *		looked up; a slot becomes NULL when the symbol is unknown in its
	 *		parent context.
	 */
	depth=model->order+1;
	while(depth>0) {
		parent=model->context[depth-1];
		if(parent!=NULL)
			model->context[depth]=find_symbol(parent, symbol);
		--depth;
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Add_Symbol
 *
 *		Purpose:		Update the statistics of the specified tree with the
 *						specified symbol, which may mean growing the tree if the
 *						symbol hasn't been seen in this context before.
 */
TREE *add_symbol(TREE *tree, BYTE2 symbol)
{
	TREE *node=NULL;

	/*
	 *		Search for the symbol in the subtree of the tree node, adding it
	 *		if necessary.  Nothing can be counted if no node is available,
	 *		in which case NULL is returned.
	 */
	node=find_symbol_add(tree, symbol);
	if(node==NULL) return(NULL);

	/*
	 *		Increment the symbol counts, unless the counter of the node has
	 *		reached the largest value that fits into a BYTE2.
	 */
	if(node->count<65535) {
		node->count+=1;
		tree->usage+=1;
	}

	return(node);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Find_Symbol
 *
 *		Purpose:		Return a pointer to the child node, if one exists, which
 *						contains the specified symbol.
 */
TREE *find_symbol(TREE *node, int symbol)
{
	bool present=FALSE;
	register int slot;

	/*
	 *		Binary search the children; NULL means that the symbol has no
	 *		child in this node.
	 */
	slot=search_node(node, symbol, &present);
	if(present!=TRUE) return(NULL);

	return(node->tree[slot]);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Find_Symbol_Add
 *
 *		Purpose:		This function is conceptually similar to find_symbol,
 *						apart from the fact that if the symbol is not found,
 *						a new node is automatically allocated and added to the
 *						tree.
 */
TREE *find_symbol_add(TREE *node, int symbol)
{
	register int i;
	TREE *found=NULL;
	bool found_symbol=FALSE;

	/*
	 *		Perform a binary search for the symbol.  If the symbol is found,
	 *		return the existing child.
	 */
	i=search_node(node, symbol, &found_symbol);
	if(found_symbol==TRUE) return(node->tree[i]);

	/*
	 *		Otherwise attach a new sub-node to the tree node at the position
	 *		reported by the search, so that the children remain sorted.
	 *		new_node() has already logged the error if it returns NULL.
	 */
	found=new_node();
	if(found==NULL) return(NULL);
	found->symbol=symbol;
	add_node(node, found, i);

	return(found);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Add_Node
 *
 *		Purpose:		Attach a new child node to the sub-tree of the tree
 *						specified, inserting it at index `position' and
 *						shifting later children up by one.  If the child array
 *						cannot be grown an error is logged and the tree is
 *						left exactly as it was.
 */
void add_node(TREE *tree, TREE *node, int position)
{
	TREE **grown;
	register int i;

	/*
	 *		Allocate room for one more child node.  realloc on a NULL
	 *		pointer behaves like malloc, so an empty sub-tree needs no
	 *		special case.  The result is held in a temporary so that a
	 *		failed allocation does not discard the existing children while
	 *		tree->branch still claims they are there.
	 */
	grown=(TREE **)realloc(tree->tree, sizeof(TREE *)*(tree->branch+1));
	if(grown==NULL) {
		error("add_node", "Unable to reallocate subtree.");
		return;
	}
	tree->tree=grown;

	/*
	 *		Shuffle the nodes down so that we can insert the new node at the
	 *		subtree index given by position.
	 */
	for(i=tree->branch; i>position; --i)
		tree->tree[i]=tree->tree[i-1];

	/*
	 *		Add the new node to the sub-tree.
	 */
	tree->tree[position]=node;
	tree->branch+=1;
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Search_Node
 *
 *		Purpose:		Binary-search the children of the given node for the
 *						specified symbol.  On a hit, *found_symbol is set to
 *						TRUE and the child's index is returned.  On a miss,
 *						*found_symbol is set to FALSE and the index at which
 *						the symbol would have to be inserted to keep the
 *						children sorted is returned.
 */
int search_node(TREE *node, int symbol, bool *found_symbol)
{
	int lo=0;
	int hi=node->branch-1;
	int mid;
	int diff;

	*found_symbol=FALSE;

	/*
	 *		An empty subtree gives hi==-1, so the loop body never runs and
	 *		the insertion point is zero.
	 */
	while(lo<=hi) {
		mid=(lo+hi)/2;
		diff=symbol-node->tree[mid]->symbol;

		if(diff==0) {
			*found_symbol=TRUE;
			return(mid);
		}

		if(diff>0) lo=mid+1;
		else hi=mid-1;
	}

	/*
	 *		The range has closed without a match; `lo' is the first index
	 *		whose symbol is greater than the one searched for.
	 */
	return(lo);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Initialize_Context
 *
 *		Purpose:		Reset the model's context by clearing every slot from
 *						index zero up to and including index model->order.
 */
void initialize_context(MODEL *model)
{
	int depth=0;

	while(depth<=model->order) {
		model->context[depth]=NULL;
		++depth;
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Update_Output
 *
 *		Purpose:		Add a new word either at the beginning or end of the
 *						current output string.  The output buffer is resized
 *						with realloc, so the caller must continue with the
 *						returned pointer.  If the buffer cannot be grown an
 *						error is logged, the old buffer is released and the
 *						string literal "ERROR" is returned.
 */
char *update_output(char *output, char *word, bool prepend)
{
	size_t lout=strlen(output);
	size_t lword=strlen(word);
	char *grown;

	/*
	 *		Both strings plus the terminator need lout+lword+1 bytes; the
	 *		original sizing of one byte more than that is kept.
	 */
	grown=(char *)realloc(output, sizeof(char)*(lout+lword+2));
	if(grown==NULL) {
		error("update_output", "Unable to re-allocate output");
		/*
		 *		The caller is handed a literal in place of its buffer, so
		 *		nobody else can free the old block; release it here.
		 */
		free(output);
		return("ERROR");
	}
	output=grown;

	if(prepend==FALSE) {
		/*
		 *		Append the word together with its terminator.
		 */
		memcpy(output+lout, word, lword+1);
	} else {
		/*
		 *		Slide the existing text (terminator included) up to make
		 *		room; the regions overlap, hence memmove.
		 */
		memmove(output+lword, output, lout+1);
		memcpy(output, word, lword);
	}

	return(output);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Learn
 *
 *		Purpose:		Train the model on one tokenised input.  The words are
 *						fed to the forward tree in order and then to the
 *						backward tree in reverse order; each pass is closed
 *						with the sentence-terminating symbol (1).  Inputs with
 *						no more words than the model's order are ignored.
 */
void learn(MODEL *model, DICTIONARY *words)
{
	BYTE2 id;
	int idx;

	/*
	 *		Too short to be worth learning from.
	 */
	if(words->size<=(model->order)) return;

	/*
	 *		Forward pass.  add_word is used here, so words new to the
	 *		model's dictionary are entered as they are met.
	 */
	initialize_context(model);
	model->context[0]=model->forward;
	idx=0;
	while(idx<words->size) {
		id=add_word(model->dictionary, words->entry[idx]);
		update_model(model, id);
		++idx;
	}
	update_model(model, 1);

	/*
	 *		Backward pass.  Every word was entered into the dictionary by
	 *		the forward pass, so a plain lookup is enough.
	 */
	initialize_context(model);
	model->context[0]=model->backward;
	idx=words->size-1;
	while(idx>=0) {
		id=find_word(model->dictionary, words->entry[idx]);
		update_model(model, id);
		--idx;
	}
	update_model(model, 1);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Train
 *
 *		Purpose:	 	Infer a MegaHAL brain from the contents of a text file.
 *						Each line which does not begin with '#' is upper-cased,
 *						split into words and passed to learn.  A NULL filename
 *						or a file which cannot be opened is silently ignored.
 */
void train(MODEL *model, char *filename)
{
	FILE *file;
	char buffer[1024];
	DICTIONARY *words=NULL;

	if(filename==NULL) return;

	file=fopen(filename, "r");
	if(file==NULL) return;

	words=new_dictionary();
	if(words==NULL) {
		/*
		 *		NOTE(review): presumably new_dictionary reports its own
		 *		failure and returns NULL -- confirm.
		 */
		fclose(file);
		return;
	}

	while(fgets(buffer, sizeof(buffer), file)!=NULL) {

		if(buffer[0]=='#') continue;

		/*
		 *		Strip the newline only if there is one.  A final line with
		 *		no trailing newline, or a line longer than the buffer, must
		 *		not lose its last character.
		 */
		buffer[strcspn(buffer, "\n")]='\0';

		upper(buffer);
		make_words(buffer, words);
		learn(model, words);

	}

	/*
	 *		The entries point into `buffer' or at string literals (see
	 *		make_words), so only the entry array itself is released.
	 */
	free(words->entry);
	free(words);
	fclose(file);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Show_Dictionary
 *
 *		Purpose:		Write every word of the dictionary, one per line, to
 *						the file "megahal.dic" in the current directory.  An
 *						error is logged if the file cannot be opened.
 */
void show_dictionary(DICTIONARY *dictionary)
{
	FILE *out;
	int entry;
	int ch;

	out=fopen("megahal.dic", "w");
	if(out==NULL) {
		error("show_dictionary", "Unable to open file");
		return;
	}

	/*
	 *		Words are written character by character for exactly `length'
	 *		characters rather than as C strings.
	 */
	entry=0;
	while(entry<dictionary->size) {
		ch=0;
		while(ch<dictionary->entry[entry].length) {
			fprintf(out, "%c", dictionary->entry[entry].word[ch]);
			++ch;
		}
		fprintf(out, "\n");
		++entry;
	}

	fclose(out);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Save_Model
 *
 *		Purpose:		Save the current state to a MegaHAL brain file.  The
 *						file holds the cookie, the model order, the forward
 *						tree, the backward tree and the dictionary, in that
 *						order.
 */
void save_model(char *filename, MODEL *model)
{
	FILE *brain;

	/*
	 *		The plain-text dictionary listing is produced first, and is
	 *		produced even when no brain filename has been supplied.
	 */
	show_dictionary(model->dictionary);

	if(filename==NULL) return;

	brain=fopen(filename, "wb");
	if(brain==NULL) {
		error("save_model", "Unable to open file `%s'", filename);
		return;
	}

	/*
	 *		Header: the cookie text (without its terminator) and the order.
	 */
	fwrite(COOKIE, sizeof(char), strlen(COOKIE), brain);
	fwrite(&(model->order), sizeof(BYTE1), 1, brain);

	/*
	 *		Body: both trees, then the dictionary.
	 */
	save_tree(brain, model->forward);
	save_tree(brain, model->backward);
	save_dictionary(brain, model->dictionary);

	fclose(brain);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Save_Tree
 *
 *		Purpose:		Write a node to the file, followed recursively by each
 *						of its children in subtree order.  For every node the
 *						symbol, usage, count and branch fields are written.
 */
void save_tree(FILE *file, TREE *node)
{
	int child=0;

	fwrite(&(node->symbol), sizeof(BYTE2), 1, file);
	fwrite(&(node->usage), sizeof(BYTE4), 1, file);
	fwrite(&(node->count), sizeof(BYTE2), 1, file);
	fwrite(&(node->branch), sizeof(BYTE2), 1, file);

	while(child<node->branch) {
		save_tree(file, node->tree[child]);
		++child;
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Load_Tree
 *
 *		Purpose:		Read a node from the file into `node', then allocate
 *						and recursively read as many children as its branch
 *						field announces.  The fields are read in the same
 *						order in which save_tree writes them.
 */
void load_tree(FILE *file, TREE *node)
{
	TREE *child;
	int slot;

	fread(&(node->symbol), sizeof(BYTE2), 1, file);
	fread(&(node->usage), sizeof(BYTE4), 1, file);
	fread(&(node->count), sizeof(BYTE2), 1, file);
	fread(&(node->branch), sizeof(BYTE2), 1, file);

	/*
	 *		A node without children needs no subtree array.
	 */
	if(node->branch==0) return;

	node->tree=(TREE **)malloc(sizeof(TREE *)*(node->branch));
	if(node->tree==NULL) {
		error("load_tree", "Unable to allocate subtree");
		return;
	}

	for(slot=0; slot<node->branch; ++slot) {
		child=new_node();
		node->tree[slot]=child;
		load_tree(file, child);
	}
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Load_Model
 *
 *		Purpose:		Load a model into memory from a MegaHAL brain file.
 *						Returns TRUE once the order, both trees and the
 *						dictionary have been read.  Returns FALSE if the
 *						filename is NULL, the file cannot be opened, or the
 *						file does not begin with the expected cookie.  The
 *						file is closed on every path.
 */
bool load_model(char *filename, MODEL *model)
{
	FILE *file;
	char cookie[16];
	size_t length;

	if(filename==NULL) return(FALSE);

	file=fopen(filename, "rb");
	if(file==NULL) {
		error("load_model", "Unable to open file `%s'", filename);
		return(FALSE);
	}

	/*
	 *		A file too short to contain the cookie is rejected outright, so
	 *		that uninitialised bytes are never compared.
	 *		NOTE(review): assumes COOKIE is no longer than the 16-byte
	 *		cookie buffer -- confirm against its definition.
	 */
	length=strlen(COOKIE);
	if(
		(fread(cookie, sizeof(char), length, file)!=length)||
		(strncmp(cookie, COOKIE, length)!=0)
	) {
		error("load_model", "File `%s' is not a MegaHAL brain", filename);
		fclose(file);
		return(FALSE);
	}

	fread(&(model->order), sizeof(BYTE1), 1, file);
	load_tree(file, model->forward);
	load_tree(file, model->backward);
	load_dictionary(file, model->dictionary);

	/*
	 *		Everything has been read; release the stream.
	 */
	fclose(file);
	return(TRUE);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Make_Words
 *
 *		Purpose:		Break a string into an array of words.  Any previous
 *						contents of `words' are discarded by resetting its
 *						size; the entry array is regrown with realloc.  The
 *						entries are not copies: each one points into `input'
 *						(or at the literal "."), with its extent given by the
 *						length field.  The result is made to end in sentence
 *						punctuation: a "." is appended after a trailing
 *						alphanumeric word, and a trailing word which is
 *						neither alphanumeric nor one of "!.?" is replaced by
 *						".".
 */
void make_words(char *input, DICTIONARY *words)
{
	STRING *tmp;
	int offset=0;

	/*
	 *		Clear the entries in the dictionary
	 */
	words->size=0;

	/*
	 *		If the string is empty then do nothing, for it contains no words.
	 */
	if(strlen(input)==0) return;

	/*
	 *		Walk the string.  `input' always points at the start of the
	 *		current word and `offset' counts the characters consumed so far.
	 */
	while(1) {

		/*
		 *		If the current character is of the same type as the previous
		 *		character, then include it in the word.  Otherwise, terminate
		 *		the current word.
		 */
		if(boundary(input, offset)) {
			/*
			 *		Add the word to the dictionary
			 */
			tmp=(STRING *)realloc(words->entry, (words->size+1)*sizeof(STRING));
			if(tmp==NULL) {
				error("make_words", "Unable to reallocate dictionary");
				return;
			} else
				words->entry=tmp;

			words->entry[words->size].length=offset;
			words->entry[words->size].word=input;
			words->size+=1;

			/*
			 *		The boundary at the end of the string closes the last
			 *		word.
			 */
			if(offset==(int)strlen(input)) break;
			input+=offset;
			offset=0;
		} else {
			++offset;
		}
	}

	/*
	 *		Make sure the sentence ends in punctuation.  The character is
	 *		converted to unsigned char before classification because the
	 *		<ctype.h> functions are undefined for negative arguments, which
	 *		a plain char holding an 8-bit character may produce.
	 */
	if(isalnum((unsigned char)words->entry[words->size-1].word[0])) {
		tmp=(STRING *)realloc(words->entry, (words->size+1)*sizeof(STRING));
		if(tmp==NULL) {
			error("make_words", "Unable to reallocate dictionary");
			return;
		} else
			words->entry=tmp;

		words->entry[words->size].length=1;
		words->entry[words->size].word=".";
		++words->size;
	}
	else if(strchr("!.?", words->entry[words->size-1].word[0])==NULL) {
		words->entry[words->size-1].length=1;
		words->entry[words->size-1].word=".";
	}

	return;
}
 
/*---------------------------------------------------------------------------*/ 
/*
 *		Function:	Boundary
 *
 *		Purpose:		Return whether or not a word boundary exists in a string
 *						at the specified location, i.e. between the characters
 *						at position-1 and position.  There is never a boundary
 *						at position zero and always one at the end of the
 *						string.  An apostrophe with a letter on both sides
 *						stays inside its word.  Otherwise a boundary exists
 *						wherever the text switches between letters and
 *						non-letters, or between digits and non-digits.
 */
bool boundary(char *string, int position)
{
	unsigned char here;
	unsigned char prev;

	if(position==0)
		return(FALSE);

	if(position==(int)strlen(string))
		return(TRUE);

	/*
	 *		The <ctype.h> functions are undefined for negative arguments,
	 *		so characters are classified as unsigned char values.
	 */
	here=(unsigned char)string[position];
	prev=(unsigned char)string[position-1];

	/*
	 *		An apostrophe between two letters does not start a new word...
	 */
	if(
		(here=='\'')&&
		(isalpha(prev)!=0)&&
		(isalpha((unsigned char)string[position+1])!=0)
	)
		return(FALSE);

	/*
	 *		...and neither does the letter which follows such an apostrophe.
	 */
	if(
		(position>1)&&
		(prev=='\'')&&
		(isalpha((unsigned char)string[position-2])!=0)&&
		(isalpha(here)!=0)
	)
		return(FALSE);

	/*
	 *		A switch between letter and non-letter, in either direction.
	 */
	if((isalpha(here)!=0)!=(isalpha(prev)!=0))
		return(TRUE);

	/*
	 *		A switch between digit and non-digit.  The results are reduced
	 *		to 0/1 first: isdigit only promises "non-zero" for a digit, so
	 *		the raw values of two digits need not compare equal.
	 */
	if((isdigit(here)!=0)!=(isdigit(prev)!=0))
		return(TRUE);

	return(FALSE);
}
 
/*---------------------------------------------------------------------------*/ 
/*
 *		Function:	Make_Greeting
 *
 *		Purpose:		Empty the word list and, provided the greeting
 *						dictionary holds more than two entries, add one
 *						greeting word picked at random from the entries at
 *						index two and above.
 */
void make_greeting(DICTIONARY *words)
{
	words->size=0;

	/*
	 *		Nothing beyond the first two entries means nothing to choose.
	 */
	if(grt->size<=2) return;

	(void)add_word(words, grt->entry[random()%(grt->size-2)+2]);
}
 
/*---------------------------------------------------------------------------*/ 
/*
 *		Function:	Generate_Reply
 *
 *		Purpose:		Take the words of the user's input and return a string
 *						which may vaguely be construed as a reply to them.
 *						Candidate replies are generated until `timeout'
 *						seconds have passed; the candidate with the highest
 *						surprise score which also differs from the input wins.
 *						The returned pointer is either a string literal or a
 *						string obtained from make_output.
 */
char *generate_reply(MODEL *model, DICTIONARY *words)
{
	static DICTIONARY *dummy=NULL;
	DICTIONARY *keywords;
	DICTIONARY *candidate;
	char *best;
	float score;
	float best_score;
	int started;

	/*
	 *		Extract the keywords from the user's input.
	 */
	keywords=make_keywords(model, words);

	/*
	 *		Guarantee an answer of some kind: a canned line, replaced by a
	 *		keyword-free babble when that babble differs from the input.
	 *		The empty dictionary used for this is created once and kept.
	 */
	best="I don't know enough to answer you yet!";
	if(dummy==NULL) dummy=new_dictionary();
	candidate=reply(model, dummy);
	if(dissimilar(words, candidate)==TRUE) best=make_output(candidate);

	/*
	 *		Keep generating and scoring keyword-driven replies until the
	 *		waiting period is over.  At least one candidate is always tried.
	 */
	best_score=(float)-1.0;
	started=time(NULL);
	for(;;) {
		candidate=reply(model, keywords);
		score=evaluate_reply(model, keywords, candidate);
		if((score>best_score)&&(dissimilar(words, candidate)==TRUE)) {
			best_score=score;
			best=make_output(candidate);
		}
		if(!((time(NULL)-started)<timeout)) break;
	}

	return(best);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Dissimilar
 *
 *		Purpose:		Return TRUE if the two dictionaries differ, either in
 *						size or in any entry at the same index, and FALSE if
 *						they hold the same words in the same order.
 */
bool dissimilar(DICTIONARY *words1, DICTIONARY *words2)
{
	int idx=0;

	if(words1->size!=words2->size) return(TRUE);

	while(idx<words1->size) {
		if(wordcmp(words1->entry[idx], words2->entry[idx])!=0) return(TRUE);
		++idx;
	}

	return(FALSE);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Make_Keywords
 *
 *		Purpose:		Put all the interesting words from the user's input into
 *						a keywords dictionary, which will be used when generating
 *						a reply.  Words listed in the swap table are replaced
 *						by their substitutes.  The dictionary returned is a
 *						single static instance which is emptied and refilled
 *						on every call.
 */
DICTIONARY *make_keywords(MODEL *model, DICTIONARY *words)
{
	static DICTIONARY *keys=NULL;
	int w;
	int s;
	int matched;

	if(keys!=NULL) free_dictionary(keys);
	else keys=new_dictionary();

	/*
	 *		First pass: ordinary keywords.  Every swap entry whose `from'
	 *		word matches contributes its `to' word; a word with no swap
	 *		entry is offered as it stands.  add_key does the filtering.
	 */
	for(w=0; w<words->size; ++w) {
		matched=0;
		for(s=0; s<swp->size; ++s) {
			if(wordcmp(swp->from[s], words->entry[w])!=0) continue;
			add_key(model, keys, swp->to[s]);
			++matched;
		}
		if(matched==0) add_key(model, keys, words->entry[w]);
	}

	/*
	 *		Auxiliary keywords are only considered when the first pass left
	 *		more than two entries in the dictionary.
	 */
	if(keys->size<=2) return(keys);

	/*
	 *		Second pass: same walk, but offering the words to add_aux.
	 */
	for(w=0; w<words->size; ++w) {
		matched=0;
		for(s=0; s<swp->size; ++s) {
			if(wordcmp(swp->from[s], words->entry[w])!=0) continue;
			add_aux(model, keys, swp->to[s]);
			++matched;
		}
		if(matched==0) add_aux(model, keys, words->entry[w]);
	}

	return(keys);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Add_Key
 *
 *		Purpose:		Add a word to the keyword dictionary, unless it is
 *						unknown to the model, does not begin with an
 *						alphanumeric character, or appears in the ban or aux
 *						dictionaries.
 */
void add_key(MODEL *model, DICTIONARY *keys, STRING word)
{
	/*
	 *		find_word yields zero for a word which is not present.
	 */
	if(find_word(model->dictionary, word)==0) return;

	/*
	 *		The character is classified as an unsigned char because the
	 *		<ctype.h> functions are undefined for negative arguments.
	 */
	if(isalnum((unsigned char)word.word[0])==0) return;

	if(find_word(ban, word)!=0) return;
	if(find_word(aux, word)!=0) return;

	add_word(keys, word);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Add_Aux
 *
 *		Purpose:		Add an auxiliary keyword to the keyword dictionary.
 *						The word is added only if it is known to the model,
 *						begins with an alphanumeric character and appears in
 *						the aux dictionary.
 */
void add_aux(MODEL *model, DICTIONARY *keys, STRING word)
{
	/*
	 *		find_word yields zero for a word which is not present.
	 */
	if(find_word(model->dictionary, word)==0) return;

	/*
	 *		The character is classified as an unsigned char because the
	 *		<ctype.h> functions are undefined for negative arguments.
	 */
	if(isalnum((unsigned char)word.word[0])==0) return;

	if(find_word(aux, word)==0) return;

	add_word(keys, word);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Reply
 *
 *		Purpose:		Generate a dictionary of reply words appropriate to the
 *						given dictionary of keywords.  The reply is grown
 *						forwards from a seed word until a terminating symbol
 *						(0 or 1) is drawn, and then backwards from its start
 *						in the same way.  The entries point at the words held
 *						in the model's dictionary; they are not copies.
 *
 *						The dictionary returned is a single static instance
 *						which is reused by every call, so the caller must not
 *						free it and must finish with it before calling reply
 *						again.
 */
DICTIONARY *reply(MODEL *model, DICTIONARY *keys)
{
	/*
	 *		Allocated on the first call and recycled afterwards.  As an
	 *		ordinary local this was reallocated, and leaked together with
	 *		its entry array, on every single call.
	 */
	static DICTIONARY *replies=NULL;
	STRING *tmp;
	register int i;
	int symbol;
	bool start=TRUE;

	if(replies==NULL) replies=new_dictionary();
	replies->size=0;

	/*
	 *		Start off by making sure that the model's context is empty.
	 */
	initialize_context(model);
	model->context[0]=model->forward;
	used_key=FALSE;

	/*
	 *		Generate the reply in the forward direction.
	 */
	while(TRUE) {
		/*
		 *		The first symbol comes from seed; every later one is drawn
		 *		from the current context by babble.
		 */
		if(start==TRUE) symbol=seed(model, keys);
		else symbol=babble(model, keys, replies);
		if((symbol==0)||(symbol==1)) break;
		start=FALSE;

		/*
		 *		Append the symbol to the reply dictionary.
		 */
		tmp=(STRING *)realloc(replies->entry, (replies->size+1)*sizeof(STRING));
		if(tmp==NULL) {
			error("reply", "Unable to reallocate dictionary");
			return(replies);
		} else
			replies->entry=tmp;

		replies->entry[replies->size].length=
			model->dictionary->entry[symbol].length;
		replies->entry[replies->size].word=
			model->dictionary->entry[symbol].word;
		replies->size+=1;

		/*
		 *		Extend the current context of the model with the current symbol.
		 */
		update_context(model, symbol);
	}

	/*
	 *		Empty the context again, this time rooted in the backward tree.
	 */
	initialize_context(model);
	model->context[0]=model->backward;

	/*
	 *		Re-create the context of the model from the current reply
	 *		dictionary so that we can generate backwards to reach the
	 *		beginning of the string.
	 */
	if(replies->size>0) for(i=MIN(replies->size-1, model->order); i>=0; --i) {
		symbol=find_word(model->dictionary, replies->entry[i]);
		update_context(model, symbol);
	}

	/*
	 *		Generate the reply in the backward direction.
	 */
	while(TRUE) {
		/*
		 *		Get a random symbol from the current context.
		 */
		symbol=babble(model, keys, replies);
		if((symbol==0)||(symbol==1)) break;

		/*
		 *		Make room for one more entry.
		 */
		tmp=(STRING *)realloc(replies->entry, (replies->size+1)*sizeof(STRING));
		if(tmp==NULL) {
			error("reply", "Unable to reallocate dictionary");
			return(replies);
		} else
			replies->entry=tmp;

		/*
		 *		Shuffle everything up for the prepend.
		 */
		for(i=replies->size; i>0; --i) {
			replies->entry[i].length=replies->entry[i-1].length;
			replies->entry[i].word=replies->entry[i-1].word;
		}

		replies->entry[0].length=model->dictionary->entry[symbol].length;
		replies->entry[0].word=model->dictionary->entry[symbol].word;
		replies->size+=1;

		/*
		 *		Extend the current context of the model with the current symbol.
		 */
		update_context(model, symbol);
	}

	return(replies);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Evaluate_Reply
 *
 *		Purpose:		Measure the surprise of the keywords in a reply relative
 *						to the language model.  The reply is run through the
 *						forward tree and then, reversed, through the backward
 *						tree.  For every reply word which is also a keyword,
 *						the probabilities given by the active contexts are
 *						averaged and the negative log of that average is added
 *						to the score.  Replies with many keywords have their
 *						score scaled down.  An empty reply scores zero.
 */
float evaluate_reply(MODEL *model, DICTIONARY *keys, DICTIONARY *words)
{
	float entropy=(float)0.0;
	float probability;
	TREE *node;
	int symbol;
	int contexts;
	int scored=0;
	int w;
	int depth;

	if(words->size<=0) return((float)0.0);

	/*
	 *		Forward pass.
	 */
	initialize_context(model);
	model->context[0]=model->forward;
	w=0;
	while(w<words->size) {
		symbol=find_word(model->dictionary, words->entry[w]);

		if(find_word(keys, words->entry[w])!=0) {
			probability=(float)0.0;
			contexts=0;
			++scored;

			/*
			 *		NOTE(review): the result of find_symbol is used without
			 *		a NULL check; presumably every reply symbol is present
			 *		in each active context -- confirm.
			 */
			for(depth=0; depth<model->order; ++depth) {
				if(model->context[depth]==NULL) continue;

				node=find_symbol(model->context[depth], symbol);
				probability+=(float)(node->count)/
					(float)(model->context[depth]->usage);
				++contexts;
			}

			if(contexts>0) entropy-=(float)log(probability/(float)contexts);
		}

		update_context(model, symbol);
		++w;
	}

	/*
	 *		Backward pass: the same measurement over the reversed reply.
	 */
	initialize_context(model);
	model->context[0]=model->backward;
	w=words->size-1;
	while(w>=0) {
		symbol=find_word(model->dictionary, words->entry[w]);

		if(find_word(keys, words->entry[w])!=0) {
			probability=(float)0.0;
			contexts=0;
			++scored;

			for(depth=0; depth<model->order; ++depth) {
				if(model->context[depth]==NULL) continue;

				node=find_symbol(model->context[depth], symbol);
				probability+=(float)(node->count)/
					(float)(model->context[depth]->usage);
				++contexts;
			}

			if(contexts>0) entropy-=(float)log(probability/(float)contexts);
		}

		update_context(model, symbol);
		--w;
	}

	/*
	 *		Damp the score of replies which contain a lot of keywords.
	 */
	if(scored>=8) entropy/=(float)sqrt(scored-1);
	if(scored>=16) entropy/=(float)scored;

	return(entropy);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Make_Output
 *
 *		Purpose:		Concatenate the words of the reply dictionary into one
 *						NUL-terminated string.  The result is freshly
 *						allocated, except that a string literal is returned
 *						for an empty dictionary and when the allocation fails;
 *						a caller therefore cannot tell whether the pointer may
 *						be freed.
 */
char *make_output(DICTIONARY *words)
{
	char *output;
	char *cursor;
	int total;
	int w;
	int c;

	if(words->size==0) return("I am utterly speechless!");

	/*
	 *		One byte for the terminator plus the length of every word.
	 */
	total=1;
	for(w=0; w<words->size; ++w) total+=words->entry[w].length;

	output=(char *)malloc(sizeof(char)*total);
	if(output==NULL) {
		error("make_output", "Unable to reallocate output.");
		return("I forget what I was going to say!");
	}

	/*
	 *		The words carry explicit lengths, so they are copied character
	 *		by character rather than as C strings.
	 */
	cursor=output;
	for(w=0; w<words->size; ++w) {
		for(c=0; c<words->entry[w].length; ++c) {
			*cursor=words->entry[w].word[c];
			++cursor;
		}
	}
	*cursor='\0';

	return(output);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Babble
 *
 *		Purpose:		Return a random symbol from the current context, or a
 *						zero symbol identifier if the context has no children.
 *						The deepest non-NULL context is used.  Starting from a
 *						random child, the children are walked cyclically; the
 *						walk stops early at the first child which is a usable
 *						keyword, and otherwise stops once a random share of
 *						the context's usage has been consumed by the counts
 *						of the children passed over.
 */
int babble(MODEL *model, DICTIONARY *keys, DICTIONARY *words)
{
	TREE *node;
	int depth;
	int slot;
	int budget;
	int symbol;

	/*
	 *		Later slots overwrite earlier ones, leaving the deepest context
	 *		which is set.
	 */
	for(depth=0; depth<=model->order; ++depth) {
		if(model->context[depth]==NULL) continue;
		node=model->context[depth];
	}

	if(node->branch==0) return(0);

	/*
	 *		Pick the starting child and the usage budget, in that order.
	 */
	slot=random()%(node->branch);
	budget=random()%(node->usage);

	while(budget>=0) {
		symbol=node->tree[slot]->symbol;

		/*
		 *		A keyword is taken on the spot provided it is not already
		 *		in the reply.  An auxiliary keyword additionally requires
		 *		that some keyword has been used before.
		 */
		if(
			(find_word(keys, model->dictionary->entry[symbol])!=0)&&
			((used_key==TRUE)||
			(find_word(aux, model->dictionary->entry[symbol])==0))&&
			(word_exists(words, model->dictionary->entry[symbol])==FALSE)
		) {
			used_key=TRUE;
			return(symbol);
		}

		/*
		 *		Charge this child's count to the budget and move on to the
		 *		next child, wrapping round at the end of the subtree.
		 */
		budget-=node->tree[slot]->count;
		if(slot>=(node->branch-1)) slot=0;
		else slot=slot+1;
	}

	return(symbol);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Word_Exists
 *
 *		Purpose:		Linear search of a dictionary.  Returns TRUE if any
 *						entry compares equal to the word under wordcmp, and
 *						FALSE otherwise.
 */
bool word_exists(DICTIONARY *dictionary, STRING word)
{
	int idx=0;

	while(idx<dictionary->size) {
		if(wordcmp(dictionary->entry[idx], word)==0) return(TRUE);
		++idx;
	}

	return(FALSE);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Seed
 *
 *		Purpose:		Seed the reply by guaranteeing that it contains a
 *						keyword, if one exists.  The keyword dictionary is
 *						scanned cyclically from a random entry at index two or
 *						above, and the first keyword which is known to the
 *						model and is not an auxiliary word is returned.  If
 *						there is no such keyword, the symbol of a randomly
 *						chosen child of the root context is returned, or zero
 *						when the root has no children.
 */
int seed(MODEL *model, DICTIONARY *keys)
{
	TREE *root=model->context[0];
	register int i;
	int symbol;
	int candidate;
	int stop;

	/*
	 *		Fallback choice.  random()%branch is only an index into the
	 *		root's children; the symbol itself has to be read from the
	 *		child, exactly as babble does.
	 */
	if(root->branch==0) symbol=0;
	else symbol=root->tree[random()%(root->branch)]->symbol;

	if(keys->size>2) {
		/*
		 *		Choose a random starting entry, skipping the first two.
		 */
		do { i=random()%(keys->size); } while(i<2);
		stop=i;
		while(TRUE) {
			candidate=find_word(model->dictionary, keys->entry[i]);
			if(
				(candidate!=0)&&
				(find_word(aux, keys->entry[i])==0)
			)
				return(candidate);

			/*
			 *		Advance, wrapping back to index two, and give up after
			 *		one full circuit.
			 */
			++i;
			if(i==keys->size) i=2;
			if(i==stop) return(symbol);
		}
	}

	return(symbol);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	New_Swap
 *
 *		Purpose:		Allocate a new, empty swap structure.  Returns NULL,
 *						after logging an error, if the allocation fails.
 */
SWAP *new_swap(void)
{
	SWAP *fresh=(SWAP *)malloc(sizeof(SWAP));

	if(fresh!=NULL) {
		fresh->size=0;
		fresh->from=NULL;
		fresh->to=NULL;
		return(fresh);
	}

	error("new_swap", "Unable to allocate swap");
	return(NULL);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Add_Swap
 *
 *		Purpose:		Add a new entry to the swap structure, mapping the word
 *						`s' to the word `d'.  Both words are duplicated, so the
 *						caller keeps ownership of its strings.  If either word
 *						is NULL nothing is added.  If memory runs out an error
 *						is logged and the list keeps its previous size and
 *						contents.
 */
void add_swap(SWAP *list, char *s, char *d)
{
	STRING *grown;
	char *from_copy;
	char *to_copy;

	/*
	 *		An incomplete pair cannot be stored.
	 */
	if((s==NULL)||(d==NULL)) return;

	from_copy=strdup(s);
	to_copy=strdup(d);
	if((from_copy==NULL)||(to_copy==NULL)) {
		error("add_swap", "Unable to duplicate swap words");
		free(from_copy);
		free(to_copy);
		return;
	}

	/*
	 *		Grow both arrays through a temporary, so that a failure leaves
	 *		the existing array reachable.  The size is only advanced once
	 *		both arrays have room for the new entry.
	 */
	grown=(STRING *)realloc(list->from, sizeof(STRING)*(list->size+1));
	if(grown==NULL) {
		error("add_swap", "Unable to reallocate from");
		free(from_copy);
		free(to_copy);
		return;
	}
	list->from=grown;

	grown=(STRING *)realloc(list->to, sizeof(STRING)*(list->size+1));
	if(grown==NULL) {
		error("add_swap", "Unable to reallocate to");
		free(from_copy);
		free(to_copy);
		return;
	}
	list->to=grown;

	list->from[list->size].length=strlen(s);
	list->from[list->size].word=from_copy;
	list->to[list->size].length=strlen(d);
	list->to[list->size].word=to_copy;
	list->size+=1;
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Initialize_Swap
 *
 *		Purpose:		Read a swap structure from a file.  Each line which
 *						does not begin with '#' is expected to hold two words
 *						separated by spaces or tabs; lines holding fewer than
 *						two words are skipped.  An empty list is returned if
 *						the filename is NULL or the file cannot be opened, and
 *						NULL is returned if the list itself cannot be
 *						allocated.
 */
SWAP *initialize_swap(char *filename)
{
	SWAP *list;
	FILE *file=NULL;
	char buffer[1024];
	char *from;
	char *to;

	list=new_swap();
	if(list==NULL) return(NULL);

	if(filename==NULL) return(list);

	file=fopen(filename, "r");
	if(file==NULL) return(list);

	while(fgets(buffer, sizeof(buffer), file)!=NULL) {

		if(buffer[0]=='#') continue;
		from=strtok(buffer, "\t ");
		to=strtok(NULL, "\t \n#");

		/*
		 *		strtok returns NULL when a line runs out of words, which
		 *		happens for blank lines and for lines with a single word.
		 */
		if((from==NULL)||(to==NULL)) continue;

		add_swap(list, from, to);
	}

	fclose(file);
	return(list);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Initialize_List
 *
 *		Purpose:		Read a dictionary from a file.
 */
DICTIONARY *initialize_list(char *filename)
{
	DICTIONARY *list;
	FILE *file=NULL;
	STRING word;
	char *string;
	char buffer[1024];

	list=new_dictionary();

	if(filename==NULL) return(list);

	file=fopen(filename, "r");
	if(file==NULL) return(list);

	while(!feof(file)) {

		if(fgets(buffer, 1024, file)==NULL) break;
		if(buffer[0]=='#') continue;
		string=strtok(buffer, "\t \n#");

		if((string!=NULL)&&(strlen(string)>0)) {
			word.length=strlen(string);
			word.word=strdup(buffer);
			add_word(list, word);
		}
	}

	fclose(file);
	return(list);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Delay
 *
 *		Purpose:		Display the string to stdout as if it was typed by a human.
 *						With the typing delay switched off the string is
 *						written in one go.  Otherwise each character is handed
 *						to typein, with an extra pause before the last one.
 *						An empty string produces no output in either mode.
 */
void delay(char *string)
{
	register int i;
	int last;

	/*
	 *		Don't simulate typing if the feature is turned off.  The string
	 *		is passed as data, never as the format itself, so that a '%' in
	 *		a reply is printed rather than interpreted.
	 */
	if(typing_delay==FALSE)	{
		fprintf(stdout, "%s", string);
		return;
	}

	/*
	 *		With nothing to type there is no last character either.
	 */
	last=(int)strlen(string)-1;
	if(last<0) return;

	/*
	 *		Display the entire string, one character at a time, hesitating
	 *		before the final character.
	 */
	for(i=0; i<last; ++i) typein(string[i]);
	usleep((D_THINK+random()%V_THINK-random()%V_THINK)/2);
	typein(string[last]);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Typein
 *
 *		Purpose:		Display a character to stdout as if it was typed by a human.
 *						A randomised keyboard delay precedes the character, and
 *						a non-alphanumeric character is sometimes followed by a
 *						longer thinking pause.
 */
void typein(char c)
{
	/*
	 *		Standard keyboard delay
	 */
	usleep(D_KEY+random()%V_KEY-random()%V_KEY);
	fprintf(stdout, "%c", c);
	fflush(stdout);

	/*
	 *		A random thinking delay.  The character is classified as an
	 *		unsigned char because the <ctype.h> functions are undefined for
	 *		negative arguments.
	 */
	if((!isalnum((unsigned char)c))&&((random()%100)<P_THINK))
		usleep(D_THINK+random()%V_THINK-random()%V_THINK);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Ignore
 *
 *		Purpose:		Log the occurrence of a signal, but ignore it, and
 *						(re)install the program's signal handlers.  A signal
 *						number of zero installs the handlers without logging
 *						anything.
 */
void ignore(int sig)
{
	static const int fatal[]={ SIGILL, SIGFPE, SIGSEGV };
	unsigned int k;

	if(sig!=0) error("ignore", "MegaHAL received signal %d", sig);

	/*
	 *		Interrupts come back here; the fatal signals go to die.
	 */
	signal(SIGINT, ignore);
	for(k=0; k<sizeof(fatal)/sizeof(fatal[0]); ++k)
		signal(fatal[k], die);
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Die
 *
 *		Purpose:		Tell the user on stderr that the program has died, log
 *						the signal which caused it, and call exithal.
 */
void die(int sig)
{
	fprintf(stderr, "%s", "Program died!  Please restart!\n");
	error("die", "MegaHAL received signal %d", sig);
	exithal();
}

/*---------------------------------------------------------------------------*/

/*
 *		Function:	Usleep
 *
 *		Purpose:		Simulate the Un*x function usleep, for platforms which
 *						provide no similar function.  The period is given in
 *						microseconds.  The wait is a busy loop which polls
 *						clock() until the target tick count is reached, so it
 *						keeps the CPU occupied for the whole period.
 */
void usleep(int period)
{
	clock_t ticks;
	clock_t goal;

	/*
	 *		Convert the period from microseconds to clock ticks and work
	 *		out the tick count at which the wait is over.
	 */
	ticks=(clock_t)(period*CLOCKS_PER_SEC)/(clock_t)1000000;
	goal=ticks+clock();

	/*
	 *		Spin until the clock catches up with the goal.
	 */
	while(goal>clock()) {
	}
}

/*===========================================================================*/

/*
 *		$Log: megahal.c,v $
 *		Revision 1.8  1997/12/24 03:17:01  hutch
 *		More bug fixes, and hopefully the final contest version!
 *
 *		Revision 1.7  1997/12/22  13:18:09  hutch
 *		A few more bug fixes, and non-repeating implemented.
 *
 *		Revision 1.6  1997/12/22 04:27:04  hutch
 *		A few minor bug fixes.
 *
 *		Revision 1.5  1997/12/15 04:35:59  hutch
 *		Final Loebner version!
 *
 *		Revision 1.4  1997/12/11 05:45:29  hutch
 *		the almost finished version.
 *
 *		Revision 1.3  1997/12/10 09:08:09  hutch
 *		Now Loebner complient (tm)
 *
 *		Revision 1.2  1997/12/08 06:22:32  hutch
 *		Tidied up.
 *
 *		Revision 1.1  1997/12/05  07:11:44  hutch
 *		Initial revision
 *
 *		Revision 1.7  1997/12/04 07:07:13  hutch
 *		Added load and save functions, and tidied up some code/
 *
 *		Revision 1.6  1997/12/02 08:34:47  hutch
 *		Added the ban, aux and swp functions.
 *
 *		Revision 1.5  1997/12/02 06:03:04  hutch
 *		Updated to use a special terminating symbol, and to store only
 *		branches of maximum depth, as they are the only ones used in
 *		the reply.
 *
 *		Revision 1.4  1997/10/28 09:23:12  hutch
 *		MegaHAL is babbling nicely, but without keywords.
 *
 *		Revision 1.3  1997/10/15  09:04:03  hutch
 *		MegaHAL can parrot back whatever the user says.
 *
 *		Revision 1.2  1997/07/21 04:03:28  hutch
 *		Fully working.
 *
 *		Revision 1.1  1997/07/15 01:55:25  hutch
 *		Initial revision
 *
 *		Revision 1.1  1997/07/15 01:54:21  hutch
 *		Initial revision
 */

/*===========================================================================*/

