/*************************************************************************
* Bulgarian-English Dictionary
* Copyright (C) 2000  Radostin Radnev <radnev@yahoo.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*************************************************************************/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "database.h"



//=== Class Database =====================================================
// Written by Radostin Radnev - radnev@yahoo.com
// $Id: database.cpp,v 1.9 2001/03/11 05:21:32 radnev Exp $
//
// This class represents the Database. It searches and reads data from a
// text file.
//
// The database file must be plain text in which different data entries are
// delimited by '\0'. This is convenient because C/C++ uses '\0' to
// terminate strings. The structure must be as follows: the word comes
// first and must be followed by the delimiter '\n'. ('\n' can also be used
// to separate different meanings inside the translation data.) After the
// '\n' the translation data (result) starts, and it ends, of course, with
// '\0'. After that the next word starts. The data file must be sorted by
// the contents of the words, that is, by the words with spaces, dashes and
// other non-alpha chars removed. The database also cannot include two
// words with equal contents: "SECOND-HAND" and "SECOND HAND" are the same
// word, namely "SECONDHAND". Finally, the database must start and end with
// '\0'. In this way the search method does not need to check for the
// beginning and the end of the file.
//
// The method uses binary search to find the specified word. The problem is
// that the database does not have fixed-size records (fixed word
// positions), so each probe lands somewhere in the middle of the
// translation data. In that case the method reads forward until it meets
// the data delimiter ('\0'). If it reaches the end of the binary search
// interval, it then looks backward to make sure that there are no more
// words in the search interval. When it meets a data delimiter it compares
// the extracted word with the entered word and either continues searching,
// depending on the result, or stops if the word is found. If the method
// does not find the word, it returns the nearest similar word in the
// search interval.
//
// The forward search is sped up by using dataBuffer, but the backward
// search reads every byte separately. This is not a big problem because
// the method uses only the forward search in most cases.
//
// Currently the word positions in engbul.dat and buleng.dat contain only
// " ", "-" and "'" as non-alpha chars, so only these chars are deleted from
// the word contents. Also, all chars in the word positions of both
// databases are upper case, and the search method expects the search word
// to be upper case only.
//========================================================================



// Definitions of the static constants of class Database
const int  Database::MAX_WORD_LEN      = 50;      // Size in bytes of wordBuffer and compBuffer
const int  Database::MAX_DATA_LEN      = 10000;   // Size in bytes of dataBuffer (one buffered read)
const char Database::WORD_SEPARATOR    = '\n';    // Ends the word part of an entry
const char Database::DATA_SEPARATOR    = '\0';    // Ends the translation data of an entry
const char Database::NON_ALPHA_CHARS[] = "-' ";   // Chars that onlyLetters() strips before comparing



//=== Constructor ========================================================
// Allocate the three working buffers and put the object into a defined
// "no dictionary open" state. The data file itself is opened later by
// createDictionary().
//========================================================================
Database::Database() {
	dataBuffer = new char[MAX_DATA_LEN];
	wordBuffer = new char[MAX_WORD_LEN];
	compBuffer = new char[MAX_WORD_LEN];
	// Start with empty strings so that getWord()/getResult() never hand out
	// uninitialized memory before a dictionary has been opened
	dataBuffer[0] = '\0';
	wordBuffer[0] = '\0';
	compBuffer[0] = '\0';
	// No file yet; createDictionary() uses this to detect a second call
	dataFile = NULL;
	// Give every file offset a defined value until createDictionary() sets them
	firstWordPointer = 0;
	lastWordPointer = 0;
	currentWordPointer = 0;
	lastSearchWordPointer = 0;
} // End of Constructor



//=== Destructor =========================================================
// Close the data file (if one was opened) and release the buffers
//========================================================================
Database::~Database() {
	// dataFile stays NULL unless createDictionary() opened a file
	if (dataFile) {
		fclose(dataFile);
	}
	// Give back the three buffers allocated by the constructor
	delete [] compBuffer;
	delete [] wordBuffer;
	delete [] dataBuffer;
} // End of Destructor



//=== Create Dictionary ==================================================
// It is the real constructor of the object.
// Opens the data file, determines the offsets of the first and the last
// entry and loads the first entry into the buffers.
//
// fileName             - path of the data file
// fixedLastWordPointer - if > 0, taken as the offset of the separator that
//                        precedes the last word; otherwise that offset is
//                        found by scanning backward from the end of file
//
// Returns true on success. Returns false if a dictionary is already open,
// if fileName is NULL, if the file cannot be opened, or if no separator
// can be found while scanning for the last word. After a failure no file
// is left open, so the call may be retried.
//========================================================================
bool Database::createDictionary(const char *fileName, const long fixedLastWordPointer) {
	// Ensure against invoking twice, and reject a missing file name
	if (dataFile != NULL || fileName == NULL) {
		return false;
	}
	// Set buffers to zero length
	dataBuffer[0] = '\0';
	wordBuffer[0] = '\0';
	compBuffer[0] = '\0';
	// Open the data file in binary mode: it contains '\0' bytes and is
	// addressed by computed byte offsets, which text streams do not guarantee
	dataFile = fopen(fileName, "rb");
	// Return false if failed
	if (dataFile == NULL) {
		return false;
	}
	// Set First Word Pointer (the file starts with a separator)
	firstWordPointer = 0;
	// Set Last Word Pointer
	if (fixedLastWordPointer > 0) {
		lastWordPointer = fixedLastWordPointer;
	}
	else {
		// Skip the closing separator of the file, then walk backward one
		// byte at a time until the separator in front of the last word
		long pos = -1;
		bool found = false;
		if (fseek(dataFile, -2L, SEEK_END) == 0) {
			pos = ftell(dataFile);
		}
		while (!found && pos > 0) {
			char c;
			pos--;
			if (fseek(dataFile, pos, SEEK_SET) != 0 || fread(&c, 1, 1, dataFile) != 1) {
				break;
			}
			found = (c == DATA_SEPARATOR);
		}
		// File too short, unreadable or without any separator: give up
		// instead of scanning before the beginning of the file
		if (!found) {
			fclose(dataFile);
			dataFile = NULL;
			return false;
		}
		lastWordPointer = pos;
	}
	// Set Current and Last Search Word Pointer to the first word
	currentWordPointer = firstWordPointer;
	lastSearchWordPointer = firstWordPointer;
	// Read (load) data in buffers
	readData();
	// Store the letters of the first word in compBuffer so that it matches
	// lastSearchWordPointer when findWord() is called for the first time
	onlyLetters(dataBuffer);
	return true;
} // End of createDictionary



//=== Go To First Word ===================================================
// Point word pointer to first word and read (load) data in buffers
//========================================================================
void Database::goToFirstWord() {
	currentWordPointer = firstWordPointer;
	readData();
} // End of goToFirstWord



//=== Go To Last Word =+==================================================
// Point word pointer to last word and read (load) data in buffers
//========================================================================
void Database::goToLastWord() {
	currentWordPointer = lastWordPointer;
	readData();
} // End of goToLastWord



//=== Find Word ==========================================================
// Find the specified word with a binary search over the data file.
//
// word   - word to look for; it is compared with strcmp() against
//          compBuffer, which holds entry words with NON_ALPHA_CHARS
//          removed (presumably the caller passes it upper case and
//          already stripped in the same way - TODO confirm at call sites)
// result - out parameter; *result is set to dataBuffer, which after
//          readData() holds the selected entry ("WORD\nTRANSLATION")
//
// Returns true if an entry with exactly this word was found and false
// otherwise; in the latter case *result points to the nearest entry the
// search ended on.
//
// On return currentWordPointer and lastSearchWordPointer are the offset of
// the selected entry, wordBuffer holds its word and compBuffer holds the
// stripped form of that word, which is used to narrow the next search.
//
// All offsets (b, e, m) are offsets of the DATA_SEPARATOR byte that
// precedes a word.
//
// NOTE(review): the results of fseek()/fread() are not checked, and
// strlen(dataBuffer) relies on a '\0' being present within the
// MAX_DATA_LEN bytes just read.
//========================================================================
bool Database::findWord(const char *word, char **result) {
	long b, e;               // Begin and End position of search
	long m, rm;              // Middle position for binary search and remembered (unadjusted) middle
	int comp;                // Here we store result of comparing two words
	int pos;                 // Offset from the probe position to the next separator

	// Calculating the begin and the end of search area
	b = firstWordPointer;                               // Set first word pointer
	e = lastWordPointer;                                // Set last word pointer
	comp = strcmp(compBuffer, word);                    // Compare last searched word with new one
	if (comp < 0) {                                     // We can narrow search in this way
		b = lastSearchWordPointer;                      // Set begin of the search area
	}                                                   //   to the last search word pointer
	else if (comp > 0) {                                // Set end of the search area
		e = lastSearchWordPointer;                      //   to the last search word pointer
	}                                                   // Good news:
	else {                                              // we have the same word as in the last search
		currentWordPointer = lastSearchWordPointer;     // Set currentWordPointer
		readData();                                     // Read (load) data in buffers
		*result = dataBuffer;                           // Set *result pointer
		return true;                                    // Stop execution and return true
	}

	// Search for word
	while (true) {                                                         // Main loop
		m = (b + e) / 2;                                                   // Get middle position of search area
		rm = m;                                                            // Remember it for the backward search
		fseek(dataFile, m, SEEK_SET);                                      // Set at middle
		fread(dataBuffer, 1, MAX_DATA_LEN, dataFile);                      // Read forward to meet new word
		pos = strlen(dataBuffer);                                          // This is the offset to the next separator
		m += pos;                                                          // Now m is the position of the new word
		// dataBuffer + pos points at the separator itself; onlyLetters()
		// skips it because strchr() also matches the '\0' that terminates
		// NON_ALPHA_CHARS
		onlyLetters(dataBuffer + pos);                                     // Get only letters of new word
		if (m == e) {                                                      // If current position = end pos
			comp = strcmp(compBuffer, word);                               // Compare word with entered word
			if (comp <= 0) {                                               // If it is equal to or less than entered
				break;                                                     //   we exit because found or not found
			}
			else {                                                         // Else we search for new word backward
				m = rm;                                                    // Restore middle position
				fseek(dataFile, m, SEEK_SET);                              // Set again at middle
				do {                                                       // Read backward to meet new word
					m--;
					fseek(dataFile, m, SEEK_SET);
					fread(dataBuffer, 1, 1, dataFile);
				} while (dataBuffer[0] != DATA_SEPARATOR);                 // Now m is the position of the new word
				fread(dataBuffer, 1, MAX_WORD_LEN, dataFile);              // Read data in buffer but only word
				onlyLetters(dataBuffer);                                   // Get only letters of new word
				if (m == b) {                                              // If current position = beginning pos
					comp = strcmp(compBuffer, word);                       // Compare word with entered word
					if (comp < 0) {                                        // If entered word is greater than first word
						m = e;                                             //   take the last word in search area
					}
					break;                                                 //   we exit because search ends
				}
			}
		}
		comp = strcmp(compBuffer, word);                                   // Compare middle word with entered
		if (comp < 0) {                                                    // If middle word less than entered
			b = m;                                                         //   then beginning = middle
		}
		else if (comp > 0) {                                               // If middle word greater than entered
			e = m;                                                         //   then end = middle
		}
		else {                                                             // Else two words are equal
			break;                                                         //   then we exit from loop
		}
	}
	lastSearchWordPointer = m;                                             // Set lastSearchWordPointer
	currentWordPointer = m;                                                // Set currentWordPointer
	readData();                                                            // Read (load) data in buffers
	onlyLetters(dataBuffer);                                               // Get only letters for the next search
	*result = dataBuffer;                                                  // Set *result pointer
	return (comp == 0);                                                    // Return found or not found
} // End of findWord



//=== Only Letters =======================================================
// Delete non letters chars from word
// Store result in compBuffer
//========================================================================
void Database::onlyLetters(const char *word) {
	int i = 0, j = 0;
	while (word[i] != WORD_SEPARATOR) {
		if (strchr(NON_ALPHA_CHARS, word[i]) == NULL) {
			compBuffer[j] = word[i];
			j++;
		}
		i++;
	}
	compBuffer[j] = '\0';
} // End of onlyLetters



//=== Read Data ==========================================================
// Load data in buffers
// Translational data in dataBuffer and word in wordBuffer
//========================================================================
void Database::readData() {
	fseek(dataFile, currentWordPointer + 1, SEEK_SET);
	fread(dataBuffer, 1, MAX_DATA_LEN, dataFile);
	int i = 0;
	while (dataBuffer[i] != WORD_SEPARATOR) {
		wordBuffer[i] = dataBuffer[i];
		i++;
	}
	wordBuffer[i] = '\0';
} // End of readData



//=== Get Word ===========================================================
// Gets word pointed by word pointer
//========================================================================
char *Database::getWord() {
	return wordBuffer;
} // End of getWord



//=== Get Result =========================================================
// Gets result (translation data) pointed by word pointer
//========================================================================
char *Database::getResult() {
	return dataBuffer;
} // End of getResult



//=== Go To Next Word ====================================================
// Advance to the entry that follows the current one and load it.
// Returns true if there was a next entry. Returns false if the current
// entry is already the last one; the position is then left unchanged and
// the current entry is loaded again.
//========================================================================
bool Database::goToNextWord() {
	const bool hasNext = (currentWordPointer < lastWordPointer);
	if (hasNext) {
		// dataBuffer holds the current entry, so its length plus the
		// separator byte is the distance to the next entry
		currentWordPointer += strlen(dataBuffer) + 1;
	}
	readData();
	return hasNext;
} // End of goToNextWord



//=== Get Next Random Word ===============================================
// Pick a random byte position between the first and the last entry, move
// forward from there to the next separator, make the entry that follows
// it the current one and return its word.
//
// Returns wordBuffer (the word of the newly selected entry). The
// translation data is available through getResult().
//
// The random position is drawn with rand(); seeding is left to the caller.
// A dictionary must have been opened with createDictionary() first.
//========================================================================
char *Database::getNextRandomWord() {
	// Scale rand() from [0, RAND_MAX] onto [firstWordPointer, lastWordPointer]
	const double span = (double)(lastWordPointer - firstWordPointer);
	long pos = firstWordPointer + (long)((span * rand()) / (double)RAND_MAX);
	// Guard against rounding taking the position out of the valid range
	if (pos < firstWordPointer) {
		pos = firstWordPointer;
	}
	if (pos > lastWordPointer) {
		pos = lastWordPointer;
	}
	// Read forward from the random position; keep one byte free so that
	// the buffer can always be terminated before strlen() is applied
	size_t bytesRead = 0;
	if (fseek(dataFile, pos, SEEK_SET) == 0) {
		bytesRead = fread(dataBuffer, 1, MAX_DATA_LEN - 1, dataFile);
	}
	dataBuffer[bytesRead] = '\0';
	// The first '\0' found is the separator in front of the next word
	currentWordPointer = pos + strlen(dataBuffer);
	readData();
	return wordBuffer;
} // End of getNextRandomWord
