
#include "map_repl_read.hh"

#include <iostream>
#include <fstream>
#include <slist>
#include <string>
#include <iomanip>
#include <cstdio>

#include "config_data.hh"
#include "trim_space.hh"

namespace afilter {

  // Format `i` as upper-case hexadecimal, zero-padded on the left to at
  // least `width` characters (no padding when width is 0).
  //
  // The value is formatted as unsigned, which is what "%X" requires, so a
  // negative `i` yields its two's-complement bit pattern.  The buffer is
  // bounded: results longer than 63 characters are truncated instead of
  // overrunning the stack as the old 9-byte sprintf buffer did.
  static string to_hex(int i, int width = 0) {
    char s[64];
    snprintf(s, sizeof(s), "%0*X", width, static_cast<unsigned int>(i));
    return s;
  }

  // Parse the whole of `s` as a hexadecimal number and return its value.
  //
  // Throws MapReplReadError::bad_hex_string(s) when sscanf cannot consume
  // the entire string (this includes the empty string, where no conversion
  // takes place and `num` keeps its -1 sentinel).
  static int from_hex(const string & s) {
    // "%x" stores through an unsigned int pointer; initialise the target so
    // it is never read indeterminate.
    unsigned int out = 0;
    int num = -1;
    sscanf(s.c_str(), "%x%n", &out, &num);
    if (num < 0 || static_cast<string::size_type>(num) != s.size())
      throw MapReplReadError::bad_hex_string(s);
    return static_cast<int>(out);
  }

  // Fetch the next meaningful line from `in` into `line`.
  //
  // Lines that begin with "##" are skipped entirely.  On any other line,
  // everything from the first "##" onwards is cut off.  `linenum` is bumped
  // once for every physical line successfully read, skipped ones included.
  // If the stream fails, the function returns immediately and leaves `line`
  // as getline left it.
  static void read_line(istream & in, string & line, int & linenum) {
    for (;;) {
      getline(in, line);
      if (!in) return;
      ++linenum;
      const string::size_type comment_at = line.find("##");
      if (comment_at == 0) continue;        // whole-line comment: read on
      if (comment_at != string::npos)
        line.erase(comment_at);             // strip the trailing comment
      return;
    }
  }

  // Split `line` at its first space or tab into `key` (the text before the
  // separator) and `value` (the text after it); both are passed through
  // trim_space.
  //
  // When the line contains no separator at all, the whole line is the key
  // and the value is empty.  (Previously `pos + 1` wrapped around to 0 in
  // that case and the value became a copy of the entire line.)
  static void split_line(const string & line, string & key, string & value) {
    string::size_type pos = line.find_first_of(" \t");
    if (pos == string::npos) {
      key = trim_space(line);
      value = "";
      return;
    }
    key = trim_space(line.substr(0,pos));
    value = trim_space(line.substr(pos+1));
  }

  // Build the "Line <n>: " prefix for this error from its `line` member.
  string MapReplReadError::error_w_line::line_prefix() const {
    char digits[30];
    sprintf(digits, "%d", line);
    string prefix("Line ");
    prefix += digits;
    prefix += ": ";
    return prefix;
  }

  // Message for a string that maps to two different code points; both
  // code points are shown as 4-digit hex.
  string MapReplReadError::ambiguous_str_uni::mesg() const {
    string text("Ambiguous: ");
    text += str;
    text += " goes to both ";
    text += to_hex(uni_char1,4);
    text += " and ";
    text += to_hex(uni_char2,4);
    return text;
  }

  // Message for a code point (shown as 4-digit hex) that maps to two
  // different strings.
  string MapReplReadError::ambiguous_uni_str::mesg() const {
    string text("Ambiguous: ");
    text += to_hex(uni_char,4);
    text += " goes to both ";
    text += str1;
    text += " and ";
    text += str2;
    return text;
  }

  // Implementation helpers for the map reader: the record type for one
  // table entry, the sort predicates applied to lists of entries, and a
  // minimal blank-separated tokenizer over a string.
  class MapReplReadPriv {
  public:
    // One entry of a map table: a string, a code point, and a preference
    // flag for each of the two fields.
    struct item {
      enum TheOne {not_the_one, possibly_the_one, the_one};
      string   str;
      TheOne   str_one;
      unichar  uni_char;
      TheOne   uni_one;
      // Both flags start out as "possibly"; the code point starts at 0.
      item()
        : str_one(possibly_the_one),
          uni_char(0),
          uni_one(possibly_the_one) {}
    };

    // Order by str ascending, then str_one descending, then uni_char
    // ascending.
    static inline bool item_fir (const item& a, const item& b) {
      if (a.str < b.str) return true;
      if (b.str < a.str) return false;
      if (a.str_one != b.str_one) return b.str_one < a.str_one;
      return a.uni_char < b.uni_char;
    }

    // Order by str alone.
    static inline bool item_stro (const item& a, const item& b) {
      return b.str > a.str;
    }

    // Order by uni_char ascending, then uni_one descending, then str
    // ascending.
    static inline bool item_lst (const item& a, const item& b) {
      if (a.uni_char < b.uni_char) return true;
      if (b.uni_char < a.uni_char) return false;
      if (a.uni_one != b.uni_one) return b.uni_one < a.uni_one;
      return a.str < b.str;
    }

    // Tokenizer over an iterator range of a string; tokens are separated
    // by spaces and tabs.
    class s_in_stream {
      typedef string::const_iterator Itr;
      Itr i_;
      Itr end_;
      static bool is_blank(char c) {return c == ' ' || c == '\t';}
    public:
      s_in_stream() {}
      // Point the tokenizer at the range [i, end).
      void set(Itr i, Itr end) {i_ = i; end_ = end;}
      // Skip leading blanks, then store the following run of non-blank
      // characters in s.  s ends up empty when no characters remain.
      s_in_stream & operator >> (string &s) {
        while (i_ != end_ && is_blank(*i_)) ++i_;
        const Itr first = i_;
        while (i_ != end_ && !is_blank(*i_)) ++i_;
        s.assign(first, i_);
        return *this;
      }
      // Note: converts to true once the whole range has been consumed.
      operator bool () const {return i_ == end_;}
    };
  };

  // Expand the '%'-introduced escape sequences in `s` and return the result.
  //
  // Recognised escapes: %a %b %e %f %n %r %s %t %v (the matching control
  // character, with %e = ESC and %s = space), %% (a literal '%'), %xHH (the
  // character with hex code HH) and %# (kept in the output as the two
  // characters "%#").
  //
  // Throws MapReplReadError::invalid_escape_seq for an unknown escape,
  // MapReplReadError::hex_string_improper_size when fewer than two
  // characters follow %x, and whatever from_hex throws for non-hex digits.
  // A lone '%' at the very end of the input is dropped, as before.
  static string debackslash(const string &s) {
    string::const_iterator i   = s.begin();
    string::const_iterator end = s.end();
    string temp;
    bool in_backslash = false;
    while (i != end) {
      // The escape state must be tested before looking for '%'; otherwise
      // the second '%' of "%%" just restarts the escape and a literal '%'
      // can never be produced.
      if (in_backslash) {
        switch (*i) {
        case 'a':  temp += '\a'; break;
        case 'b':  temp += '\b'; break;
        case 'e':  temp += '\x1B'; break;
        case 'f':  temp += '\f'; break;
        case 'n':  temp += '\n'; break;
        case 'r':  temp += '\r'; break;
        case 's':  temp += ' '; break;
        case 't':  temp += '\t'; break;
        case 'v':  temp += '\v'; break;
        case '%':  temp += '%'; break;
        case '#':  temp += "%#"; break;
        case 'x': {
          // Fresh scratch buffer per escape so an error message never
          // shows digits left over from an earlier %x sequence.
          char c[3] = "  ";
          if (++i != end) c[0] = *i;
          else throw MapReplReadError::hex_string_improper_size(c, '2');
          if (++i != end) c[1] = *i;
          else throw MapReplReadError::hex_string_improper_size(c, '2');
          temp += static_cast<char>(from_hex(c));
          break;
        }
        default: throw MapReplReadError::invalid_escape_seq(string("%") + *i);
        }
        in_backslash = false;
      } else if (*i == '%') {
        in_backslash = true;
      } else {
        temp += *i;
      }
      ++i;
    }
    return temp;
  }

  // Print a mapping kind: "One" for m_one, "Many" for any other value.
  static ostream& operator<< (ostream& o,  MapReplData::Stats::Mapping m) {
    if (m == MapReplData::Stats::m_one)
      return o << "One";
    return o << "Many";
  }

  typedef MapReplReadPriv::item Item;
  typedef slist<Item>           Items;
  typedef MapReplReadError      Error;
  typedef MapReplData::Stats    Stats;
  const MapReplData::Stats::Mapping one  = MapReplData::Stats::m_one;
  const MapReplData::Stats::Mapping many = MapReplData::Stats::m_many;

  // Read the map file "<data-dir>/<name>.map" and push its entries onto the
  // front of `items`.
  //
  // File layout as parsed here: the first line is stored in stats.name;
  // then come "special", "extends" and "ontop" key/value lines up to a
  // "begin" line; every following non-empty line is one entry of the form
  //     <string> [1|0|-] <4 hex digits> [1|0|-]
  // where <string> is run through debackslash.  Entries whose string
  // contains "%#" are parsed but not added to `items`.
  //
  // Returns false when the file cannot be opened.  Problems found while
  // parsing are collected in `errors_` (with their line number); if any
  // were collected, `errors_` is thrown once the whole file has been read.
  static bool readin(const string & name, const ConfigData & opts,
                     Stats & stats, Items & items, Error & errors_)
  {
    string file_name = opts.retrieve("data-dir") + '/' + name + ".map";
    ifstream in(file_name.c_str());
    // A failed open sets failbit, not badbit, so test the full stream state.
    if (!in) {
      return false;
    }

    errors_.file = file_name;

    string temp;
    int l = 0;
    read_line(in, stats.name, l);

    string key, value;
    for (;;) {
      read_line(in, temp, l);
      if (!in) {
        // Ran out of input before "begin": report it once rather than
        // spinning forever on a stream that can no longer deliver lines.
        temp = "";
        errors_.add((new Error::expecting_keyword("begin", temp))->set_line(l));
        break;
      }
      split_line(temp, key, value);

      if (key == "special")
        stats.special = value;
      else if (key == "extends")
        stats.extends = value;
      else if (key == "ontop")
        stats.ontop = value;
      else if (key == "begin")
        break;
      else
        errors_.add((new Error::expecting_keyword("begin", temp))->set_line(l));
    }

    Item numerical_item;   // scratch target for "%#" entries, never stored
    MapReplReadPriv::s_in_stream sin;

    string line;

    while(in) {
      read_line(in, line, l);
      if (!in) break;
      if (line.size() == 0) continue;
      sin.set(line.begin(), line.end());

      items.push_front(Item());
      Item * i = &items.front();
      // Tracks whether the Item pushed for this line is still at the front
      // of the list, so the error path never pops somebody else's entry.
      bool on_list = true;
      try {
        sin >> temp;
        temp = debackslash(temp);
        if (temp.find("%#") != string::npos) {
          items.pop_front();
          on_list = false;
          i = &numerical_item;
        }
        i->str = temp;

        // Optional preference flag for the string side.
        sin >> temp;
        if (temp == "1") {
          i->str_one=Item::the_one;
          sin >> temp;
        } else if (temp == "0") {
          i->str_one=Item::not_the_one;
          sin >> temp;
        } else if (temp == "-") {
          i->str_one=Item::possibly_the_one;
          sin >> temp;
        }

        if (temp.size() != 4) throw Error::hex_string_improper_size(temp, '4');
        i->uni_char = from_hex(temp);

        // Optional preference flag for the code point side.
        sin >> temp;
        if (temp == "1")
          i->uni_one=Item::the_one;
        else if (temp == "0")
          i->uni_one=Item::not_the_one;
        else if (temp == "-")
          i->uni_one=Item::possibly_the_one;

      } catch (Error::error_w_line & err) {

        err.line = l;
        errors_.add(err.clone());
        if (on_list) items.pop_front();

      }
    }

    // Report everything that was collected, including header errors in a
    // file that has no entry lines at all.
    if (!errors_.empty()) throw errors_;
    return true;
  }

  // Analyse the collected `items`, fill in data.stats, and build the
  // data.to_unicode / data.from_unicode tables.
  //
  // Pass 1 (sorted with item_fir): first/last character statistics, maximum
  //   string length, and resolution of which entry wins for each distinct
  //   string; a duplicated string whose leading entry is flagged
  //   not_the_one is recorded as an ambiguous_str_uni error.
  // Pass 2 (sorted with item_stro): stats.unambiguous is cleared when one
  //   string is a proper prefix of the string that follows it.
  // Pass 3 (sorted with item_lst): the same resolution as pass 1, keyed on
  //   the code point, recording ambiguous_uni_str errors.
  //
  // Throws `errors_` if it is non-empty after the passes; otherwise the two
  // tables receive the entries flagged the_one for the respective side.
  //
  // NOTE(review): the last-character statistics read the final character of
  // each item's str, so they presume no item has an empty str -- confirm
  // that readin can never produce one.
  static void finish(MapReplData & data, Items & items, Error & errors_)
  {
    Stats & stats = data.stats;

    if (items.empty()) {
      // Nothing to analyse: give the statistics defined, neutral values
      // instead of dereferencing items.begin() on an empty list.
      stats.first_same  = false;
      stats.first_char  = '\0';
      stats.last_same   = false;
      stats.last_char   = '\0';
      stats.unambiguous_last_char = false;
      stats.max_size    = 0;
      stats.unambiguous = true;
      stats.str_mapping   = one;
      stats.uni_mapping   = one;
      if (!errors_.empty()) throw errors_;
      return;
    }

    items.sort(MapReplReadPriv::item_fir);

    slist<Item>::iterator prev = items.begin();

    stats.first_same  = true;
    stats.first_char  = prev->str[0];
    stats.last_same   = true;
    stats.last_char   = *(prev->str.end()-1);
    stats.unambiguous_last_char
      = prev->str.find(stats.last_char) == prev->str.size()-1;
    stats.max_size    = prev->str.size();
    stats.unambiguous = true;
    stats.str_mapping   = one;
    stats.uni_mapping   = one;

    prev->str_one = Item::the_one;

    for (slist<Item>::iterator i = ++items.begin(); i != items.end(); ++i) {

      if (i->str[0] != stats.first_char)
        stats.first_same = false;

      if (*(i->str.end()-1) != stats.last_char)
        stats.last_same = false;

      // The last character is unambiguous for this item when its first
      // occurrence is the item's own final position (this used to compare
      // against the previous item's length).
      if (stats.last_same && stats.unambiguous_last_char)
        stats.unambiguous_last_char
          = i->str.find(stats.last_char) == i->str.size()-1;

      if (i->str.size() > stats.max_size)
        stats.max_size = i->str.size();

      if (i->str == prev->str)
        {
          stats.str_mapping = many;
          if (!prev->str_one)
            errors_.add
              (new Error::ambiguous_str_uni(prev->str, prev->uni_char, i->uni_char));
        }
      else
        {
          // First entry of a new string group: it wins unless it was
          // explicitly flagged otherwise.
          if (i->str_one == Item::possibly_the_one)
            i->str_one = Item::the_one;
          prev = i;
        }
    }

    items.sort(MapReplReadPriv::item_stro);
    prev = items.begin();
    for (slist<Item>::iterator i = ++items.begin(); i != items.end(); ++i) {
      if (i->str != prev->str
          && prev->str == i->str.substr(0, prev->str.size()))
        stats.unambiguous = false;
      prev = i;
    }


    items.sort(MapReplReadPriv::item_lst);
    prev = items.begin();
    prev->uni_one = Item::the_one;

    for (slist<Item>::iterator i = ++items.begin(); i != items.end(); ++i) {
      if (i->uni_char == prev->uni_char) {
        stats.uni_mapping = many;
        if (!prev->uni_one)
          errors_.add
            (new Error::ambiguous_uni_str(prev->str, i->str, prev->uni_char));
      } else {
        if (i->uni_one == Item::possibly_the_one)
          i->uni_one = Item::the_one;
        prev = i;
      }
    }

    if (!errors_.empty()) throw errors_;

    items.sort(MapReplReadPriv::item_fir);
    for (slist<Item>::iterator i = items.begin(); i != items.end(); ++i) {
      if (i->str_one != Item::the_one) continue;
      data.to_unicode.push_back(MapReplData::ToUniPair(i->str, i->uni_char));
    }

    items.sort(MapReplReadPriv::item_lst);
    for (slist<Item>::iterator i = items.begin(); i != items.end(); ++i) {
      if (i->uni_one != Item::the_one) continue;
      data.from_unicode.push_back(MapReplData::FromUniPair(i->uni_char, i->str));
    }
  }

  static void report(const MapReplData & data, ostream & mesg, int verbose_level)
  {
    const Stats & stats = data.stats;
    if (verbose_level > 0) {
      mesg << "Name: " << stats.name << endl;
      mesg << stats.str_mapping << " to " << stats.uni_mapping << endl;
      if (stats.first_same) 
	mesg << "First Character: " << stats.first_char << endl;
      if (stats.last_same) {
	mesg << "Last Char: " << stats.last_char << endl;
	mesg << "Unambiguous last char: " << stats.unambiguous_last_char << endl;
      }
      mesg << "Unambiguous substr match: " << stats.unambiguous << endl;
      mesg << "Max Size: " << stats.max_size << endl;
      if (stats.special.size()) 
	mesg << "Special: " << stats.special << endl;
      if (stats.extends.size()) 
	mesg << "Extends: " << stats.extends << endl;
      if (stats.ontop.size()) 
	mesg << "On Top: " << stats.ontop << endl;
    }
  
    if (verbose_level > 1) {
      mesg << "---" << endl;
      for (MapReplData::ToUnicode::const_iterator i = data.to_unicode.begin();
	   i != data.to_unicode.end(); 
	   ++i)
	mesg << i->first << ' ' << to_hex(i->second,4) << endl;

      mesg << "---" << endl;
      for (MapReplData::FromUnicode::const_iterator i = data.from_unicode.begin();
	   i != data.from_unicode.end(); 
	   ++i)
	mesg << to_hex(i->first,4) << ' ' << i->second << endl;

    }
  }

  bool read(const string & name, const ConfigData & opts,
	    MapReplData & data, ostream & diag, int verbose_level)
  {
    Items       items;
    Error       errors;
    if (!readin(name, opts, data.stats, items, errors)) return false;
    if (data.stats.extends.size()) {
      Stats st;
      Error err;
      if (!readin(data.stats.extends, opts, st, items, err)) {
	errors.add(new Error::cant_read_base(data.stats.extends));
	throw errors;
      }
    }
    finish(data, items, errors);
    report(data, diag, verbose_level);
    return true;
  }
}
