/* @(#)md.c	1.7 97/10/31 */
#include <assert.h>
#include <errno.h>
#include <iconv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "utfchar.h"
#include "hhentry.h"
#include "md_hanja_dic.h"

/* Runs hhlist_init() on each of the first `size` entries of `table`
   (defined at the end of this file). */
void
hash_table_init (HHList table[], int size);

/* Opens an iconv descriptor converting `from_encoding` to `to_encoding`.
   On failure a message is written to stderr and (iconv_t) -1 is
   returned (defined later in this file). */
iconv_t prepare_iconv (const char *to_encoding,
		       const char *from_encoding);

/* Declared here but defined elsewhere; no call to it is visible in this
   file. */
extern void
test_read_first (FILE *fp);

/* Buckets of dictionary entries.  main() adds each entry to the bucket
   selected by hash() of its UTF-16 hangul key and finally dumps the
   whole table to the output file. */
HHList hash_table[HASH_TABLE_NUMBER];

/* NOTE(review): no use of this list is visible in this file -- confirm
   whether it is still needed. */
HHList hhlist;
/* Capacity of the line buffer and of the two word buffers.  A bracketed
   field is always shorter than the line it is taken from, so a field
   from a line that fit the line buffer always fits a word buffer. */
enum { MD_LINE_SIZE = 256 };

/* Defined later in this file; declared here because main() calls it
   before the definition is seen. */
HH *process_hangul_hanja_pair (iconv_t cd, char *str_hangul, char *str_hanja);

/* Reads one line from FP into BUF (capacity SIZE), without the newline.
   BUF is always NUL-terminated.  Bytes that do not fit are discarded and
   *OVERFLOW is set to 1 (otherwise 0).
   Returns 1 when a line was read, 0 when the input is exhausted (end of
   file or read error with no pending bytes).  A final line that lacks a
   trailing newline is still returned. */
static int
md_read_line (FILE *fp, unsigned char *buf, size_t size, int *overflow)
{
  size_t n = 0;
  int ch;

  *overflow = 0;
  while ((ch = getc (fp)) != EOF && ch != '\n')
    {
      if (n + 1 < size)
	buf[n++] = (unsigned char) ch;
      else
	*overflow = 1;
    }
  buf[n] = '\0';
  return !(ch == EOF && n == 0);
}

/* Extracts one "[...]" field of LINE starting at offset *POS into OUT
   (capacity SIZE, NUL-terminated on success).  Every byte between the
   brackets must have its high bit set.
   Returns 1 on success and moves *POS just past the closing ']';
   returns 0 if the field is malformed or does not fit in OUT. */
static int
md_parse_field (const unsigned char *line, size_t *pos, char *out, size_t size)
{
  size_t in = *pos;
  size_t n = 0;

  if (line[in++] != '[')
    return 0;

  for (; line[in] != ']'; in++)
    {
      /* The terminating NUL is a 7-bit byte too, so an unclosed field
	 is rejected here instead of running past the end of LINE. */
      if (line[in] <= 0x7f || n + 1 >= size)
	return 0;
      out[n++] = (char) line[in];
    }
  out[n] = '\0';
  *pos = in + 1;
  return 1;
}

/* Builds the hangul/hanja hash dictionary.
   argv[1]: input data file, one "[hangul][hanja]" pair per line.
   argv[2]: output file that receives the dumped hash table.
   Each valid pair is turned into an HH entry, its hangul string is
   converted from UTF-8 to UTF-16, hashed, and the entry is added to the
   matching bucket of hash_table.  Malformed lines are reported on stdout
   and skipped.  Exits with -1 on usage, iconv, allocation or file
   errors; returns 0 on success. */
int
main (int argc, char **argv)
{
  unsigned char line[MD_LINE_SIZE];
  char sword[MD_LINE_SIZE];	/* Dictionary Source (hangul field) */
  char tword[MD_LINE_SIZE];	/* Dictionary Target (hanja field) */
  /* Converting UTF-8 to UTF-16 yields at most two output bytes per input
     byte; two extra bytes leave room for a byte-order mark. */
  char utf16_buf[2 * MD_LINE_SIZE + 2];
  FILE *s;
  FILE *fdic;
  iconv_t cd;
  iconv_t cd_utf8_to_utf16;
  int count = 0;
  int overflow;
  int i;

  if (argc != 3)
    {
      fprintf (stderr,
	       "usage: %s <input data file> <output dictionary file>\n",
	       (argc > 0 && argv[0] != NULL) ? argv[0] : "md");
      exit (-1);
    }

  cd = prepare_iconv ("UTF-8", "EUC-KR");
  cd_utf8_to_utf16 = prepare_iconv ("UTF-16", "UTF-8");

  if (cd == (iconv_t) -1 || cd_utf8_to_utf16 == (iconv_t) -1)
    {
      fprintf (stderr, "Failed open iconv, esiting...\n");
      exit (-1);
    }

  /* The file name is used directly: copying it through sprintf() treated
     it as a format string and could overflow a fixed buffer. */
  s = fopen (argv[1], "r");
  if (s == NULL)
    {
      perror (argv[1]);
      exit (-1);
    }

  printf ("Processing %s...\n", argv[1]);

  while (md_read_line (s, line, sizeof line, &overflow))
    {
      size_t pos = 0;
      size_t in_left, out_left, utf16_len;
      char *in_ptr, *out_ptr;
      UTFCHAR *utf16_key;
      HH *hh;
      int hval;

      count++;

      if (overflow
	  || !md_parse_field (line, &pos, sword, sizeof sword)
	  || !md_parse_field (line, &pos, tword, sizeof tword))
	{
	  printf ("Bad data on %d\n", count);
	  continue;
	}
      if (strlen (sword) > MAXLENGTH)
	{
	  printf ("Too long word on %d\n", count);
	  continue;
	}
      if (strlen (sword) != strlen (tword))
	{
	  printf ("Incompatible data on %d\n", count);
	  continue;
	}

      /* (iconv_t) -1 makes the callee store the strings unconverted. */
      hh = process_hangul_hanja_pair ((iconv_t) -1, sword, tword);
      if (!hh)
	{
	  fprintf (stderr, "error in processing %s, %s\n", sword, tword);
	  exit (-1);
	}

      /* iconv() takes size_t byte counts; passing the address of an int
	 corrupts the lengths where size_t is wider than int. */
      in_ptr = (char *) hh->utf_hangul;
      in_left = strlen (in_ptr);
      out_ptr = utf16_buf;
      out_left = sizeof utf16_buf;

      if (iconv (cd_utf8_to_utf16, &in_ptr, &in_left, &out_ptr, &out_left)
	  == (size_t) -1)
	{
	  /* As before, the entry is still filed under whatever prefix was
	     converted; the failure is just no longer silent. */
	  perror ("iconv");
	  fprintf (stderr, "UTF-16 conversion failed on %d\n", count);
	}
      utf16_len = sizeof utf16_buf - out_left;

      /* utf16_len counts bytes, so this over-allocates; the zeroed tail
	 guarantees a terminating UTFCHAR. */
      utf16_key = calloc (utf16_len + 1, sizeof (UTFCHAR));
      if (utf16_key == NULL)
	{
	  perror ("calloc");
	  exit (-1);
	}
      memcpy (utf16_key, utf16_buf, utf16_len);

      hval = hash (utf16_key);
      hhlist_add_hh (&hash_table[hval], hh);
      free (utf16_key);

      /* verification purpose */
      /* NOTE(review): hh is released right after being added to the
	 table; this is only safe if hhlist_add_hh() keeps its own copy
	 -- confirm. */
      hh_free (hh);
    }

  fclose (s);

  iconv_close (cd);
  iconv_close (cd_utf8_to_utf16);

  fdic = fopen (argv[2], "w");
  if (fdic == NULL)
    {
      perror (argv[2]);
      exit (-1);
    }
  hash_table_dump_content (hash_table, HASH_TABLE_NUMBER, fdic);
  if (fclose (fdic) != 0)
    {
      perror (argv[2]);
      exit (-1);
    }

  /* Per-bucket summary on stdout. */
  for (i = 0; i < HASH_TABLE_NUMBER; i++)
    {
      printf ("array [%d]: %d items\n", i, hash_table[i].n_count);
      printf ("==================================\n");
    }

  return 0;
}

/* Converts the NUL-terminated string SRC with the descriptor CD.
   Returns a heap-allocated, NUL-terminated copy of the converted bytes
   that the caller must free(), or NULL after reporting the failure on
   stderr.  The output buffer is sized at four output bytes per input
   byte plus slack; a conversion needing more fails with E2BIG. */
static unsigned char *
md_convert_string (iconv_t cd, char *src)
{
  size_t in_left = strlen (src);
  size_t capacity = 4 * in_left + 4;
  size_t out_left = capacity;
  char *in_ptr = src;
  char *out_ptr;
  char *buffer;

  /* One extra zeroed byte keeps the result NUL-terminated. */
  buffer = calloc (capacity + 1, sizeof (char));
  if (buffer == NULL)
    {
      perror ("calloc");
      return NULL;
    }
  out_ptr = buffer;

  /* The byte counts must be size_t objects: iconv() reads and writes
     them through size_t pointers. */
  if (iconv (cd, &in_ptr, &in_left, &out_ptr, &out_left) == (size_t) -1)
    {
      int saved_errno = errno;	/* fprintf() may overwrite errno */

      fprintf (stderr, "There was an error doing iconv with %s\n", src);
      errno = saved_errno;
      perror ("Following error:\n");
      free (buffer);
      return NULL;
    }

  return (unsigned char *) buffer;
}

/* Builds an HH entry from a hangul string and a hanja string.
   cd          iconv descriptor used to convert both strings; when it is
               (iconv_t) -1 the strings are copied without conversion.
   str_hangul  NUL-terminated hangul string (must not be NULL).
   str_hanja   NUL-terminated hanja string (must not be NULL).
   Returns a newly allocated HH whose utf_hangul and utf_hanja members
   are separately allocated strings, or NULL if an allocation or a
   conversion fails.  Nothing is leaked on the failure paths. */
HH *
process_hangul_hanja_pair (iconv_t cd, char *str_hangul, char *str_hanja)
{
  HH *hh_return;

  assert (str_hangul != NULL);
  assert (str_hanja != NULL);

  hh_return = calloc (1, sizeof *hh_return);
  if (hh_return == NULL)
    return NULL;

  if (cd == (iconv_t) -1)
    {
      hh_return->utf_hangul = (unsigned char *) strdup (str_hangul);
      hh_return->utf_hanja = (unsigned char *) strdup (str_hanja);
    }
  else
    {
      hh_return->utf_hangul = md_convert_string (cd, str_hangul);
      /* The hanja string is not converted once the hangul one failed. */
      if (hh_return->utf_hangul != NULL)
	hh_return->utf_hanja = md_convert_string (cd, str_hanja);
    }

  if (hh_return->utf_hangul == NULL || hh_return->utf_hanja == NULL)
    {
      /* calloc() zeroed both members, so freeing an unset one is a
	 no-op. */
      free (hh_return->utf_hangul);
      free (hh_return->utf_hanja);
      free (hh_return);
      return NULL;
    }

  return hh_return;
}


/* Opens a conversion descriptor that converts text from `from_encoding`
   to `to_encoding`.  Returns the descriptor, or (iconv_t) -1 after
   printing a message on stderr when iconv_open() fails. */
iconv_t
prepare_iconv (const char *to_encoding, const char *from_encoding)
{
  iconv_t handle = iconv_open (to_encoding, from_encoding);

  if (handle != (iconv_t) -1)
    return handle;

  fprintf (stderr, "cannot open iconv\n");
  return (iconv_t) -1;
}


    
  
/* Calls hhlist_init() on table[0] through table[size - 1], in that
   order.  Does nothing when size is zero or negative. */
void
hash_table_init (HHList table[], int size)
{
  int slot = 0;

  while (slot < size)
    {
      hhlist_init (table + slot);
      ++slot;
    }
}
