/*
 * @(#)$Id: b5encode.c,v 1.3 1994/05/05 15:43:08 shin Exp shin $
 * @(#) encode a BIG5 text into a 7-bit stream
 */

#include	<stdio.h>
#include	"hzb5.h"
#include	"ccode.h"

/*
 * updateSOmode1(hi,lo,so): pick the B5 shift-out set for a character whose
 * first byte is `hi` (MODE_SO_B5_1 below 0xc9, MODE_SO_B5_2 otherwise).
 * If that differs from the current mode `so`, write HZ_ESC followed by the
 * matching HZ_B5_LS1 or HZ_B5_LS2 designator and store the new mode in `so`.
 * `lo` is accepted but not used.  `so` must be an lvalue.
 * Expands Putchar(), so `op` and `n_write` must be in scope at the call.
 * Wrapped in do/while(0) so that it behaves as one statement.
 */
#define	updateSOmode1(hi,lo,so)						\
do {									\
	int	so0;	/* SO mode required by this character */	\
									\
	so0 = ( (hi) < 0xc9 ? MODE_SO_B5_1 : MODE_SO_B5_2);		\
	if ( so0 != (so) ) {						\
		Putchar(HZ_ESC);					\
		Putchar( (so0==MODE_SO_B5_1? HZ_B5_LS1 : HZ_B5_LS2) );	\
		(so) = so0;						\
	}								\
} while (0)

/*
 * updateSOmode3(hi,lo,so): pick the B5 shift-out set for a character whose
 * first byte is `hi` (MODE_SO_B5_1 below 0xd0, MODE_SO_B5_2 otherwise).
 * If that differs from the current mode `so`, write HZ_ESC followed by the
 * matching HZ_B5_LS1 or HZ_B5_LS2 designator and store the new mode in `so`.
 * `lo` is accepted but not used.  `so` must be an lvalue.
 * Expands Putchar(), so `op` and `n_write` must be in scope at the call.
 * Wrapped in do/while(0) so that it behaves as one statement.
 */
#define	updateSOmode3(hi,lo,so)						\
do {									\
	int	so0;	/* SO mode required by this character */	\
									\
	so0 = ( (hi) < 0xd0 ? MODE_SO_B5_1 : MODE_SO_B5_2);		\
	if ( so0 != (so) ) {						\
		Putchar(HZ_ESC);					\
		Putchar( (so0==MODE_SO_B5_1? HZ_B5_LS1 : HZ_B5_LS2) );	\
		(so) = so0;						\
	}								\
} while (0)

/*
 * updateSOmodeHZ(hi,lo,so): make MODE_SO_HZ the current shift-out mode.
 * If `so` is not already MODE_SO_HZ, write HZ_ESC followed by HZ_SO_HZ and
 * store MODE_SO_HZ in `so`.  `hi` and `lo` are accepted but not used.
 * `so` must be an lvalue.
 * Expands Putchar(), so `op` and `n_write` must be in scope at the call.
 * Wrapped in do/while(0) so that it behaves as one statement.
 */
#define	updateSOmodeHZ(hi,lo,so)					\
do {									\
	if ( (so) != MODE_SO_HZ ) {					\
		Putchar(HZ_ESC);					\
		Putchar(HZ_SO_HZ);					\
		(so) = MODE_SO_HZ;					\
	}								\
} while (0)

/* Values of the `state` variable of the b5encode() state machine. */
#define		STAT_ASC	0	/* not inside a two-byte character */
#define		STAT_B5_C1L	1	/* not referenced in the code visible here */
#define		STAT_B5_C1H	2	/* not referenced in the code visible here */
#define		STAT_B5_C0	3	/* first byte saved in c0, second byte expected */
#define		STAT_B5_C1	4	/* a two-byte character has just been written */

/*
 * Getchar(): next input byte as a value 0..255, read through `ip`, or EOF
 * once the `n_read` budget is exhausted.  `ip` and `n_read` must be in
 * scope at the point of use.  A zero or negative budget gives EOF without
 * touching `ip` or `n_read`, so EOF is sticky and a bad count cannot make
 * the caller run past its buffer.
 */
#define	Getchar()	( n_read <= 0 ? EOF : (n_read--, (unsigned char) *ip++) )
/*
 * Putchar(c): store `c` through the output cursor `op`, advance it, and
 * bump the counter that `n_write` points to.  `op` and `n_write` must be in
 * scope at the point of use; no bounds check is made on the output buffer.
 * Wrapped in do/while(0) so that `if (x) Putchar(a); else ...` compiles.
 */
#define	Putchar(c)	do { *op++ = (c); (*n_write)++; } while (0)

/* Two-byte scratch pair that b5encode() hands to b2g() and g2b(). */
static	unsigned char	b[2];

#ifdef	B5E_MAIN
/*
 * @(#)main: stand-alone filter, compiled only when B5E_MAIN is defined.
 * Reads stdin one fgets() chunk at a time, encodes each chunk with
 * b5encode() and writes the result to stdout.  The optional first
 * argument selects the source and target codes (see b5e_usage()).
 * Exits with status 1 after printing the usage text when the argument
 * is rejected; returns 0 at end of input.
 */
int
main(argc,argv)
int	argc;
char	**argv;
{
	char	ibuff[BUFSIZ];
	/*
	 * b5encode() does not bound its output and may write up to four
	 * bytes for a single input byte (HZ_ESC, HZ_SI and a doubled
	 * escape character), so the output buffer is sized for that
	 * worst case plus the terminating NUL added below.
	 */
	char	obuff[4*BUFSIZ + 1];
	int	n;		/* bytes written by the last b5encode() call */
	int	src, dst;

	src = CODE_BIG5;
	dst = DST_B5E;

	if ( !b5e_chkargv(argc, argv, &src, &dst) ) {
		b5e_usage(argc, argv);
		exit(1);
	}

	while ( fgets(ibuff, BUFSIZ, stdin) != NULL ) {
		/* b5encode() has no prototype here: pass the length as int */
		b5encode(ibuff, obuff, (int) strlen(ibuff), &n, src, dst);
		obuff[n] = '\0';
		fputs(obuff, stdout);
	}
	return(0);
}

/*
 * b5e_chkargv: decode the optional "-XY" option word in argv[1].
 *	src	- receives CODE_BIG5 ('b') or CODE_GB ('g') for letter X
 *	dst	- receives DST_B5E ('7'), DST_HZ ('h') or DST_OPT ('o') for Y
 * Both outputs are first set to the defaults CODE_BIG5 and DST_B5E.
 * Returns TRUE when there is no option word or it was understood,
 * FALSE when either letter is not recognised.
 */
b5e_chkargv(argc, argv, src, dst)
int	argc;
char	**argv;
int	*src, *dst;
{
	char	*opt;			/* the option word itself */

	*src = CODE_BIG5;
	*dst = DST_B5E;

	/* no argument, or one that is not an option: keep the defaults */
	if ( argc < 2 )
		return(TRUE);
	opt = argv[1];
	if ( opt[0] != '-' )
		return(TRUE);

	switch ( opt[1] ) {		/* source code letter */
	case 'b':
		*src = CODE_BIG5;
		break;
	case 'g':
		*src = CODE_GB;
		break;
	default:
		return(FALSE);
	}

	switch ( opt[2] ) {		/* target representation letter */
	case '7':
		*dst = DST_B5E;
		break;
	case 'h':
		*dst = DST_HZ;
		break;
	case 'o':
		*dst = DST_OPT;
		break;
	default:
		return(FALSE);
	}

	return(TRUE);
}

/*
 * b5e_usage: print the version banner, the usage line (with argv[0] as
 * the program name) and the option summary on stdout.  argc is not used.
 */
b5e_usage(argc, argv)
int	argc;
char	**argv;
{
	/* option summary, printed verbatim after the usage line */
	static char	*option_help[] = {
		"-st: source code & target 7-bit representation\n",
		"-h : get this help message\n\n",
		"-b7: Big5 => 7-bit Big5 (in B5E3 format) (default)\n",
		"-bh: Big5 => HZ (7-bit)\n",
		"-bo: -bh optimal conversion mode:\n",
		"     Big5 => HZ if convertable to GB/HZ\n",
		"     Big5 => B5E3 if not convertable to GB/HZ\n\n",
		"-gh: GB => HZ\n",
		"-g7: GB => B5E3\n",
		"-go: -g7 optimal conversion mode:\n",
		"     GB => B5E3 if convertable to Big5/B5E3\n",
		"     GB => HZ if not convertable to Big5/B5E3\n",
		NULL
	};
	char	**line;

	fputs("B5ENCODE ** Rev. 1.53 ** 1994/10\n\n", stdout);
	printf("Usage: %s [-st|-h] < infile > outfile\n", argv[0]);

	for ( line = option_help; *line != NULL; line++ )
		fputs(*line, stdout);
}

#endif /* B5E_MAIN */

/*
 * @(#)b5enc: encode Big5 into 7-bit B5E3 format with printable ASCII chars
 */
/*
 * Convenience wrapper: forwards its arguments to b5encode() with the
 * source fixed to CODE_BIG5 and the target fixed to DST_B5E.
 *	buff	- input bytes
 *	wbuff	- receives the encoded bytes
 *	n_read	- number of input bytes in buff
 *	n_write	- passed on to b5encode(), which stores the output count
 * There is no return statement; callers must not use the result.
 */
b5enc(buff, wbuff, n_read, n_write)
char	*buff;
char	*wbuff;
int	n_read;
int	*n_write;
{
	b5encode(buff, wbuff, n_read, n_write, CODE_BIG5, DST_B5E);
}

/*
 * @(#)b5encode: encode Chinese characters (Big5 or GB) as 7-bit
 *	ASCII characters, in either B5E3 or HZ format
 */
/*
 * Consumes n_read bytes from buff and writes the 7-bit encoding to wbuff.
 * *n_write is reset to 0 on entry and counts the bytes written by this
 * call only.
 *
 * wbuff is not bounds-checked.  In the code below one input byte can
 * produce up to four output bytes (HZ_ESC, HZ_SI and the byte itself
 * written twice), so the caller has to size wbuff accordingly.
 *
 * The parser state, the current shift-out mode and the pending first byte
 * are function-static, so they carry over from one call to the next; a
 * two-byte character may therefore be split across two calls.  For the
 * same reason only one stream can be encoded at a time.
 *
 * There is no return statement; callers must not use the result.
 */
b5encode(buff, wbuff, n_read, n_write, src, dst)
int	src;			/* source: CODE_BIG5 or CODE_GB */
int	dst;			/* target: DST_B5E, DST_HZ or DST_OPT */
char	*buff;			/* input buffer */
char	*wbuff;			/* output buffer */
int	n_read;			/* no. input/read characters */
int	*n_write;		/* no. output/write characters */
{
	char	*ip=buff;	/* input cursor used by Getchar() */
	char	*op=wbuff;	/* output cursor used by Putchar() */

	/*
	 * Note that only a single state machine is provided for encoding
	 * and decoding. If it is necessary to call the encoder/decoder
	 * for several buffers, or i/o streams, the state variables should
	 * be changed to an array of states, each for one conversion buffer.
	 */

	static int state ;	/* one of the STAT_ values */
	static int so ;		/* shift-out mode last announced in the output */
	static int called = 0;	/* non-zero once the statics are initialised */
	static int c, c0;			/* current and 1st byte */

	/* one-time initialisation on the very first call */
	if ( called == 0 ) {
		called++;
		state = STAT_ASC;
		so = MODE_SO_ASC;
	}

	*n_write = 0;

	while ( (c = Getchar() ) != EOF ) {
		switch (state) {
		case STAT_ASC:
			/* a lead byte is only remembered; nothing is written yet */
			if ( (src==CODE_BIG5 && is_high(c))
			||   (src==CODE_GB   && is_GB1(c))) {
				c0 = c;
				state = STAT_B5_C0;
			}
			else {
				/*
				 * NOTE(review): `so` is tested for non-zero, which
				 * means "not ASCII" only if MODE_SO_ASC is 0 --
				 * confirm against the header.
				 */
				if ( so ) {
				/* previously not in ASC state */
					Putchar(HZ_ESC);
					Putchar(HZ_SI);
					so = MODE_SO_ASC;
				}
#ifndef	B5E_ONLINE
				/*
				 * the escape character is written twice, presumably
				 * so that a decoder does not take it for the start
				 * of an escape sequence
				 */
				if ( c == HZ_ESC )	/* '~' => '~~' */
					Putchar(c);
#endif
				Putchar(c);
			}
			break;
		case STAT_B5_C0:
			/*
			 * c is the second byte of the character whose first
			 * byte is in c0.  It is not range-checked here.
			 */
			if ( src==CODE_BIG5 && dst==DST_B5E ) {
				/* Big5 -> B5E3, no code conversion */
				updateSOmode3(c0,c,so);
				B5E3(&c0,&c);
			}
			else if (src==CODE_GB && dst==DST_HZ) {
				/* GB -> HZ: clear bit 7 of both bytes */
				updateSOmodeHZ(c0,c,so);
				c0 &= 0x7f; c &= 0x7f;
			}
			else if (src==CODE_BIG5) {
			/*
			 * mixed & optimal encoding
			 * that preserve untranslatable chars
			 *
			 * NOTE(review): b2g() presumably converts b[] in
			 * place and returns non-zero on success -- confirm.
			 * When dst is not DST_OPT the contents of b[] are
			 * written as HZ whatever b2g() returned.
			 */
				b[0] = c0; b[1] = c;
				if ( !b2g(b) && dst==DST_OPT ){	/* B5 -> B5E3 */
					updateSOmode3(c0,c,so);
					B5E3(&c0,&c);
				}
				else {				/* B5 -> HZ */
					updateSOmodeHZ(b[0],b[1],so);
					c0 = b[0] & 0x7f; c = b[1] & 0x7f;
				}
			}
			else {	/* src == CODE_GB */	
				/*
				 * NOTE(review): g2b() presumably converts b[] in
				 * place and returns non-zero on success -- confirm.
				 * When dst is not DST_OPT the contents of b[] are
				 * written as B5E3 whatever g2b() returned.
				 */
				b[0] = c0; b[1] = c;
				if ( !g2b(b) && dst==DST_OPT ){ /* GB -> HZ */
					updateSOmodeHZ(c0,c,so);
					c0 &= 0x7f; c &= 0x7f;
				}
				else {				/* GB -> B5E3 */
					updateSOmode3(b[0],b[1],so);
					B5E3(&b[0], &b[1]);
					c0 = b[0]; c = b[1];
				}
			}
			/* write the encoded pair */
			Putchar(c0); Putchar(c);
			state = STAT_B5_C1;
			break;
		case STAT_B5_C1:
			/* right after a two-byte character */
			if ( (src==CODE_BIG5 && is_high(c))
			||   (src==CODE_GB   && is_GB1(c))) {
				/* another two-byte character follows at once */
				c0 = c;
				state = STAT_B5_C0;
			}
			else {
				/* leave the shifted mode before writing this byte */
				Putchar(HZ_ESC);
				Putchar(HZ_SI);
#ifndef	B5E_ONLINE
				if ( c == HZ_ESC )	/* '~' => '~~' */
					Putchar(c);
#endif
				Putchar(c);
				so = MODE_SO_ASC;
				state = STAT_ASC;
			}
			break;
		default:
			/* any other state value: the byte is dropped */
			break;
		} /* switch */
	} /* while */
}


/*
 * @(#)Big5 encoding with the B5E1 proposal by Jing-Shin Chang
 */
/*
 * B5E1: clear bit 7 of both bytes in place.
 *	hi, lo	- first and second byte; each is overwritten with its
 *		  low seven bits
 * Always returns 0.  The return type is spelled out because implicit
 * int is no longer part of C.
 */
int
B5E1(hi,lo)
int	*hi, *lo;
{
	*hi &= 0x7f;
	*lo &= 0x7f;
	return(0);
}

/*
 * @(#)Big5 encoding with the B5E2 proposal by Jing-Shin Chang
 */
/*
 * Rewrites the byte pair pointed to by hi and lo in place by applying
 * eight b5cvt() rules one after another.
 * NOTE(review): b5cvt() is not defined in the code visible here.  From
 * the call shape its arguments look like (hi, lo, hi range low, hi range
 * high, lo range low, lo range high, operation applied to hi, operation
 * applied to lo) -- confirm against the macro definition.
 * There is no return statement; callers must not use the result.
 */
B5E2(hi,lo)
int	*hi, *lo;
{

	b5cvt(*hi,*lo,0xa1,0xc8,0xa1,0xfe,&=0x7f,&=0x7f);	/* F1 */
	b5cvt(*hi,*lo,0xa1,0xc8,0x40,0x7e,-=0x58,|=0x00);	/* F2 */

	b5cvt(*hi,*lo,0xc9,0xfe,0xa1,0xfe,&=0x7f,&=0x7f);	/* R1 */
	b5cvt(*hi,*lo,0xc9,0xf0,0x40,0x7e,-=0xa8,|=0x00);	/* R2 */

	b5cvt(*hi,*lo,0xf1,0xfe,0x40,0x5e,-=0xd0,-=0x1f);	/* R3.1 */
	b5cvt(*hi,*lo,0xf1,0xfe,0x5f,0x7d,-=0xc2,-=0x3e);	/* R3.2 */
	b5cvt(*hi,*lo,0xf1,0xfc,0x7e,0x7e,-=0xb4,-=0x5d);	/* R3.3 */
	/*
	 * NOTE(review): unlike the calls above, the next one has no
	 * trailing ';'.  That only compiles if b5cvt() expands to a
	 * complete statement -- confirm.
	 */
	b5cvt(*hi,*lo,0xfd,0xfe,0x7e,0x7e,-=0xc0,-=0x5c)	/* R3.4 */

}

/*
 * @(#)Big5 encoding with the B5E3 proposal by Jing-Shin Chang
 */
/*
 * B5E3: rewrite a two-byte character in place into its B5E3 form.
 *	hi, lo	- first and second byte, updated through the pointers
 *
 * Second byte 0xa1..0xfe: bit 7 of both bytes is cleared.
 * Second byte 0x40..0x7e: the second byte is kept and the first byte is
 * moved down, 0xa1..0xcf to 0x50..0x7e and 0xd0..0xfe to 0x21..0x4f.
 * Any other second byte leaves both bytes untouched.
 *
 * Always returns 0.  The return type is spelled out because implicit
 * int is no longer part of C.
 */
int
B5E3(hi,lo)
int	*hi, *lo;
{

	if ( *lo>=0xa1 && *lo<=0xfe ) {			/* F1+/R1+ */
		*hi &= 0x7f;				/* CLR */
		*lo &= 0x7f;				/* CLR */
	}
	else if ( *lo>=0x40 && *lo<=0x7e ) {		/* F2+/R2+ */
		*hi -= ( *hi < 0xd0 ? 0x51 : 0xaf );
	}

	/* Note: the byte range checking can be made less restricted */
	return(0);
}

/*
 *	- Big5 code encoding with the HZ-S proposal by Steve Simpson
 *	** NOT tested **
 */
/*
 * b5ehzs: map a two-byte character to a pair of bytes starting at 0x21.
 *	b1, b2	- first and second input byte
 *	a1, a2	- receive the first and second output byte
 *
 * The pair is turned into a linear index of 157 positions per first-byte
 * row (rows counted from 0xA1 below 0xC9, from 0xC9 otherwise; second
 * bytes below 0xA1 counted from 0x40, the others placed after them), and
 * the index is then split into 94-wide rows and columns.
 *
 * Bytes outside 0xA1..0xFE (first) or 0x40..0xFE (second) are copied
 * through unchanged, as b5ehz2() does for first bytes it does not handle.
 * Without this check a small byte makes the index negative, which wraps
 * when stored in the unsigned index.
 *
 * Always returns 0.  The return type is spelled out because implicit
 * int is no longer part of C.
 */
int
b5ehzs(b1, b2, a1, a2)
    int b1, b2, *a1, *a2;
{
    unsigned long n;

    if ( b1 < 0xA1 || b1 > 0xFE || b2 < 0x40 || b2 > 0xFE ) {
	*a1 = b1;
	*a2 = b2;
	return(0);
    }

    n = 157 * ((b1 < 0xC9) ? (b1 - 0xA1) : (b1 - 0xC9)) 
	+ ((b2 < 0xA1) ? (b2 - 0x40) : (b2 - 0xA1 + 63));

    *a1 = 0x21 + (int) (n / 94);

    *a2 = 0x21 + (int) (n % 94);

    return(0);
}

/*
 *	- Big5 code encoding with the HZ-2 proposal by Ya-Gui Wei
 *	** NOT tested **
 */
/*
 * b5ehz2: map a two-byte character to a pair of 7-bit bytes.
 *	b1, b2	- first and second input byte
 *	a1, a2	- receive the first and second output byte
 *
 * For a first byte in 0xa1..0xcf or 0xd0..0xfe the output first byte is
 * twice the offset from the start of that range, plus the top bit of b2,
 * plus 0x21; the output second byte is b2 with bit 7 cleared.  Any other
 * first byte is copied through together with b2.
 *
 * Always returns 0.  The return type is spelled out because implicit
 * int is no longer part of C.
 */
int
b5ehz2(b1,b2,a1,a2)
int	*a1, *a2;	/* 1st and 2nd bytes of ASCII/HZ-2 */
int	b1, b2;		/* 1st and 2nd bytes Big5 */
{
	int	base;	/* first byte of the range b1 falls in */

	if ( b1 >= 0xa1 && b1 <= 0xcf )
		base = 0xA1;
	else if ( b1 >= 0xd0 && b1 <= 0xfe )
		base = 0xD0;
	else {
		/* not a handled first byte: pass both bytes through */
		*a1 = b1;
		*a2 = b2;
		return(0);
	}

	*a1 = ((b1 - base) << 1) + (b2 >> 7) + 0x21;
	*a2 = b2 & 0x7F;
	return(0);
}

/*
 * History:
 *
 * $Log: b5encode.c,v $
 * Revision 1.3  1994/05/05  15:43:08  shin
 * 1. implement B5E2 proposal
 * 2. add b5e()
 *
 * Revision 1.2  1994/05/04  10:33:16  shin
 * 1. change ESC seq to HZ+S proposal
 *
 * Revision 1.1  1994/05/01  19:01:15  shin
 * Initial revision
 *
 */
