#****************************************************************************
#  ##   ##         #####   #####  ##     **        NoSQL RDBMS - mjoin      *
#  ###  ##        ####### ####### ##     **        $Revision: 2.1 $			*
#  #### ##        ###     ##   ## ##     ************************************
#  #######  ####  #####   ##   ## ##     **      Carlo Strozzi (c) 1998     *
#  ####### ######   ##### ## # ## ##     ************************************
#  ## #### ##  ##     ### ##  ### ##     **           Adapted by            *
#  ##  ### ###### ####### ######  ###### **          Carlo Strozzi          *
#  ##   ##  ####   #####   #### # ###### **     e-mail: carlos@linux.it     *
#****************************************************************************
#   NoSQL RDBMS, Copyright (C) 1998 Carlo Strozzi.                          *
#   This program comes with ABSOLUTELY NO WARRANTY; for details             *
#   refer to the GNU General Public License.                                *
#****************************************************************************
# Original code; jointbl,v 2.4 1993/03/29 13:34:46 hobbs
#****************************************************************************

# Reduce $0 to the bare program name; it prefixes every diagnostic.
$0 =~ s-.*/-- ;

# Consume the leading option switches.  Short options are recognised by
# their first letter, long options must match exactly:
#   -c / --cartesian      sets $CAR and $MDJ
#   -m / --master-detail  sets $MDJ
#   -n / --strip-header   sets $NHDR
while ( $ARGV[0] =~ /^-/ ) {
    $_ = shift( @ARGV ) ;
    if( /^-c.*/ || /^--cartesian$/ ){
        $CAR++ ;
        $MDJ++ ;
    }
    elsif( /^-m.*/ || /^--master-detail$/ ){
        $MDJ++ ;
    }
    elsif( /^-n.*/ || /^--strip-header$/ ){
        $NHDR++ ;
    }
    else{
        die "\n$0: unknown option: $_\n" ;
    }
}

# At least one column specification plus the table_2 file name is required.
die "\n$0: not enough info, check the documentation\n" if @ARGV < 2 ;

# The last argument names table_2; everything before it is a key column.
$file2 = pop( @ARGV ) ;

# Each remaining argument is either "name" (same column name in both
# tables) or "name_1=name_2" (split at the first '=').  @ARGV is emptied.
for my $spec ( splice( @ARGV ) ){
    my( $name_a, $name_b ) =
        ( $spec =~ /=/ ) ? ( $`, $' ) : ( $spec, $spec ) ;
    push( @cola, $name_a ) ;
    push( @colb, $name_b ) ;
}
# Open table_2 for reading.  The explicit '<' mode makes Perl treat $file2
# strictly as a file name, so characters such as '>', '|' or surrounding
# whitespace in the argument are never interpreted as an open mode or as a
# command to run.  The system error text is appended to the diagnostic.
open( FILE2, '<', $file2 ) || die "\n$0: can't open $file2: $!\n" ;

# Until set_key has counted the real columns, let the split() calls in
# geta/getb return up to 9999 fields.
$NA = $NB = 9999 ;

while(1){			# table_1 (STDIN): copy leading comment lines
    &geta ;
    die "\n$0: invalid header, table_1\n" if $eof ;
    last unless $a =~ /^\s*#/ ;	# first non-comment line: column names
    print $a, "\n" unless $NHDR ;
}
while(1){			# table_2 (FILE2): same treatment
    &getb ;
    die "\n$0: invalid header, table_2\n" if $eof ;
    last unless $b =~ /^\s*#/ ;	# first non-comment line: column names
    print $b, "\n" unless $NHDR ;
}

# @F and @G now hold the column-name lines; work out the key layout and
# emit the joined column-name line.
&set_key ;
&println unless $NHDR ;

# Second header line of each table: the column definitions.
&geta ; &getb ;
die "\n$0: invalid header\n" if $eof ;
&println unless $NHDR ;

# A key column is compared numerically when the first word of its
# definition contains the letter N (either case) in either table;
# otherwise it is compared as a string.
for( $i=0 ; $i < @KA ; $i++ ){
    $numcmp[$i] = 0 ;
    if( $F[$KA[$i]] =~ /(\S+)/ && $1 =~ /N/i ){
	$numcmp[$i] = 1 ; }
}
for( $i=0 ; $i < @KB ; $i++ ){
    if( $G[$KB[$i]] =~ /(\S+)/ && $1 =~ /N/i ){
	$numcmp[$i] = 1 ; }
}

# Prime the main loop with the first data line of each table.
&geta ; &getb ;
# Merge-join of the two tables on the key columns.  Each pass either drains
# the remaining input once a table is exhausted, skips ahead in whichever
# table currently has the smaller key, or emits every pairing of a run of
# equal-key lines.
# NOTE(review): the single forward pass presumably relies on both tables
# being sorted on the key columns -- confirm against the documentation.
main: while( 1 ){				# main loop
    if( $eof ){
	# At least one table is exhausted.  With $MDJ set, the remaining
	# table_1 lines are printed with empty table_2 columns ($aonly);
	# with $CAR set, the remaining table_2 lines are printed with
	# empty table_1 columns ($bonly).  println clears both flags
	# after every line, so they are raised again on each iteration.
	if( ! $eofa && $MDJ ){
	    until( $eofa ){
		$aonly++ ; &println ;
		&geta ; } }
	if( ! $eofb && $CAR ){
	    until( $eofb ){
		$bonly++ ; &println ;
		&getb ; } }
	exit ;
    }
    # cmp_key caches its result until geta or getb reads another line, so
    # the repeated calls below are only recomputed after a read.
    while( &cmp_key < 0 ){			# key a < key b: line a has no partner
	if( $MDJ ){ $aonly++ ; &println ; }
	&geta ; next main if $eofa ; }
    while( &cmp_key > 0 ){			# key a > key b: line b has no partner
	if( $CAR ){ $bonly++ ; &println ; }
	&getb ; next main if $eofb ; }
    # Advancing b may have moved it past a; if so start the pass again.
    next if &cmp_key != 0 ;			# key a != key b
    &sav_key ;			# remember the matching key (taken from line b)
    &println ;			# first pairing: current a with current b
    while( 1 ){			# get rest of equal b lines
	# Keep the raw text of every b line of this key group in @GG so
	# it can be replayed against the following a lines.
	push( @GG, $b ) ; # save b lines
	&getb ;
	if( ! $eofb && &same_b ) {
	    &println ; }	# current a paired with the next b of the group
	else {
	    # @G now holds the first b line beyond the group (or is
	    # unchanged if table_2 hit EOF); keep it while @G is reused
	    # for the replay below.
	    @Gsav = @G ; # save @G
	    last ; }
    }
    while( 1 ){			# get rest of equal a lines
	&geta ;
	if( ! $eofa && &same_a ) {
	    # foreach localizes $b, so the look-ahead line read by getb
	    # is back in $b once the replay loop ends.
	    for $b (@GG){
		@G = split( /\t/, $b, $NB ) ;
		&println ; } }
	else {
	    # Group finished: drop the saved b lines and put the
	    # look-ahead b line back for the next pass.
	    @GG = () ;
	    @G = @Gsav ; # restore @G
	    last ; }
    }
}
sub set_key {
    # Derive the key layout from the two column-name lines currently held
    # in @F (table_1) and @G (table_2).  Sets:
    #   $NA, $NB   - total number of columns in line a, b
    #   @KA, @KB   - indexes of the key columns, in command-line order
    #   @OA, @OB   - indexes of the non-key columns, in table order
    #   $NOA, $NOB - number of non-key columns in line a, b
    # Dies when a requested key column is not present in its table.
    $NA = scalar( @F ) ;
    $NB = scalar( @G ) ;

    # Table 1: locate every requested column by name; the first column
    # carrying that name wins.
    for $col (@cola){
        my( $pos ) = grep { $col eq $F[$_] } 0 .. $#F ;
        die "\n$0: no column match in table_1: $col\n" unless defined $pos ;
        push( @KA, $pos ) ;
    }
    my %is_key_a = map { ( $_ => 1 ) } @KA ;
    push( @OA, grep { ! $is_key_a{$_} } 0 .. $#F ) ;
    $NOA = scalar( @OA ) ;

    # Table 2: same procedure.
    for $col (@colb){
        my( $pos ) = grep { $col eq $G[$_] } 0 .. $#G ;
        die "\n$0: no column match in table_2: $col\n" unless defined $pos ;
        push( @KB, $pos ) ;
    }
    my %is_key_b = map { ( $_ => 1 ) } @KB ;
    push( @OB, grep { ! $is_key_b{$_} } 0 .. $#G ) ;
    $NOB = scalar( @OB ) ;
}
sub cmp_key {
    # Compare the key columns of the current line a (@F) with those of the
    # current line b (@G), column by column in key order.
    # Returns -1, 0 or 1 for a<b, a==b or a>b.  Columns flagged in @numcmp
    # are compared numerically, all others as strings.
    # The result is cached in $cmpval; it is only recomputed when
    # $cmpneed is set (geta and getb raise it after every read).
    return $cmpval unless $cmpneed ;

    $cmpneed = 0 ;
    $cmpval  = 0 ;
    for my $k ( 0 .. $#KA ){
        my $val_a = $F[ $KA[$k] ] ;
        my $val_b = $G[ $KB[$k] ] ;
        # "|| 0" keeps the result at 0 when <=> cannot order the values.
        $cmpval = $numcmp[$k]
                ? ( ( $val_a <=> $val_b ) || 0 )
                : ( $val_a cmp $val_b ) ;
        last if $cmpval ;		# first differing column decides
    }
    return $cmpval ;
}
sub println {
    # Write one line of the joined table to the selected output handle.
    # Layout: key columns, non-key columns of line a, non-key columns of
    # line b, all tab separated.
    #   $bonly set - keys come from line b; the line a part is left empty
    #   $aonly set - the line b part is left empty
    # Both flags are cleared before returning.
    my $line ;
    if( $bonly ){
        $line  = join( "\t", @G[@KB] ) ;	# key cols from line b
        $line .= "\t" x $NOA ;			# nulls for line a part
    }
    else{
        $line  = join( "\t", @F[@KA] ) ;	# key cols from line a
        $line .= join( '', map { "\t" . $F[$_] } @OA ) ;	# other cols, line a
    }

    $line .= $aonly
           ? "\t" x $NOB				# nulls for line b part
           : join( '', map { "\t" . $G[$_] } @OB ) ;	# other cols, line b

    print $line . "\n" ;
    $aonly = $bonly = 0 ;
}
sub geta {
    # Read the next line of table_1 from STDIN into $a and split it on
    # tabs into @F (at most $NA fields).  At end of input $a is undefined,
    # @F keeps its previous contents and both $eof and $eofa are raised.
    # Every call raises $cmpneed so that cmp_key recomputes its result.
    $cmpneed++ ;
    $a = <STDIN> ;
    # Test for definedness, not truth: a final line consisting of "0"
    # without a trailing newline is data, not end of file.
    if( defined $a ){
	# chomp only removes a trailing newline, so an unterminated last
	# line does not lose its final character.
	chomp $a ;
	@F = split( /\t/, $a, $NA ) ; }
    else{
	$eof++ ; $eofa++ ; }
}
sub getb {
    # Read the next line of table_2 from FILE2 into $b and split it on
    # tabs into @G (at most $NB fields).  At end of input $b is undefined,
    # @G keeps its previous contents and both $eof and $eofb are raised.
    # Every call raises $cmpneed so that cmp_key recomputes its result.
    $cmpneed++ ;
    $b = <FILE2> ;
    # Test for definedness, not truth: a final line consisting of "0"
    # without a trailing newline is data, not end of file.
    if( defined $b ){
	# chomp only removes a trailing newline, so an unterminated last
	# line does not lose its final character.
	chomp $b ;
	@G = split( /\t/, $b, $NB ) ; }
    else{
	$eof++ ; $eofb++ ; }
}
sub sav_key {
    # Remember the key values of the current line b: @savkey becomes the
    # @G fields selected by the key indexes in @KB, in key order.
    @savkey = @G[@KB] ;
}
sub same_a {
    # Return 1 if the key values of line a (@F at the indexes in @KA)
    # equal the values saved in @savkey, else return 0.
    # Key columns flagged in @numcmp are compared numerically, exactly as
    # cmp_key does, so that values such as "1" and "1.0" stay in the same
    # key group; all other columns are compared as strings.
    for my $k ( 0 .. $#KA ){
	my $val = $F[ $KA[$k] ] ;
	if( $numcmp[$k] ){
	    return 0 if $val != $savkey[$k] ; }
	else{
	    return 0 if $val ne $savkey[$k] ; }
    }
    return 1 ;
}
sub same_b {
    # Return 1 if the key values of line b (@G at the indexes in @KB)
    # equal the values saved in @savkey, else return 0.
    # Key columns flagged in @numcmp are compared numerically, exactly as
    # cmp_key does, so that values such as "1" and "1.0" stay in the same
    # key group; all other columns are compared as strings.
    for my $k ( 0 .. $#KB ){
	my $val = $G[ $KB[$k] ] ;
	if( $numcmp[$k] ){
	    return 0 if $val != $savkey[$k] ; }
	else{
	    return 0 if $val ne $savkey[$k] ; }
    }
    return 1 ;
}
