/*
 *
 * Copyright 1998-1999, University of Notre Dame.
 * Authors: Jeffrey M. Squyres, Kinis L. Meyer with M. D. McNally 
 *          and Andrew Lumsdaine
 *
 * This file is part of the Notre Dame LAM implementation of MPI.
 *
 * You should have received a copy of the License Agreement for the
 * Notre Dame LAM implementation of MPI along with the software; see
 * the file LICENSE.  If not, contact Office of Research, University
 * of Notre Dame, Notre Dame, IN 46556.
 *
 * Permission to modify the code and to distribute modified code is
 * granted, provided the text of this NOTICE is retained, a notice that
 * the code was modified is included with the above COPYRIGHT NOTICE and
 * with the COPYRIGHT NOTICE in the LICENSE file, and that the LICENSE
 * file is distributed with the modified code.
 *
 * LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.
 * By way of example, but not limitation, Licensor MAKES NO
 * REPRESENTATIONS OR WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY
 * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE COMPONENTS
 * OR DOCUMENTATION WILL NOT INFRINGE ANY PATENTS, COPYRIGHTS, TRADEMARKS
 * OR OTHER RIGHTS.  
 *
 * Additional copyrights may follow.
 *
 *	Ohio Trollius
 *	Copyright 1996 The Ohio State University
 *	GDB
 *
 *	$Id: lambootagent.c,v 6.7 1999/08/14 22:57:18 lamteam Exp $
 * 
 *	Function:	- LAM boot agent
 *	Accepts:	- link array
 *			- link array size
 *			- # booted nodes (out)
 *			- # running nodes (out)
 */

#include <lam_config.h>
#include <sfh.h>

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <args.h>
#include <debug.h>
#include <lamnet.h>
#include <net.h>
#include <portable.h>
#include <terror.h>
#include <typical.h>

/*
 * Default timeout passed to sfh_sock_accept_tmout() while waiting for
 * a freshly started node to connect back to the boot agent.
 * NOTE(review): presumably in seconds -- confirm against
 * sfh_sock_accept_tmout().
 * Can be overridden at compile time by predefining TO_BOOT.
 */
#ifndef TO_BOOT
#define TO_BOOT	60
#endif

/*
 * local variables
 *
 * fl_debug and fl_verbose are (re)loaded from the 'd' and 'v' options
 * each time lambootagent() runs.  buf is scratch space in which
 * lambootagent() formats the value of hboot's -I argument.
 */
static int		fl_debug;		/* debug mode */
static int		fl_verbose;		/* verbose mode */
static char		buf[128];

/*
 * external functions
 *
 * These are old-style declarations without parameter lists, so the
 * compiler does not check the arguments passed to them.
 * The notes below describe how this file uses each function; the
 * callee side is not visible here.
 */
/* server socket: called with a pointer to the port, negative on failure */
extern int		sfh_sock_open_srv_inet_stm();
/* client socket: called with an address and a port, negative on failure */
extern int		sfh_sock_open_clt_inet_stm();
/* accept on the server socket with a timeout, negative on failure */
extern int		sfh_sock_accept_tmout();
/* runs a command vector locally; a nonzero result is stored in errno */
extern int		_lam_few();
/* runs a command vector on a remote host; nonzero result means failure */
extern int		inetexec();
/* reads the two port numbers reported by a new node; nonzero on failure */
extern int		readcltcoord();
/* writes one link entry (id, address, port) to a node; nonzero on failure */
extern int		writecltnbr();
/* writes one int4 to a socket; nonzero on failure */
extern int		writesockint4();
/* progress display used in verbose mode while sending the topology */
extern void		nodespin_end();
extern void		nodespin_init();
extern void		nodespin_next();

/*
 *	lambootagent
 *
 *	Function:	- LAM boot agent
 *			- phase 1: for every valid link of type NT_BOOT,
 *			  starts DEFTHBOOT (locally through _lam_few(),
 *			  remotely through inetexec()) and waits for the
 *			  new node to connect back on a temporary server
 *			  socket and report its port numbers
 *			- phase 2: connects to every such node and sends
 *			  it the number of links followed by one entry
 *			  per link
 *			- on failure every descriptor and command vector
 *			  still held is released and errno is left as it
 *			  was set by the failing step
 *	Accepts:	- link array
 *			- link array size
 *			- # booted nodes (out), nodes on which the boot
 *			  command was launched successfully
 *			- # running nodes (out), nodes that connected
 *			  back and reported their ports
 *	Returns:	- 0 or LAMERROR
 */
int
lambootagent(lamnet, nlamnet, nboot, nrun)

struct lamnode		*lamnet;
int			nlamnet;
int			*nboot;
int			*nrun;

{
	int		agent_port;	/* port number for replies */
	int		agent_sd = -1;	/* socket for replies */
	int		boot_sd = -1;	/* connection to new node */
	int		cmdc;		/* command vector count */
	int		dlport;
	int		i, j;
	int		r;
	int		save_errno;	/* errno kept across cleanup */
	int4		local;		/* local node ID */
	int4		origin;		/* origin node ID */
	char		**cmdv = 0;	/* command vector */
	unsigned char	*p;

	*nboot = 0;
	*nrun = 0;

	if (nlamnet <= 0) {
		return(0);
	}
/*
 * Set the flags.
 */
	fl_debug = opt_taken('d');
	fl_verbose = opt_taken('v');
/*
 * Allocate a server socket and port.
 */
	agent_port = 0;
	agent_sd = sfh_sock_open_srv_inet_stm(&agent_port);
	if (agent_sd < 0) {
		show_help("boot", "socket-fail", NULL);
		return(LAMERROR);
	}
/*
 * Make the socket close on exec.
 */
	if (fcntl(agent_sd, F_SETFD, FD_CLOEXEC) == -1) {
		show_help(NULL, "system-call-fail",
			  "fcntl (set close-on-exec)", NULL);
		goto fail;
	}
/*
 * Find the local node.
 */
	local = NOTNODEID;

	for (i = 0; (i < nlamnet) && (local == NOTNODEID); ++i) {

		if ((lamnet[i].lnd_nodeid != NOTNODEID) &&
				(lamnet[i].lnd_type & NT_ME)) {
			local = i;
		}
	}

	if (local == NOTNODEID) {
		errno = EINVAL;
		/* The help message displays the name of the schema file,
		   and we don't have that here, so it must be printed by
		   the invoking function. */
		goto fail;
	}
/*
 * Find the origin node.  It may legitimately stay NOTNODEID; the value
 * is passed on to the boot command unchanged.
 */
	origin = NOTNODEID;

	for (i = 0; (i < nlamnet) && (origin == NOTNODEID); ++i) {

		if ((lamnet[i].lnd_nodeid != NOTNODEID) &&
				(lamnet[i].lnd_type & NT_ORIGIN)) {
			origin = i;
		}
	}
/*
 * Boot all valid links with type NT_BOOT.
 */
	for (i = 0; i < nlamnet; ++i) {
/*
 * Skip nodes that are invalid or already booted.
 */
		if ((lamnet[i].lnd_nodeid == NOTNODEID) ||
				!(lamnet[i].lnd_type & NT_BOOT)) continue;
/*
 * Build the hboot command line for the new host.
 */
		cmdc = 0;
		cmdv = 0;
		argvadd(&cmdc, &cmdv, DEFTHBOOT);
		argvadd(&cmdc, &cmdv, "-t");
		argvadd(&cmdc, &cmdv, "-c");
		argvadd(&cmdc, &cmdv, "conf.lam");

		if (fl_debug) {
			argvadd(&cmdc, &cmdv, "-d");
		}
		if (fl_verbose) {
			argvadd(&cmdc, &cmdv, "-v");
		}
/*
 * If remote node, close stdio of processes.
 */
		if (i != local) {
			argvadd(&cmdc, &cmdv, "-s");
		}
/*
 * Override the $inet_topo variable.  The value is wrapped in double
 * quotes for remote nodes and in blanks for the local node.
 * The formatted text is bounded (roughly 70 bytes in the worst case),
 * so it always fits in buf.
 */
		p = (unsigned char *) &lamnet[local].lnd_addr.sin_addr;
		argvadd(&cmdc, &cmdv, "-I");
		sprintf(buf, "%c%s-H %u.%u.%u.%u -P %d -n %d -o %d%c",
			i == local ? ' ' : '"',
			opt_taken('x') ? "-x " : "",
			(unsigned) p[0], (unsigned) p[1],
			(unsigned) p[2], (unsigned) p[3],
			agent_port,
			i,
			(int) origin,
			i == local ? ' ' : '"');
		argvadd(&cmdc, &cmdv, buf);

		VERBOSE("Executing %s on n%d (%s)...\n", DEFTHBOOT, i, 
			lamnet[i].lnd_hname);

		if (i == local) {
			if (fl_debug) {
				fprintf(stderr,
					"lamboot: attempting to execute \"");
				for (j = 0; j < cmdc; j++) {
					if (j > 0) {
						fprintf(stderr, " ");
					}
					if (strchr(cmdv[j], ' ') != NULL) {
						fprintf(stderr, "\"%s\"",
							cmdv[j]);
					} else {
						fprintf(stderr, "%s",
							cmdv[j]);
					}
				}
				fprintf(stderr, "\"\n");
			}
			r = _lam_few(cmdv);

			if (r) {
				errno = r;
				show_help("boot", "fork-fail", cmdv[0], NULL);
				goto fail;
			}
		} else {
			r = inetexec(lamnet[i].lnd_hname, lamnet[i].lnd_uname,
				     cmdv, (fl_debug ? "lamboot" : NULL));

			if (r) {
				/* inetexec will display errors if it
				   fails */
				goto fail;
			}
		}
/*
 * The command was launched: release the vector (it is rebuilt for every
 * node) and count the node as booted.
 */
		argvfree(cmdv);
		cmdv = 0;
		(*nboot)++;
/*
 * Accept a connection from the new host.
 */
		boot_sd = sfh_sock_accept_tmout(agent_sd, TO_BOOT);
		if (boot_sd < 0) goto fail;
/*
 * Read the new host port numbers.
 */
		if (readcltcoord(boot_sd, &lamnet[i].lnd_bootport,
				&dlport)) goto fail;

		lamnet[i].lnd_addr.sin_port = htons((unsigned short) dlport);
/*
 * Close the host connection.  The descriptor is forgotten before the
 * result is checked so that the cleanup code never closes it twice.
 */
		r = close(boot_sd);
		boot_sd = -1;
		if (r) goto fail;

		(*nrun)++;
	}

	r = close(agent_sd);
	agent_sd = -1;
	if (r) goto fail;

	if (fl_verbose) {
		nodespin_init("topology");
	}
/*
 * Send link information to all nodes that have been booted.
 */
	for (i = 0; i < nlamnet; ++i) {
/*
 * Skip nodes that are invalid or already booted.
 */
		if ((lamnet[i].lnd_nodeid == NOTNODEID) ||
				!(lamnet[i].lnd_type & NT_BOOT)) continue;

		if (fl_verbose) {
			nodespin_next((int4) i);
		}
/*
 * Connect to the new host.
 */
		boot_sd = sfh_sock_open_clt_inet_stm(
				(unsigned char *) &lamnet[i].lnd_addr.sin_addr,
				lamnet[i].lnd_bootport);
		if (boot_sd < 0) goto fail;
/*
 * Send it the number of links.
 */
		if (writesockint4(boot_sd, (int4) nlamnet)) goto fail;
/*
 * Loop sending info on all the links.  Invalid entries are still sent,
 * flagged with NOTLINKID, so the receiver gets exactly nlamnet entries.
 */
		for (j = 0; j < nlamnet; ++j) {

			if (writecltnbr(boot_sd,
					(lamnet[j].lnd_nodeid == NOTNODEID) ?
					NOTLINKID : j,
					&lamnet[j].lnd_addr.sin_addr, (int)
					ntohs(lamnet[j].lnd_addr.sin_port)))
					goto fail;
		}

		r = close(boot_sd);
		boot_sd = -1;
		if (r) goto fail;
	}

	if (fl_verbose) {
		nodespin_end();
	}

	return(0);
/*
 * Common error exit: release whatever is still held.  errno is saved
 * and restored because the caller reports the failure and the cleanup
 * calls may overwrite it.
 */
fail:
	save_errno = errno;

	if (cmdv != 0) {
		argvfree(cmdv);
	}
	if (boot_sd >= 0) {
		(void) close(boot_sd);
	}
	if (agent_sd >= 0) {
		(void) close(agent_sd);
	}

	errno = save_errno;
	return(LAMERROR);
}
