/* Distributed Checksum Clearinghouse
 *
 * send a request from client to server
 *
 * Copyright (c) 2005 by Rhyolite Software
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.2.74-1.108 $Revision$
 */

#include "dcc_clnt.h"
#ifdef USE_POLL
#include <poll.h>
#endif
#ifdef HAVE_ARPA_NAMESER_H
#include <arpa/nameser.h>
#endif
#ifdef HAVE_RESOLV_H
#include <resolv.h>
#endif

/* Pointer to the shared client information; 0 while nothing is mapped. */
DCC_CLNT_INFO *dcc_clnt_info;		/* memory mapped shared data */
u_char dcc_all_srvrs = 0;		/* try to contact all servers */

/* Define CLNT_LOSSES to compile in the clnt_losses counter.
 * NOTE(review): its use is not in the visible part of the file. */
/* #define CLNT_LOSSES */
#ifdef CLNT_LOSSES
static u_int clnt_losses;
#endif

/* Modulus used to reduce host IDs and server addresses to residue classes,
 * and the offset added to the residue of IPv6 addresses that convert
 * to IPv4. */
#define DCC_SRVRS_MOD	    59
#define DCC_SRVRS_MOD_IPV4  128

/* Non-zero enables extra trace messages in this file. */
u_char dcc_clnt_debug;
/* Lower bound applied to computed retransmission delays. */
u_int dcc_min_delay_us = DCC_INIT_RTT;  /* override minimum RTT */
/* NOTE(review): not used in the visible part of the file. */
int dcc_debug_ttl;


#define AGE_AVG(_v,_n,_a,_b) ((_v) = ((_v)*_a + (_n)*_b + (_a+_b)/2)/(_a+_b))
/* 15 minutes in seconds.
 * NOTE(review): not used in the visible part of the file. */
#define FAST_RTT_SECS	(15*60)


/* Our own host name as reported by gethostname(). */
char dcc_clnt_hostname[MAXHOSTNAMELEN];
/* 0 until it has been computed by get_clnt_hid() */
static u_int32_t dcc_clnt_hid;		/* our DCC host-ID */


/* Each client knows about one or more servers, lest the current server
 * crash.  To ensure that counts of spam accumulate as quickly as possible,
 * all of the processes on a client try to use a single server.  The
 * closest (or fastest) server is preferred.  It is desirable for the
 * clients to convert the hostnames of the servers to IP addresses
 * frequently enough to track changes in address records, but not so
 * often that a lot of time is wasted on the DNS.
 *
 * All of that implies that independent processes on the client need to
 * cooperate in measuring the round trip time to the servers and maintaining
 * their IP addresses.  On UNIX systems, this is accomplished with mmap()
 * and a well known file.
 */



/* the contexts must be locked to read or change these values */
static int info_fd = -1;		/* open map file or -1 */
#ifdef DCC_WIN32
HANDLE info_map = INVALID_HANDLE_VALUE;	/* WIN32 mapping of the map file */
#endif
DCC_PATH dcc_info_nm;			/* name of the current map file */
/* These flags record whether this process already holds each file lock
 * so that the lock and unlock functions can return early. */
static u_char info_locked;		/* kludge to prevent double-tripping */
static u_char resolve_locked;		/* ditto */

/* Some systems have a broken fcntl().  Examples include Solaris over NFS.
 * Their bug is that you can lock entire files, but not ranges within a file.
 * On such systems both locks are the same lock on the whole file.
 * Otherwise each lock has its own lock number within the file. */
#ifdef DCC_BAD_FCNTL_LOCKS
#define INFO_LOCK_NUM       DCC_LOCK_ALL_FILE
#define RESOLVE_LOCK_NUM    DCC_LOCK_ALL_FILE
#else
#define INFO_LOCK_NUM       0
#define RESOLVE_LOCK_NUM    1
#endif


/* The DCC client uses two levels of locking.  The first is for data
 * common to one process and shared among threads in that process,
 * and the second is among processes sharing the mmap()'ed information.
 * Threads are used in long lived processes such as the sendmail milter
 * filter server.  Multiple processes are needed to serve other
 * local clients of the DCC, such as procmail jobs */


/* Find the server class that owns an address entry by checking whether
 * the entry lies at or beyond the start of the greylist address array.
 * The argument is parenthesized so that expressions can be passed. */
#define AP2CLASS(ap) DCC_GREY2CLASS((ap) >= dcc_clnt_info->grey.addrs)


/* Compare a server address entry with a socket address.
 *      The address families and the ports must be the same.
 *      IPv4 addresses are compared directly.  Two IPv6 addresses that
 *      dcc_ipv6toipv4() can both convert are compared as IPv4 addresses.
 *      Any other pair of IPv6 addresses is compared byte for byte. */
static u_char				/* 1=the addresses are equal */
dcc_cmp_ap2su(const DCC_SRVR_ADDR *ap, const DCC_SOCKU *su)
{
	struct in_addr ap_v4, su_v4;

	if (ap->family != su->sa.sa_family)
		return 0;

	if (su->sa.sa_family == AF_INET) {
		if (su->ipv4.sin_addr.s_addr != ap->ip.v4.s_addr)
			return 0;
		return su->ipv4.sin_port == ap->port;
	}

	/* the rest is for IPv6 */
	if (ap->port != su->ipv6.sin6_port)
		return 0;

	if (dcc_ipv6toipv4(&ap_v4, &ap->ip.v6)
	    && dcc_ipv6toipv4(&su_v4, &su->ipv6.sin6_addr))
		return su_v4.s_addr == ap_v4.s_addr;

	return 0 == memcmp(&su->ipv6.sin6_addr, &ap->ip.v6,
			   sizeof(su->ipv6.sin6_addr));
}



/* Convert a server address entry to a string by building a socket address
 * from it and handing that to dcc_su2str(). */
const char *
dcc_ap2str(const DCC_SRVR_ADDR *ap)
{
	DCC_SOCKU sockaddr;

	dcc_mk_su(&sockaddr, ap->family, &ap->ip, ap->port);

	return dcc_su2str(&sockaddr);
}



/* Convert a server address entry to a string with dcc_su2str_opt().
 *      def_port and port_str are passed through to dcc_su2str_opt(). */
const char *
dcc_ap2str_opt(const DCC_SRVR_ADDR *ap,
	       u_int16_t def_port,
	       char port_str)		/* '\0' or '-' */
{
	DCC_SOCKU sockaddr;

	/* start with a clean socket address and fill in only
	 * the family, address, and port */
	memset(&sockaddr, 0, sizeof(sockaddr));
	sockaddr.sa.sa_family = ap->family;
	if (sockaddr.sa.sa_family != AF_INET) {
		/* anything that is not IPv4 is treated as IPv6 */
		sockaddr.ipv6.sin6_port = ap->port;
		memcpy(&sockaddr.ipv6.sin6_addr, &ap->ip.v6,
		       sizeof(sockaddr.ipv6.sin6_addr));
	} else {
		sockaddr.ipv4.sin_port = ap->port;
		sockaddr.ipv4.sin_addr.s_addr = ap->ip.v4.s_addr;
	}

	return dcc_su2str_opt(&sockaddr, def_port, port_str);
}



/* Describe a server in buf.
 *      If the class still has generation addrs_gen, the result is the
 *      host name followed by the address in parentheses, where the
 *      address comes from sup if it is present and from ap otherwise.
 *      If the generation has changed, only sup is trusted.
 *      Without sup, the result is a generic label.
 *      The result is always buf. */
static const char *
addr2str(char *buf, u_int buf_len, DCC_SRVR_CLASS *class,
	 int addrs_gen, const DCC_SRVR_ADDR *ap, const DCC_SOCKU *sup)
{
	const char *ip_str;

	if (class->gen != addrs_gen) {
		/* the address entries may have been replaced */
		if (!sup)
			snprintf(buf, buf_len, "DCC server");
		else
			snprintf(buf, buf_len, "%s",
				 dcc_su2str(sup));
		return buf;
	}

	ip_str = sup ? dcc_su2str(sup) : dcc_ap2str(ap);
	snprintf(buf, buf_len, "%s (%s)",
		 class->nms[ap->nm_inx].hostname, ip_str);
	return buf;
}



/* Same as addr2str() but the address entry is chosen by its index
 * in the address array of the class. */
static const char *
addr_inx2str(char *buf, u_int buf_len, DCC_SRVR_CLASS *class,
	     int addrs_gen, int addr_inx, const DCC_SOCKU *sup)
{
	const DCC_SRVR_ADDR *ap;

	ap = &class->addrs[addr_inx];
	return addr2str(buf, buf_len, class, addrs_gen, ap, sup);
}


/* Trace the measured performance of one server address.
 *      msg starts the trace message.
 *      The RTT adjustment of the server name is shown, in milliseconds,
 *      only when it is not zero.  The fraction of answered requests
 *      is shown only when something has been transmitted. */
static void
trace_perf(const char *msg, const DCC_SRVR_ADDR *ap)
{
	DCC_SRVR_CLASS *class;
	const char *nm;
	char abuf[60];
	char rbuf[30];

	class = AP2CLASS(ap);

	/* the description of the server is the same in every message */
	nm = addr2str(abuf, sizeof(abuf), class, class->gen, ap, 0);

	if (ap->rtt == DCC_RTT_BAD) {
		dcc_trace_msg("%s %s with unknown RTT",
			      msg, nm);
		return;
	}

	if (class->nms[ap->nm_inx].rtt_adj != 0)
		snprintf(rbuf, sizeof(rbuf), "%+d",
			 class->nms[ap->nm_inx].rtt_adj/1000);
	else
		rbuf[0] = 0;

	if (ap->total_xmits != 0) {
		dcc_trace_msg("%s %s with %.0f%% of %d requests answered,"
			      " %.2f%s ms RTT, %d ms queue wait",
			      msg, nm,
			      (ap->total_resps*100.0)/ap->total_xmits,
			      ap->total_xmits,
			      ap->rtt/1000.0, rbuf,
			      ap->srvr_wait/1000);
	} else {
		dcc_trace_msg("%s %s with %.2f%s ms RTT, %d ms queue wait",
			      msg, nm,
			      ap->rtt/1000.0, rbuf,
			      ap->srvr_wait/1000);
	}
}



/* Complain about a bad packet.
 *
 * A socket that is not always connected can receive datagrams from
 * almost anywhere.  For example, a DNS datagram could leak in if the
 * range of local ports is small and the local port was recently used
 * for DNS queries.
 *
 * A connected socket can also receive datagrams that do not belong to
 * the connection if it was recently disconnected while data was waiting
 * in its queue.
 *
 * So unless debugging is turned on, say nothing unless the packet came
 * from an address to which we have transmitted something. */
static void PATTRIB(3,4)
trace_bad_packet(const DCC_XLOG *xlog, const DCC_SOCKU *su, const char *p, ...)
{
	const DCC_XLOG_ENTRY *xloge;
	u_char from_srvr;
	va_list args;

	if (!dcc_clnt_debug) {
		from_srvr = 0;
		for (xloge = xlog->base; xloge < xlog->next; ++xloge) {
			/* Only entries for hosts to which we have
			 * transmitted something count. */
			if (xloge->op_nums.t != DCC_OP_NUMS_NULL
			    && !memcmp(su, &xloge->su, sizeof(*su))) {
				from_srvr = 1;
				break;
			}
		}
		/* forget the error message if not from a DCC server */
		if (!from_srvr)
			return;
	}

	va_start(args, p);
	dcc_verror_msg(p, args);
	va_end(args);
}



/* Differences between times are clamped to this many seconds */
#define FOREVER_SECS	1000
#define FOREVER_USECS	(FOREVER_SECS*DCC_USECS)
/* Compute *tv1 - *tv2 in microseconds.
 *      Differences of FOREVER_SECS seconds or more in either direction
 *      are clamped to +/-FOREVER_USECS before multiplying to prevent
 *      overflow. */
static long
tvs2us(struct timeval *tv1, struct timeval *tv2)
{
	long secs;

	secs = tv1->tv_sec - tv2->tv_sec;
	if (secs >= FOREVER_SECS)
		return FOREVER_USECS;
	if (secs <= -FOREVER_SECS)
		return -FOREVER_USECS;

	return secs*DCC_USECS + (tv1->tv_usec - tv2->tv_usec);
}



/* Compute the delay before the next retransmission.
 *      The round trip time is first raised to at least DCC_INIT_RTT so
 *      that the DCC server has time for some disk operations even if the
 *      server and network have usually been faster.  It is then doubled
 *      for each previous transmission, limited to DCC_MAX_RTT, and
 *      finally raised to at least dcc_min_delay_us. */
#define MIN_RETRANS_TIME(rtt,xmit_num) ((rtt)<<(xmit_num))
static int
dcc_retrans_time(u_int rtt, u_int xmit_num)
{
	u_int delay;

	delay = rtt;
	if (delay < DCC_INIT_RTT)
		delay = DCC_INIT_RTT;

	/* exponential backoff */
	delay <<= xmit_num;

	if (delay > DCC_MAX_RTT)
		delay = DCC_MAX_RTT;
	if (delay < dcc_min_delay_us)
		delay = dcc_min_delay_us;

	return delay;
}



/* Note the time at which an operation starts.
 *      The current time of the context is set to the same instant,
 *      and so the elapsed time is zero. */
static void
get_start_time(DCC_CLNT_CTXT *ctxt)
{
	ctxt->now_us = 0;

	gettimeofday(&ctxt->start, 0);
	ctxt->now = ctxt->start;
}



/* Refresh the current time of the context and the number of microseconds
 * since its start time.
 *      A backward step of less than one millisecond is treated as
 *      no elapsed time.  Any other negative value, or FOREVER_USECS
 *      or more, is reported as a clock change. */
static u_char				/* 1=ok, 0=time jumped */
get_now(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt)
{
	gettimeofday(&ctxt->now, 0);
	ctxt->now_us = tvs2us(&ctxt->now, &ctxt->start);

	if (ctxt->now_us < 0) {
		/* ignore tiny reverse time jumps on some systems
		 * such as BSD/OS 4.1 */
		if (ctxt->now_us > -1000) {
			ctxt->now_us = 0;
			ctxt->now = ctxt->start;
			return 1;
		}
	} else if (ctxt->now_us < FOREVER_USECS) {
		return 1;
	}

	dcc_pemsg(EX_OSERR, emsg,
		  "clock changed an impossible %.6f seconds",
		  ctxt->now_us/(1000.0*1000.0));
	return 0;
}



/* All servers are broken, so note that we should not try for a while.
 *      Each call increases the backoff exponent, up to DCC_MAX_FAIL_EXP,
 *      and restarts the waiting period from the current time.
 *      The contexts and the mapped information must be locked */
static void
fail_more(DCC_SRVR_CLASS *class)
{
	++class->fail_exp;
	if (class->fail_exp > DCC_MAX_FAIL_EXP)
		class->fail_exp = DCC_MAX_FAIL_EXP;

	class->fail_time = (time(0) + (DCC_INIT_FAIL_SECS << class->fail_exp));
}



/* See if we are still waiting after a failure before trying again.
 *      The wait is over if no failure is recorded, if the failure time
 *      has passed, or if the failure time is implausibly far in the
 *      future.  The backoff exponent is cleared when a recorded wait
 *      is over. */
static u_char				/* 0=failing */
ck_fail_time(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class)
{
	int secs_left;

	if (class->fail_exp == 0)
		return 1;

	secs_left = class->fail_time - ctxt->now.tv_sec;
	if (secs_left <= 0 || secs_left > DCC_MAX_FAIL_SECS) {
		class->fail_exp = 0;
		return 1;
	}

	dcc_pemsg(EX_IOERR, emsg,
		  "continue not asking %s %d seconds after failure",
		  DCC_IS_GREY_STR(class), secs_left);
	return 0;
}



/* Unlock the shared memory.
 *      Nothing is done if this process does not hold the lock.
 *      When both locks are the same lock on the whole file, the file
 *      stays locked while the resolve lock is still held.
 *      The contexts must be locked */
u_char					/* 0=failed 1=ok */
dcc_info_unlock(DCC_EMSG emsg)
{
	if (!info_locked)
		return 1;

	info_locked = 0;
#if INFO_LOCK_NUM==DCC_LOCK_ALL_FILE
	/* Releasing the single whole-file lock would also release
	 * the resolve lock. */
	if (resolve_locked)
		return 1;
#endif
	if (!dcc_unlock_fd(emsg, info_fd, INFO_LOCK_NUM,
			   "DCC map ", dcc_info_nm))
		return 0;
	return 1;
}



/* Lock the shared memory so we can read and perhaps change it.
 *      Nothing is done if this process already holds the lock.
 *      When both locks are the same lock on the whole file and the
 *      resolve lock is held, the file is already locked and only the
 *      flag is set.
 *      The contexts must be locked */
u_char					/* 0=failed, 1=ok */
dcc_info_lock(DCC_EMSG emsg)
{
	if (info_locked)
		return 1;

#if INFO_LOCK_NUM==DCC_LOCK_ALL_FILE
	if (resolve_locked) {
		info_locked = 1;
		return 1;
	}
#endif
	if (!dcc_exlock_fd(emsg, info_fd, INFO_LOCK_NUM,
			   "DCC map ", dcc_info_nm))
		return 0;

	info_locked = 1;
	return 1;
}



/* Stop working on server IP addresses.
 *      The file lock is released if it is held, except that when both
 *      locks are the same lock on the whole file, the file stays locked
 *      while the shared information is locked.
 *      The resolve mutex is released on every path.
 *      The contexts must be locked, and remain locked on exit. */
u_char					/* 0=failed, 1=ok */
dcc_resolve_unlock(DCC_EMSG emsg)
{
	u_char ok = 1;

	if (resolve_locked) {
		resolve_locked = 0;

#if RESOLVE_LOCK_NUM==DCC_LOCK_ALL_FILE
		if (!info_locked)
#endif
		{
			if (!dcc_unlock_fd(emsg, info_fd, RESOLVE_LOCK_NUM,
					   "resolve lock ", dcc_info_nm))
				ok = 0;
		}
	} else if (dcc_clnt_debug) {
		dcc_trace_msg("unlocking unlocked resolve lock");
	}

	dcc_resolve_mutex_unlock();
	return ok;
}



/* Lock resolving server hostnames.
 *      The contexts must be locked, and the shared information may be
 *      locked.  All three are locked on success.
 *      The contexts are locked on return after a failure.
 *      After a failure of any step other than the initial unlocking of
 *      the shared information, nothing else is locked. */
u_char					/* 0=failed, 1=ok */
dcc_resolve_lock(DCC_EMSG emsg)
{
	/* Release the shared data and the contexts while we wait for
	 * both the mutex and the file lock that protect resolving
	 * server hostnames. */
	if (!dcc_info_unlock(emsg))
		return 0;
	dcc_ctxts_unlock();

	if (!dcc_resolve_mutex_lock(0))
		goto relock_ctxts;

	if (resolve_locked)
		dcc_logbad(EX_SOFTWARE, "locking locked resolve lock");
	if (!dcc_exlock_fd(emsg, info_fd, RESOLVE_LOCK_NUM,
			   "resolve lock ", dcc_info_nm)) {
		dcc_resolve_mutex_unlock();
		goto relock_ctxts;
	}
	resolve_locked = 1;

	/* get the other locks back */
	dcc_ctxts_lock();
	if (dcc_info_lock(emsg))
		return 1;

	dcc_resolve_unlock(0);
	return 0;

relock_ctxts:
	dcc_ctxts_lock();
	return 0;
}



/* Unlock, un-map, and close the shared info.
 *      This also releases a map file that has been opened, and perhaps
 *      locked, but not yet mapped.  Otherwise the descriptor and the
 *      lock flag would be left behind when opening a map file fails
 *      part way through.
 *      All of the steps are tried even if an early one fails.
 *      The contexts must be locked */
u_char					/* 0=something wrong, 1=all over */
dcc_unmap_info(DCC_EMSG emsg)
{
	u_char result = 1;

	/* nothing to do if nothing is mapped and no file is open */
	if (!dcc_clnt_info && info_fd < 0)
		return result;

	if (!dcc_info_unlock(emsg))
		result = 0;

	if (dcc_clnt_info) {
#ifdef DCC_WIN32
		win32_unmap(&info_map, dcc_clnt_info, dcc_info_nm);
#else
		if (0 > munmap((void *)dcc_clnt_info,
			       sizeof(*dcc_clnt_info))) {
			dcc_pemsg(EX_OSERR, emsg, "munmap(%s): %s",
				  dcc_info_nm, ERROR_STR());
			result = 0;
		}
#endif
		dcc_clnt_info = 0;
	}

	if (0 > close(info_fd)) {
		/* do not hide an earlier error message */
		if (emsg && *emsg == '\0')
			dcc_pemsg(EX_IOERR, emsg, "close(%s): %s",
				  dcc_info_nm, ERROR_STR());
		result = 0;
	}
	info_fd = -1;

	return result;
}



/* Discover our host ID if we do not already know it.
 *      The ID starts with the value of gethostid() where that function
 *      is available.  Each character of the host name, multiplied by
 *      its position in the name, is then added. */
static u_char
get_clnt_hid(DCC_EMSG emsg)
{
	int pos;

	/* a non-zero ID has already been computed */
	if (dcc_clnt_hid != 0)
		return 1;

#ifdef HAVE_GETHOSTID
	dcc_clnt_hid = gethostid();
#endif
	/* leave the last byte of the buffer alone */
	if (gethostname(dcc_clnt_hostname,
			sizeof(dcc_clnt_hostname)-1) < 0) {
		dcc_pemsg(EX_NOHOST, emsg, "gethostname(): %s", ERROR_STR());
		return 0;
	}
	if (!dcc_clnt_hostname[0]) {
		dcc_pemsg(EX_NOHOST, emsg, "null hostname from gethostname()");
		return 0;
	}

	for (pos = 0;
	     pos < ISZ(dcc_clnt_hostname) && dcc_clnt_hostname[pos];
	     ++pos) {
		dcc_clnt_hid += dcc_clnt_hostname[pos]*pos;
	}
	return 1;
}



/* Write a new DCC map file.
 *      map_nm0		name of the file.  It is used to create the file
 *			when no open descriptor is supplied, and in error
 *			messages.
 *      pfd		0 or a pointer to a descriptor.  If *pfd >= 0, the
 *			map is written to that open file.  Otherwise the
 *			file is created exclusively and, on success, its
 *			descriptor is stored in *pfd, or closed if pfd is 0.
 *      dcc_nms, dcc_nms_len	DCC server names.  No more than
 *			DCC_MAX_SRVR_NMS are used.
 *      grey_nms, grey_nms_len	greylist server names, limited likewise
 *      flags		stored in the new map
 *
 *      On failure the descriptor is closed, *pfd is set to -1, and a file
 *      created here is removed. */
u_char					/* 0=failed, 1=ok */
dcc_create_map(DCC_EMSG emsg, const DCC_PATH map_nm0, int *pfd,
	       const DCC_SRVR_NM *dcc_nms, int dcc_nms_len,
	       const DCC_SRVR_NM *grey_nms, int grey_nms_len,
	       u_char flags)
{
	static int op_nums_r;
	DCC_CLNT_INFO info_clear;
	int fd;
	u_char created;
	DCC_PATH map_nm;
	int i;

	/* Build the path name first so that it is valid in every error
	 * message, including when the caller supplies an open file. */
	if (map_nm0)
		dcc_fnm2path(map_nm, map_nm0);
	else
		map_nm[0] = '\0';

	if (pfd && (fd = *pfd) >= 0) {
		created = 0;
	} else {
		fd = open(map_nm, O_RDWR|O_CREAT|O_EXCL, 0600);
		if (fd < 0) {
			dcc_pemsg(EX_SOFTWARE, emsg, "open(%s): %s",
				  map_nm, ERROR_STR());
			return 0;
		}
		created = 1;
	}

	memset(&info_clear, 0, sizeof(info_clear));
	strcpy(info_clear.version, DCC_MAP_INFO_VERSION);

	/* ignore nonsense counts instead of handing them to memcpy() */
	if (dcc_nms_len > 0) {
		if (dcc_nms_len > DCC_MAX_SRVR_NMS)
			dcc_nms_len = DCC_MAX_SRVR_NMS;
		memcpy(info_clear.dcc.nms, dcc_nms,
		       sizeof(info_clear.dcc.nms[0])*dcc_nms_len);
	}
	info_clear.dcc.act_inx = -1;	/* no server has been picked */

	if (grey_nms_len > 0) {
		if (grey_nms_len > DCC_MAX_SRVR_NMS)
			grey_nms_len = DCC_MAX_SRVR_NMS;
		memcpy(info_clear.grey.nms, grey_nms,
		       sizeof(info_clear.grey.nms[0])*grey_nms_len);
	}
	info_clear.grey.act_inx = -1;

	info_clear.flags = flags;
	if (!get_clnt_hid(emsg))
		goto fail;

	/* the residue must not be 0 */
	info_clear.residue = dcc_clnt_hid % DCC_SRVRS_MOD;
	if (info_clear.residue == 0)
		info_clear.residue = 1;

	/* ensure that we have a new report # even if we are repeatedly
	 * recreating a temporary map file */
	if (dcc_clnt_info)
		op_nums_r += dcc_clnt_info->proto_hdr.op_nums.r;
	info_clear.proto_hdr.op_nums.r = ++op_nums_r;

	i = write(fd, &info_clear, sizeof(info_clear));
	if (i != ISZ(info_clear)) {
		if (i < 0)
			dcc_pemsg(EX_SOFTWARE, emsg, "write(%s): %s",
				  map_nm, ERROR_STR());
		else
			dcc_pemsg(EX_SOFTWARE, emsg,
				  "write(%s)=%d instead of %d",
				  map_nm, i, ISZ(info_clear));
		goto fail;
	}

	if (created) {
		if (pfd)
			*pfd = fd;
		else
			close(fd);
	}
	return 1;

fail:
	close(fd);
	if (pfd)
		*pfd = -1;
	if (created)
		unlink(map_nm);
	return 0;
}



#ifdef DCC_MAP_INFO_VERSION_5
/* Start converting an old map file.
 *      The shared information is locked, old_info_size bytes are read
 *      from the open map file into old_info, and the file is rewound.
 *      If the data starts with old_magic, the name of the replacement
 *      file, the current name followed by "-new", is built in
 *      new_info_nm, and any existing file of that name is removed.
 *
 *      The lock is held on return after success and after an error in
 *      reading, seeking, or building the name.  It is released when the
 *      version does not match. */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_start(DCC_EMSG emsg, void *old_info, int old_info_size,
		  const char *old_magic, int old_magic_size,
		  DCC_PATH new_info_nm)
{
	int i;

	/* only one process or thread can fix the file so wait for
	 * exclusive access to the file */
	if (!dcc_info_lock(emsg))
		return -1;

	i = read(info_fd, old_info, old_info_size);
	if (i != old_info_size) {
		if (i < 0) {
			dcc_pemsg(EX_IOERR, emsg, "read(%s): %s",
				  dcc_info_nm, ERROR_STR());
		} else {
			dcc_pemsg(EX_IOERR, emsg, "read(%s)=%d instead of %d",
				  dcc_info_nm, i, old_info_size);
		}
		return -1;
	}

	/* go back to the start of the file;
	 * the offset comes before the whence argument */
	if (-1 == lseek(info_fd, 0, SEEK_SET)) {
		dcc_pemsg(EX_IOERR, emsg, "lseek(%s): %s",
			  dcc_info_nm, ERROR_STR());
		return -1;
	}

	if (strncmp(old_info, old_magic, old_magic_size)) {
		if (!dcc_info_unlock(emsg))
			return -1;
		return 0;
	}

	dcc_fnm2path(new_info_nm, dcc_info_nm);
	/* leave room for the suffix and the terminating null */
	if (strlen(new_info_nm) >= sizeof(DCC_PATH) - sizeof("-new")) {
		dcc_pemsg(EX_IOERR, emsg, "too long map name \"%s\"",
			  dcc_info_nm);
		return -1;
	}
	strcat(new_info_nm, "-new");
	unlink(new_info_nm);
	return 1;
}



/* Finish converting an old map file by putting the new file in place
 * of the old file.
 *      When running as root, the new file is first given the owner and
 *      group of the old file so that the conversion does not change
 *      the ownership.
 *      The new file is always closed, and it is removed on failure. */
static int				/* -1=error, 1=done */
map_convert_fin(DCC_EMSG emsg,
		DCC_PATH new_info_nm, int new_fd, struct stat *old_sb)
{
	int result = 1;

	if (getuid() == 0
	    && 0 > fchown(new_fd, old_sb->st_uid, old_sb->st_gid)) {
		dcc_pemsg(EX_IOERR, emsg, "chown(%s,%d,%d): %s",
			  new_info_nm, (int)old_sb->st_uid, (int)old_sb->st_gid,
			  ERROR_STR());
		unlink(new_info_nm);
		result = -1;

	} else if (0 > rename(new_info_nm, dcc_info_nm)) {
		dcc_pemsg(EX_IOERR, emsg, "rename(%s, %s): %s",
			  new_info_nm, dcc_info_nm, ERROR_STR());
		unlink(new_info_nm);
		result = -1;
	}

	close(new_fd);
	return result;
}



/* Convert an old version 5 map file.
 *      The server names of the old file are copied to a new file in the
 *      current format, which then replaces the old file.  The old RTT
 *      adjustments are multiplied by 10*1000.
 *      Host names are copied with explicit bounds because they come
 *      from a file.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_v5(DCC_EMSG emsg, struct stat *old_sb)
{
	DCC_PATH new_info_nm;
	DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS];
	DCC_V5_CLNT_INFO old_info;
	int new_fd;
	int i;

	/* a file that is too small cannot be a version 5 map */
	if ((int)old_sb->st_size < ISZ(DCC_V5_CLNT_INFO))
		return 0;

	i = map_convert_start(emsg, &old_info, sizeof(DCC_V5_CLNT_INFO),
			      DCC_MAP_INFO_VERSION_5, sizeof(old_info.version),
			      new_info_nm);
	if (i <= 0)
		return i;

	memset(&new_nms, 0, sizeof(new_nms));
	/* do not read beyond the end of the old array */
	for (i = 0; i < DIM(new_nms) && i < DIM(old_info.nms); ++i) {
		new_nms[i].clnt_id = old_info.nms[i].clnt_id;
		new_nms[i].port = old_info.nms[i].port;
		/* The precision limits how much of the old name is read
		 * and the size limits how much is written. */
		snprintf(new_nms[i].hostname, sizeof(new_nms[i].hostname),
			 "%.*s", (int)sizeof(old_info.nms[i].hostname),
			 old_info.nms[i].hostname);
		memcpy(new_nms[i].passwd, old_info.nms[i].passwd,
		       sizeof(new_nms[i].passwd));
		new_nms[i].rtt_adj = old_info.nms[i].rtt_adj*10*1000;
	}
	new_fd = INVALID_HANDLE_VALUE;
	if (!dcc_create_map(emsg, new_info_nm, &new_fd,
			    new_nms, DIM(new_nms), 0, 0, old_info.flags))
		return -1;

	return map_convert_fin(emsg, new_info_nm, new_fd, old_sb);
}



#endif /* DCC_MAP_INFO_VERSION_5 */
#ifdef DCC_MAP_INFO_VERSION_6
/* Convert an old version 6 map file.
 *      The server names of the old file are copied to a new file in the
 *      current format, which then replaces the old file.
 *      The result is an int because -1 is returned for errors and the
 *      caller tests for a negative value.
 *      Host names are copied with explicit bounds because they come
 *      from a file.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_v6(DCC_EMSG emsg, struct stat *old_sb)
{
	DCC_PATH new_info_nm;
	DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS];
	DCC_V6_CLNT_INFO old_info;
	int new_fd;
	int i;

	/* a file that is too small cannot be a version 6 map */
	if ((int)old_sb->st_size < ISZ(DCC_V6_CLNT_INFO))
		return 0;

	i = map_convert_start(emsg, &old_info, sizeof(DCC_V6_CLNT_INFO),
			      DCC_MAP_INFO_VERSION_6, sizeof(old_info.version),
			      new_info_nm);
	if (i <= 0)
		return i;

	memset(&new_nms, 0, sizeof(new_nms));
	/* do not read beyond the end of the old array */
	for (i = 0; i < DIM(new_nms) && i < DIM(old_info.nms); ++i) {
		new_nms[i].clnt_id = old_info.nms[i].clnt_id;
		new_nms[i].port = old_info.nms[i].port;
		/* The precision limits how much of the old name is read
		 * and the size limits how much is written. */
		snprintf(new_nms[i].hostname, sizeof(new_nms[i].hostname),
			 "%.*s", (int)sizeof(old_info.nms[i].hostname),
			 old_info.nms[i].hostname);
		memcpy(new_nms[i].passwd, old_info.nms[i].passwd,
		       sizeof(new_nms[i].passwd));
		new_nms[i].rtt_adj = old_info.nms[i].rtt_adj;
	}
	new_fd = INVALID_HANDLE_VALUE;
	if (!dcc_create_map(emsg, new_info_nm, &new_fd,
			    new_nms, DIM(new_nms), 0, 0, old_info.flags))
		return -1;

	return map_convert_fin(emsg, new_info_nm, new_fd, old_sb);
}



#endif /* DCC_MAP_INFO_VERSION_6 */
#ifdef DCC_MAP_INFO_VERSION_7
/* Convert an old version 7 map file.
 *      The old file has one of two layouts, with or without IPv6,
 *      which are recognized by the size of the file.  The DCC and
 *      greylist server names and the flags are copied to a new file in
 *      the current format, which then replaces the old file.
 *      The result is an int because -1 is returned for errors and the
 *      caller tests for a negative value.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_v7(DCC_EMSG emsg, struct stat *old_sb)
{
	DCC_PATH new_info_nm;
	union {
	    DCC_V7_IPV6_CLNT_INFO   v6;
	    DCC_V7_NOIPV6_CLNT_INFO nov6;
	} old;
	int new_fd;
	int i;

	if (old_sb->st_size == sizeof(old.v6)) {
		i = map_convert_start(emsg, &old.v6, sizeof(old.v6),
				      DCC_MAP_INFO_VERSION_7,
				      sizeof(old.v6.version),
				      new_info_nm);
		if (i <= 0)
			return i;

		new_fd = INVALID_HANDLE_VALUE;
		if (!dcc_create_map(emsg, new_info_nm, &new_fd,
				    old.v6.dcc.nms, DIM(old.v6.dcc.nms),
				    old.v6.grey.nms, DIM(old.v6.grey.nms),
				    old.v6.flags))
			return -1;

	} else if (old_sb->st_size == sizeof(old.nov6)) {
		i = map_convert_start(emsg, &old.nov6, sizeof(old.nov6),
				      DCC_MAP_INFO_VERSION_7,
				      sizeof(old.nov6.version),
				      new_info_nm);
		if (i <= 0)
			return i;

		new_fd = INVALID_HANDLE_VALUE;
		if (!dcc_create_map(emsg, new_info_nm, &new_fd,
				    old.nov6.dcc.nms, DIM(old.nov6.dcc.nms),
				    old.nov6.grey.nms, DIM(old.nov6.grey.nms),
				    old.nov6.flags))
			return -1;

	} else {
		/* neither layout has this size */
		return 0;
	}

	return map_convert_fin(emsg, new_info_nm, new_fd, old_sb);
}



#endif /* DCC_MAP_INFO_VERSION_7 */
/* Ensure that the shared information is available, but do not lock it.
 *      new_info_nm	0 or the name of the map file to use
 *      new_info_fd	<0 or an open descriptor for the map file, which
 *			this function takes over
 *      The file is opened and mapped again if a different name or a
 *      descriptor is supplied, or if nothing is mapped.  A file whose
 *      size is wrong is offered to the converters for old versions,
 *      and the loop starts over after a successful conversion.
 *      Whether or not anything was mapped, the version string in the
 *      mapped data is checked.
 *      The contexts must be locked
 *      SUID privileges are often released */
u_char					/* 0=problem, 1=nop, 2=mapped */
dcc_map_info(DCC_EMSG emsg, const char *new_info_nm, int new_info_fd)
{
	struct stat sb;
#ifndef DCC_WIN32
	void *p;
#endif
	u_char result;

	/* work only if needed, but always check for corruption */
	result = 1;
	while ((new_info_nm && strcmp(new_info_nm, dcc_info_nm))
	       || new_info_fd >= 0
	       || !dcc_clnt_info) {
		result = 2;
		/* get rid of whatever is mapped now */
		if (!dcc_unmap_info(emsg)) {
			if (new_info_fd >= 0)
				close(new_info_fd);
			return 0;
		}

		if (new_info_nm) {
			dcc_fnm2path(dcc_info_nm, new_info_nm);
			/* don't change name if we convert the file */
			new_info_nm = 0;
		}
		if (dcc_info_nm[0] == '\0') {
			dcc_pemsg(EX_USAGE, emsg, "missing map file");
			return 0;
		}

		if (new_info_fd >= 0) {
			/* use the caller's descriptor, but only once */
			info_fd = new_info_fd;
			new_info_fd = -1;
		} else {
			info_fd = open(dcc_info_nm, O_RDWR, 0600);
#ifndef DCC_WIN32
			/* try again with privileges if
			 * dcc_get_priv_home() grants them */
			if (info_fd < 0
			    && dcc_get_priv_home(dcc_info_nm)) {
				info_fd = open(dcc_info_nm, O_RDWR, 0600);
				dcc_rel_priv();
			}
#endif
			if (info_fd < 0) {
				dcc_pemsg(EX_NOINPUT, emsg, "open(%s): %s",
					  dcc_info_nm, ERROR_STR());
				return 0;
			}
		}

		/* refuse to use the file if it is not private */
		if (!dcc_ck_private(emsg, &sb, dcc_info_nm, info_fd)) {
			dcc_unmap_info(0);
			return 0;
		}

		/* A file of the wrong size might be in an old format.
		 * Each converter returns <0 for an error, 0 if the file is
		 * not its version, and >0 after replacing the file. */
		if ((int)sb.st_size != ISZ(*dcc_clnt_info)) {
#ifdef DCC_MAP_INFO_VERSION_7
			int i;

			/* NOTE(review): map_convert_v5() is defined under
			 * DCC_MAP_INFO_VERSION_5 but is called under the
			 * DCC_MAP_INFO_VERSION_6 guard. */
#ifdef DCC_MAP_INFO_VERSION_6
			i = map_convert_v5(emsg, &sb);
			if (i < 0) {
				dcc_unmap_info(0);
				return 0;
			}
			/* unlock old file and open & lock new file */
			if (i > 0)
				continue;

			i = map_convert_v6(emsg, &sb);
			if (i < 0) {
				dcc_unmap_info(0);
				return 0;
			}
			/* unlock old file and open & lock new file */
			if (i > 0)
				continue;
#endif /* DCC_MAP_INFO_VERSION_6 */
			i = map_convert_v7(emsg, &sb);
			if (i < 0) {
				dcc_unmap_info(0);
				return 0;
			}
			/* unlock old file and open & lock new file */
			if (i > 0)
				continue;
#endif /* DCC_MAP_INFO_VERSION_7 */
			/* no converter recognized the file */
			dcc_pemsg(EX_DATAERR, emsg,
				  "%s is not the size of a DCC map file",
				  dcc_info_nm);
			close(info_fd);
			info_fd = -1;
			return 0;
		}

		/* The file has the right size, so map it.
		 * Setting dcc_clnt_info ends the loop unless a name or
		 * descriptor is still pending. */
#ifdef DCC_WIN32
		dcc_clnt_info= win32_map(emsg, &info_map, dcc_info_nm,
					 info_fd, sizeof(*dcc_clnt_info));
		if (!dcc_clnt_info) {
			close(info_fd);
			info_fd = -1;
			return 0;
		}
#else
		p = mmap(0, sizeof(*dcc_clnt_info),
			 PROT_READ|PROT_WRITE, MAP_SHARED, info_fd, 0);
		if (p == MAP_FAILED) {
			dcc_pemsg(EX_IOERR, emsg, "mmap(%s): %s",
				  dcc_info_nm, ERROR_STR());
			close(info_fd);
			info_fd = -1;
			return 0;
		}
		dcc_clnt_info = p;
#endif
	}

	/* check the version string even if nothing was mapped this time */
	if (strncmp(dcc_clnt_info->version, DCC_MAP_INFO_VERSION,
		    sizeof(dcc_clnt_info->version))) {
		dcc_pemsg(EX_DATAERR, emsg, "unrecognized data in %s",
			  dcc_info_nm);
		dcc_unmap_info(0);
		return 0;
	}

	if (!get_clnt_hid(emsg)) {
		dcc_unmap_info(0);
		return 0;
	}

	/* put our host ID into a freshly mapped file */
	if (result > 1)
		dcc_clnt_info->proto_hdr.op_nums.h = dcc_clnt_hid;

	return result;
}



/* Ensure that the shared information is mapped and then lock it.
 *      SUID privileges are often released */
u_char					/* 0=something wrong, 1=mapped */
dcc_map_lock_info(DCC_EMSG emsg, const char *new_info_nm, int new_info_fd)
{
	if (0 >= dcc_map_info(emsg, new_info_nm, new_info_fd))
		return 0;

	if (!dcc_info_lock(emsg))
		return 0;
	return 1;
}



/* Forget the current server and any failure backoff.
 *      The shared memory must be locked */
static inline void
dcc_force_pick(DCC_SRVR_CLASS *class)
{
	class->fail_exp = 0;
	class->act_inx = -1;		/* no active server */
}



/* Forget the current server and clear the resolve field of the class.
 *      Resolving host names must be locked */
static void
dcc_force_resolve(DCC_SRVR_CLASS *class)
{
	class->resolve = 0;
	dcc_force_pick(class);
}



/* Forget the current server and clear the resolve and measure fields
 * of the class.
 *      The average threshold RTT is also reset if new_avg_thold_rtt
 *      is not zero.
 *      Resolving host names must be locked */
void
dcc_force_measure_rtt(DCC_SRVR_CLASS *class, u_char new_avg_thold_rtt)
{
	if (new_avg_thold_rtt != 0)
		class->avg_thold_rtt = -DCC_RTT_BAD;
	class->measure = 0;

	dcc_force_resolve(class);
}



/* Pick the best server, which is the usable address with the smallest
 * sum of measured RTT and the RTT adjustment of its server name.
 *      Addresses with an unknown RTT or no address family are ignored.
 *      The base and threshold RTTs of the class are also computed.
 *      If there is no usable address, the active index is cleared and
 *      an error message listing the first few names and addresses is
 *      generated unless emsg already contains a message.
 *      The client information and the contexts must be exclusively locked.
 *      Assume there is at least one hostname. */
static u_char				/* 0=have none, 1=same, 2=changed */
pick_srvr(DCC_EMSG emsg, DCC_SRVR_CLASS *class)
{
	const DCC_SRVR_ADDR *ap, *best_ap;
	int cur_rtt;
	int best_rtt;			/* smallest RTT	*/
	int next_rtt;			/* second smallest RTT */
	u_int16_t prev_inx;

	prev_inx = class->act_inx;
	best_ap = 0;
	best_rtt = DCC_RTT_BAD;
	next_rtt = DCC_RTT_BAD;
	for (ap = class->addrs; ap <= LAST(class->addrs); ++ap) {
		cur_rtt = ap->rtt;
		if (cur_rtt == DCC_RTT_BAD
		    || ap->family == 0)
			continue;
		cur_rtt += class->nms[ap->nm_inx].rtt_adj;
		if (best_rtt > cur_rtt) {
			/* the old best becomes the second best */
			if (next_rtt > best_rtt)
				next_rtt = best_rtt;
			best_rtt = cur_rtt;
			best_ap = ap;
		} else if (next_rtt > cur_rtt) {
			next_rtt = cur_rtt;
		}
	}

	/* we failed to find a server */
	if (!best_ap) {
		if (emsg && *emsg == '\0') {
			dcc_pemsg(EX_IOERR, emsg,
				  "%s server%s %s%s%s%s%s%s at %s%s%s%s%s%s"
				  " not responding",
				  DCC_IS_GREY_STR(class),
				  class->nms[1].hostname[0] ? "s" : "",
				  class->nms[0].hostname,
				  class->nms[1].hostname[0] ? " " : "",
				  class->nms[1].hostname,
				  class->nms[2].hostname[0] ? " " : "",
				  class->nms[2].hostname,
				  class->nms[3].hostname[0] ? " ..." : "",

				  dcc_ap2str_opt(&class->addrs[0],
						 DCC_CLASS2PORT(class), '\0'),
				  class->num_addrs > 1 ? " " : "",
				  class->num_addrs > 1
				  ? dcc_ap2str_opt(&class->addrs[1],
						   DCC_CLASS2PORT(class), '\0')
				  : "",
				  class->num_addrs > 2 ? " " : "",
				  class->num_addrs > 2
				  ? dcc_ap2str_opt(&class->addrs[2],
						   DCC_CLASS2PORT(class), '\0')
				  : "",
				  class->num_addrs > 3 ? " ..." : "");
		}
		class->act_inx = -1;
		return 0;
	}

	/* We found a usable server.
	 * Compute the basic RTT to the server including a variance
	 * of the smaller of 50 milliseconds and half the real RTT. */
	cur_rtt = best_ap->rtt/2;
	if (cur_rtt > 50*1000)
		cur_rtt = 50*1000;
	class->base_rtt = best_rtt + cur_rtt;

	if (next_rtt != DCC_RTT_BAD) {
		class->thold_rtt = min(next_rtt, class->base_rtt);
	} else {
		/* if there is no second choice, there is no
		 * point in a threshold for switching to it */
		class->thold_rtt = DCC_RTT_BAD;
	}

	class->act_inx = (best_ap - class->addrs);
	if (class->act_inx == prev_inx)
		return 1;

	/* say so only if the previous choice was a real address */
	if (dcc_clnt_debug
	    && prev_inx < class->num_addrs) {
		trace_perf("replacing",
			   &class->addrs[prev_inx]);
		trace_perf("pick", best_ap);
	}
	return 2;
}



/* count IP addresses per host name and per second level domain name
 *      There is one entry for each possible server name, with the same
 *      index as the server name. */
typedef struct name_addrs {
    const char *sld;			/* domain name */
    u_char     sld_addrs;		/* # of addresses for domain name */
    u_char     host_addrs;		/* # of addresses for a host name */
    /* index of the entry whose sld_addrs counts the addresses of the
     * domain of this name */
    u_char     sld_addrs_inx;
} NAME_ADDRS[DCC_MAX_SRVR_NMS];


/* Delete an address from a growing list of addresses.
 *      The counts of addresses for the host name and for the domain of
 *      the deleted address are reduced along with the total.  The
 *      addresses after the deleted address are moved down, and the
 *      vacated last entry is cleared. */
static void
del_new_addr(DCC_SRVR_ADDR new_addrs[DCC_MAX_SRVR_ADDRS],
	     int *num_new_addrs,
	     NAME_ADDRS name_addrs,	/* addresses per server name */
	     int tgt)			/* delete this address */
{
	int nm_inx;
	int remaining;
	int tail;

	/* fix the numbers of addresses of that host and of its domain */
	nm_inx = new_addrs[tgt].nm_inx;
	--name_addrs[nm_inx].host_addrs;
	--name_addrs[name_addrs[nm_inx].sld_addrs_inx].sld_addrs;

	/* fix our total number of addresses */
	remaining = --*num_new_addrs;

	/* close the gap left by the discarded address */
	tail = remaining - tgt;
	if (tail > 0)
		memmove(&new_addrs[tgt], &new_addrs[tgt+1],
			tail * sizeof(new_addrs[0]));

	memset(&new_addrs[remaining], 0, sizeof(new_addrs[0]));
}



/* Reduce a socket address to a residue class for sorting.
 *      An IPv4 address gives its value modulo DCC_SRVRS_MOD.
 *      An IPv6 address that dcc_ipv6toipv4() can convert gives the
 *      residue of the IPv4 address plus DCC_SRVRS_MOD_IPV4.
 *      Any other IPv6 address gives the sum of its four 32-bit words
 *      modulo DCC_SRVRS_MOD. */
static inline u_int
su_srvrs_mod(const DCC_SOCKU *su)
{
	struct in_addr addr4;
	DCC_SOCKU su_v4;
	u_int residue;

	if (su->sa.sa_family == AF_INET)
		return su->ipv4.sin_addr.s_addr % DCC_SRVRS_MOD;

	if (!dcc_ipv6toipv4(&addr4, &su->ipv6.sin6_addr)) {
		/* a real IPv6 address */
		residue = (su->ipv6.sin6_addr.s6_addr32[0]
			   + su->ipv6.sin6_addr.s6_addr32[1]
			   + su->ipv6.sin6_addr.s6_addr32[2]
			   + su->ipv6.sin6_addr.s6_addr32[3]) % DCC_SRVRS_MOD;
		return residue;
	}

	/* an IPv4 address disguised as an IPv6 address */
	dcc_mk_su(&su_v4, AF_INET, &addr4, su->ipv6.sin6_port);
	residue = su_v4.ipv4.sin_addr.s_addr % DCC_SRVRS_MOD;
	residue += DCC_SRVRS_MOD_IPV4;
	return residue;
}



/* Order two socket addresses, first by residue class and then,
 * within a class, by comparing all of their bytes.
 *      The result is negative, zero, or positive like that of memcmp(). */
static int
sucmp(const DCC_SOCKU *su1, const DCC_SOCKU *su2)
{
	u_int res1, res2;
	int diff;

	res1 = su_srvrs_mod(su1);
	res2 = su_srvrs_mod(su2);

	diff = (int)res1 - (int)res2;
	if (diff == 0)
		diff = memcmp(su1, su2, sizeof(DCC_SOCKU));
	return diff;
}



/* deal with a list of IP addresses or aliases for one DCC server hostname
 * the contexts and the mmap()'ed info must be locked
 *
 *	new_addrs	growing list of addresses for all server names
 *	num_new_addrs	number of entries in new_addrs; updated
 *	cur		current class, consulted for what is already known
 *			    about an address and for the other names
 *	nmp, nm_inx	server name whose addresses are being added
 *	name_addrs	counts of addresses per host name and per domain
 *	port		port stored into every candidate address
 *
 * The candidate addresses are taken from dcc_hostaddrs up to
 * dcc_hostaddrs_end. */
static void
get_addrs(DCC_SRVR_ADDR new_addrs[DCC_MAX_SRVR_ADDRS],
	  int *num_new_addrs,
	  const DCC_SRVR_CLASS *cur,
	  DCC_SRVR_NM *nmp, int nm_inx,	/* server name being resolved */
	  NAME_ADDRS name_addrs,	/* addresses per server name */
	  u_int16_t port)
{
	DCC_SRVR_ADDR *ap;
	const DCC_SRVR_NM *nmp2;
	DCC_SOCKU *new, su, prev;
	u_char found;
	int nm1_inx, sld1_inx, sld2_inx;
	int host_max, sld_max;
	int i, j, k;

	/* Keep as many IP addresses as we have room, but for as many
	 * named servers as possible
	 * Sort the addresses to keep our list stable when we re-check.
	 * Otherwise, we would start from scratch when nothing changes
	 * but the order of responses from a DNS server.
	 * Sort by residue class to pick a random subset when there
	 * are too many servers to fit in our list. */

	/* start below every candidate; each pass of the outer loop then
	 * advances `prev` to the next address in sucmp() order */
	memset(&prev, 0, sizeof(prev));
	prev.sa.sa_family = AF_INET;
	for (;;) {
		/* Pick the next address in the newly resolved list
		 * to consider.  We want the smallest address larger
		 * than the previous address we considered.
		 * "Smallest" is defined using the local, presumably
		 * random ordering of addresses. */
		found = 0;
		for (new = dcc_hostaddrs; new < dcc_hostaddrs_end; ++new) {
			/* the port is part of what sucmp() compares */
			*DCC_SU_PORT(new) = port;
			if (sucmp(new, &prev) > 0) {
				if (found && sucmp(&su, new) <= 0)
					continue;
				su = *new;
				found = 1;
			}
		}
		/* quit if we've considered them all */
		if (!found)
			break;
		/* found one to consider, so note it */
		memcpy(&prev, &su, sizeof(prev));

		/* ignore duplicate A records even for other hostnames,
		 * unless the port numbers or address families differ */
		ap = &new_addrs[*num_new_addrs];
		while (--ap >= new_addrs) {
			if (dcc_cmp_ap2su(ap, &su)) {
				/* keep the entry with the non-anonymous ID
				 * or smallest RTT adjustment */
				nmp2 = &cur->nms[ap->nm_inx];
				i = (nmp->clnt_id == DCC_ID_ANON);
				j = (nmp2->clnt_id == DCC_ID_ANON);
				if (i != j) {
					/* only one of the names is anonymous;
					 * skip the new address if it is ours */
					if (i)
					    goto next_addr;
				} else {
					if (nmp->rtt_adj >= nmp2->rtt_adj)
					    goto next_addr;
				}
				/* delete the previous instance */
				del_new_addr(new_addrs, num_new_addrs,
					     name_addrs, ap - new_addrs);
				break;
			}
		}

		/* If we already have as many addresses as we will use,
		 * then pick one to discard. Discard the last address of
		 * the host in the second level domain with the most
		 * addresses but without eliminating all addresses for any
		 * host name.  Look for the domain with the most IP addresses
		 * and that has at least one host with at least two
		 * addresses. */
		if (*num_new_addrs == DCC_MAX_SRVR_ADDRS) {
			host_max = -1;
			sld_max = -1;
			nm1_inx = -1;
			sld1_inx = -1;
			for (i = 0; i <= nm_inx; i++) {
				/* ignore hosts with only 1 IP address */
				j = name_addrs[i].host_addrs;
				if (j <= 1)
					continue;
				sld2_inx = name_addrs[i].sld_addrs_inx;
				k = name_addrs[sld2_inx].sld_addrs;
				if (sld_max <= k) {
					if (sld1_inx != sld2_inx) {
					    /* a different domain with at
					     * least as many addresses */
					    sld_max = k;
					    sld1_inx = sld2_inx;
					    host_max = j;
					    nm1_inx = i;
					} else if (host_max <= j) {
					    /* same domain, but a host with
					     * at least as many addresses */
					    host_max = j;
					    nm1_inx = i;
					}
				}
			}
			/* no additional IP addresses for the target host if
			 * it has the most IP addresses */
			if (nm1_inx == nm_inx)
				return;

			/* find the last address of the host with the most */
			/* NOTE(review): if no host has more than one address,
			 * nm1_inx is still -1 here and entry 0 is deleted;
			 * presumably DCC_MAX_SRVR_ADDRS exceeds the number of
			 * names so that cannot happen -- confirm */
			for (i = 0, j = 0; i < *num_new_addrs; i++) {
				if (new_addrs[i].nm_inx == nm1_inx)
					j = i;
			}
			/* and delete it */
			del_new_addr(new_addrs, num_new_addrs, name_addrs, j);
		}

		/* install the new address in the growing list */
		ap = &new_addrs[*num_new_addrs];
		memset(ap, 0, sizeof(*ap));
		ap->rtt = DCC_RTT_BAD;
		ap->port = su.ipv4.sin_port;
		if ((ap->family = su.sa.sa_family) == AF_INET) {
			ap->ip.v4.s_addr = su.ipv4.sin_addr.s_addr;
		} else {
			memcpy(&ap->ip.v6, &su.ipv6.sin6_addr,
			       sizeof(ap->ip.v6));
		}
		/* If this is a previously known address,
		 * preserve what we already knew about it */
		for (i = 0; i < DIM(cur->addrs); ++i) {
			if (dcc_cmp_ap2su(&cur->addrs[i], &su)) {
				memcpy(ap, &cur->addrs[i], sizeof(*ap));
				break;
			}
		}
		/* the copy above may carry an old name index */
		ap->nm_inx = nm_inx;
		++*num_new_addrs;

		++name_addrs[nm_inx].host_addrs;
		++name_addrs[name_addrs[nm_inx].sld_addrs_inx].sld_addrs;
next_addr:;
	}
}



/* Resolve one server name and merge its addresses into the new list.
 *	emsg		receives a message if the name cannot be resolved
 *	new_addrs	growing list of addresses
 *	num_new_addrs	number of entries in new_addrs
 *	cur		the class whose names are being resolved
 *	nmp, nm_inx	the server name being resolved and its index
 *	name_addrs	per-name and per-domain address counts
 * nmp->defined is set only if the name resolves.
 * the contexts and the mmap()'ed info must be locked */
static void
dcc_resolve_host(DCC_EMSG emsg,
		 DCC_SRVR_ADDR new_addrs[DCC_MAX_SRVR_ADDRS],
		 int *num_new_addrs,
		 const DCC_SRVR_CLASS *cur,
		 DCC_SRVR_NM *nmp, int nm_inx,	/* server name being resolved */
		 NAME_ADDRS name_addrs) /* addresses per server name */
{
	const char *sld, *dot, *next_dot;
	int sld_inx;
	int use_ipv6;
	int herr;
	u_char ok;

	nmp->defined = 0;
	if (nmp->hostname[0] == '\0')
		return;

	/* Find the start of the last two labels of the host name.
	 * A name with fewer than two dots is used whole. */
	sld = nmp->hostname;
	dot = strchr(sld, '.');
	while (dot != 0) {
		next_dot = strchr(dot+1, '.');
		if (next_dot == 0)
			break;
		sld = dot+1;
		dot = next_dot;
	}
	name_addrs[nm_inx].sld = sld;

	/* Share the domain's total with the first earlier name in the
	 * same domain, or start a new total in our own slot. */
	sld_inx = 0;
	while (sld_inx < nm_inx
	       && (name_addrs[sld_inx].sld == 0
		   || strcmp(sld, name_addrs[sld_inx].sld) != 0))
		++sld_inx;
	name_addrs[nm_inx].sld_addrs_inx = sld_inx;

	dcc_host_lock();
	use_ipv6 = (dcc_clnt_info->flags & DCC_CLNT_INFO_FG_IPV6) ? 1 : 0;
	if (dcc_clnt_info->flags & DCC_CLNT_INFO_FG_SOCKS)
		ok = dcc_get_host_SOCKS(nmp->hostname, use_ipv6, &herr);
	else
		ok = dcc_get_host(nmp->hostname, use_ipv6, &herr);

	if (ok) {
		nmp->defined = 1;
		get_addrs(new_addrs, num_new_addrs, cur, nmp, nm_inx,
			  name_addrs, nmp->port);
	} else {
		dcc_pemsg(EX_NOHOST, emsg, "dcc_get_host(%s): %s",
			  nmp->hostname, DCC_HSTRERROR(herr));
	}
	dcc_host_unlock();
}



/* resolve server hostnames again
 *      The contexts, shared information, and resolving must be locked.
 *      Leaves the shared information locked only upon success.
 *      The contexts are locked on exit even on failure.
 *
 *	emsg	receives the reason for failure
 *	ctxt	supplies the current time for scheduling the next check
 *	class	the set of server names and addresses to refresh
 *
 *      If the freshly resolved list is identical to the current list,
 *      nothing in the class but the time of the next check is changed.
 *      Otherwise the list is replaced, the generation number of the
 *      list is advanced so that records of transmissions to the old list
 *      can be recognized as stale, and the RTTs are marked for
 *      recomputation. */
static u_char				/* 0=no good addresses, 1=at least 1 */
dcc_clnt_resolve(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class)
{
	DCC_SRVR_ADDR new_addrs[DCC_MAX_SRVR_ADDRS];
	int num_new_addrs;
	int nm_inx;
	NAME_ADDRS name_addrs;
	DCC_SRVR_ADDR *new_ap, *cur_ap;
	int i;

	if (dcc_clnt_debug > 1)
		dcc_trace_msg("resolve %s server host names",
			      DCC_IS_GREY_STR(class));

	memset(new_addrs, 0, sizeof(new_addrs));
	num_new_addrs = 0;
	memset(&name_addrs, 0, sizeof(name_addrs));

	/* pick our residue class once; 0 means "not yet chosen" */
	if (dcc_clnt_info->residue == 0) {
		dcc_clnt_info->residue = dcc_clnt_hid % DCC_SRVRS_MOD;
		if (dcc_clnt_info->residue == 0)
			dcc_clnt_info->residue = 1;
	}

	if (emsg)
		*emsg = '\0';
	for (nm_inx = 0; nm_inx < DIM(class->nms); ++nm_inx)
		dcc_resolve_host(emsg, new_addrs, &num_new_addrs,
				 class, &class->nms[nm_inx], nm_inx,
				 name_addrs);

	class->resolve = ctxt->now.tv_sec+DCC_RE_RESOLVE;

	/* see if anything changed */
	i = num_new_addrs;
	if (i != 0 && i == class->num_addrs) {
		new_ap = new_addrs;
		cur_ap = class->addrs;
		for (;;) {
			if (!i)
				return 1;   /* nothing changed */
			/* compare the addresses themselves, not whatever
			 * happens to be at the start of the structure */
			if (new_ap->nm_inx != cur_ap->nm_inx
			    || memcmp(&new_ap->ip, &cur_ap->ip,
				      sizeof(new_ap->ip))
			    || new_ap->port != cur_ap->port
			    || new_ap->family != cur_ap->family)
				break;
			++new_ap;
			++cur_ap;
			--i;
		}
	}

	/* Something changed, whether the number of addresses or one of
	 * the addresses.
	 * Advance the generation on every replacement of the list so that
	 * indices recorded against the old list are not applied to the new.
	 * Get the new values and arrange to recompute RTTs */
	++class->gen;
	class->act_inx = -1;
	class->avg_thold_rtt = -DCC_RTT_BAD;
	class->measure = 0;
	memcpy(&class->addrs, &new_addrs, sizeof(class->addrs));
	class->num_addrs = num_new_addrs;

	if (!class->num_addrs) {
		/* failed to resolve any server host names */
		class->resolve = 0;
		dcc_info_unlock(0);
		if (emsg && *emsg == '\0')
			dcc_pemsg(EX_USAGE, emsg,
				  "no valid %s server hostnames",
				  DCC_IS_GREY_STR(class));
		return 0;
	}
	return 1;
}



#ifdef DCC_UDP_CONNECT
/* Undo a connect() on the client's UDP socket.
 *	There is no universally followed standard for disconnecting a
 *	socket.  Where DCC_UDP_DISCON_LEN is defined, connect() is called
 *	again with an AF_UNSPEC address of that length.  Elsewhere the
 *	socket is replaced by way of dcc_clnt_soc_open().
 *	Returns 1 on success or if there is no socket, 0 on failure. */
u_char
dcc_clnt_disconnect(DCC_EMSG emsg UATTRIB, DCC_CLNT_CTXT *ctxt)
{
	/* we are recorded as disconnected regardless of what follows */
	ctxt->conn_su.sa.sa_family = AF_UNSPEC;

	if (ctxt->soc != INVALID_SOCKET) {
#ifdef DCC_UDP_DISCON_LEN
		ctxt->conn_su.ipv4.sin_port = 0;
		connect(ctxt->soc, &ctxt->conn_su.sa, DCC_UDP_DISCON_LEN);
#else
		return dcc_clnt_soc_open(emsg, ctxt);
#endif
	}
	return 1;
}
#endif /* DCC_UDP_CONNECT */



/* (Re)open the UDP socket of a client context.
 *	Any existing socket is closed first.
 *	An IPv6 socket is tried first if DCC_CLNT_INFO_FG_IPV6 is set in the
 *	shared information.  If that yields no socket, the flag is cleared
 *	and an IPv4 socket is tried.
 *	On success, ctxt->port is the port the socket is bound to and
 *	ctxt->use_ipv6 reflects the IPv6 flag.
 *	On failure, the shared information is unlocked and ctxt->soc is
 *	INVALID_SOCKET.
 *	Returns 1 on success and 0 on failure with a message in emsg. */
u_char
dcc_clnt_soc_open(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt)
{
	DCC_SOCKU su;
	DCC_SOCKLEN_T soc_len;		/* as in clnt_recv() */
	int retries;

	if (ctxt->soc != INVALID_SOCKET) {
		/* sockets are closed with closesocket() everywhere else,
		 * which matters where sockets are not file descriptors */
		if (SOCKET_ERROR == closesocket(ctxt->soc)
		    && dcc_clnt_debug)
			dcc_trace_msg("closesocket(ctxt): %s", ERROR_STR());
		ctxt->soc = INVALID_SOCKET;
	}
	/* the socket is now disconnected */
	ctxt->conn_su.sa.sa_family = AF_UNSPEC;

	retries = -1;
	if ((dcc_clnt_info->flags & DCC_CLNT_INFO_FG_IPV6)
	    && !dcc_udp_bind(emsg, &ctxt->soc,
			     dcc_mk_su(&su, AF_INET6, 0, ctxt->port),
			     &retries)) {
		dcc_info_unlock(emsg);
		return 0;
	}
	/* no IPv6 socket, either because it was not wanted or because
	 * the bind produced none, so fall back to IPv4 */
	if (ctxt->soc == INVALID_SOCKET) {
		dcc_clnt_info->flags &= ~DCC_CLNT_INFO_FG_IPV6;
		retries = -1;
		if (!dcc_udp_bind(emsg, &ctxt->soc,
				  dcc_mk_su(&su, AF_INET, 0, ctxt->port),
				  &retries)) {
			dcc_info_unlock(emsg);
			return 0;
		}
	}
#if !defined(USE_POLL) && !defined(DCC_WIN32)
	/* select() cannot handle descriptors beyond FD_SETSIZE */
	if (ctxt->soc >= FD_SETSIZE) {
		dcc_pemsg(EX_IOERR, emsg, "socket FD %d > FD_SETSIZE %d",
			  ctxt->soc, FD_SETSIZE);
		closesocket(ctxt->soc);
		ctxt->soc = INVALID_SOCKET;
		dcc_info_unlock(emsg);
		return 0;
	}
#endif

#if defined(IPPROTO_IP) && defined(IP_TTL)
	if (dcc_debug_ttl != 0
	    && 0 > setsockopt(ctxt->soc, IPPROTO_IP, IP_TTL,
			      (void *)&dcc_debug_ttl, sizeof(dcc_debug_ttl))) {
		dcc_pemsg(EX_IOERR, emsg, "setsockopt(TTL=%d):%s",
			  dcc_debug_ttl, ERROR_STR());
		closesocket(ctxt->soc);
		ctxt->soc = INVALID_SOCKET;
		dcc_info_unlock(emsg);
		return 0;
	}
#endif

	/* remember the port we were given so that a replacement socket
	 * can ask for the same one */
	soc_len = sizeof(su);
	if (0 > getsockname(ctxt->soc, &su.sa, &soc_len)) {
		dcc_pemsg(EX_IOERR, emsg, "getsockname(): %s", ERROR_STR());
		closesocket(ctxt->soc);
		ctxt->soc = INVALID_SOCKET;
		dcc_info_unlock(emsg);
		return 0;
	}
	ctxt->port = *DCC_SU_PORT(&su);
	ctxt->use_ipv6 = dcc_clnt_info->flags & DCC_CLNT_INFO_FG_IPV6;
	return 1;
}



/* send a single DCC message
 * the contexts and the shared information must be locked
 *
 *	emsg		receives the reason for failure
 *	ctxt		client context with the socket
 *	xlog		log of transmissions; the next entry is filled
 *	class, ap	the server address to which to send
 *	msg, msg_len	the request; its length, transaction number,
 *			    sender, and signature are set here
 *	connect_ok	1=connect() may be used for this transmission
 *
 * Returns 1 if the whole message was sent and 0 otherwise.
 * Without DCC_UDP_CONNECT, connect() is never used. */
static u_char
clnt_xmit(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_XLOG *xlog,
	  DCC_SRVR_CLASS *class, DCC_SRVR_ADDR *ap,
	  DCC_HDR *msg, int msg_len, u_char connect_ok)
{
	DCC_XLOG_ENTRY *xloge;
	char abuf[80];
	int i;
#ifdef DCC_UDP_CONNECT
	DCC_XLOG_ENTRY *xloge1;
	u_char was_connected;
	int j;
#else
	connect_ok = 0;
#endif

	msg->len = htons(msg_len);

	/* record the transmission so that the response can be matched */
	xloge = xlog->next;
	if (xloge > xlog->last)
		abort();
	++msg->op_nums.t;
	xloge->op_nums = msg->op_nums;
	xloge->addr_inx = ap - class->addrs;
	dcc_mk_su(&xloge->su, ap->family, &ap->ip, ap->port);
	xloge->addrs_gen = class->gen;
	xloge->sent_us = ctxt->now_us;
	xloge->op = msg->op;
	xloge->id = class->nms[ap->nm_inx].clnt_id;
	if (xloge->id == 0)
		dcc_logbad(EX_SOFTWARE, "clnt_xmit: zero client-ID");
	/* without a password we can only be anonymous */
	if (class->nms[ap->nm_inx].passwd[0] == '\0')
		xloge->id = DCC_ID_ANON;
	msg->sender = htonl(xloge->id);
	if (xloge->id != DCC_ID_ANON) {
		/* the password is a fixed size field, not a string, and
		 * the whole field is used for the signature */
		strncpy(xloge->passwd, class->nms[ap->nm_inx].passwd,
			sizeof(xloge->passwd));
		dcc_sign(xloge->passwd, sizeof(xloge->passwd), msg, msg_len);
	} else {
		xloge->passwd[0] = '\0';
		memset((char *)msg + (msg_len-sizeof(DCC_SIGNATURE)), 0,
		       sizeof(DCC_SIGNATURE));
	}

#ifdef DCC_UDP_CONNECT
	/* Use connect() when possible to get ICMP Unreachable messages.
	 * It is impossible with SOCKS
	 * or when talking to more than one server. */
	if (dcc_clnt_info->flags & DCC_CLNT_INFO_FG_SOCKS)
		connect_ok = 0;
	for (xloge1 = xlog->base; connect_ok && xloge1 < xloge; ++xloge1) {
		if (xloge1->op_nums.t == DCC_OP_NUMS_NULL)
			continue;
		if (xloge->addr_inx != xloge1->addr_inx) {
			connect_ok = 0;
			break;
		}
	}
	if (connect_ok) {
		/* connect only if not already connected to this server */
		if (memcmp(&ctxt->conn_su, &xloge->su, sizeof(ctxt->conn_su))) {
#ifdef linux
			/* at least some versions of Linux do not allow
			 * consecutive valid calls to connect()
			 * but work if the socket is first disconnected */
			if (!dcc_clnt_disconnect(emsg, ctxt))
				return 0;
#endif /* linux */
			if (SOCKET_ERROR == connect(ctxt->soc, &xloge->su.sa,
						    DCC_SU_LEN(&xloge->su))) {
				dcc_pemsg(EX_IOERR, emsg, "connect(%s): %s",
					  addr2str(abuf, sizeof(abuf), class,
						   class->gen, ap, 0),
					  ERROR_STR());
				return 0;
			}
			ctxt->conn_su = xloge->su;
		}
		i = send(ctxt->soc,
#ifdef DCC_WIN32
			 (const char *)msg,
#else
			 msg,
#endif
			 msg_len, 0);

	} else {
		if (ctxt->conn_su.sa.sa_family == AF_UNSPEC) {
			was_connected = 0;
		} else {
			if (!dcc_clnt_disconnect(emsg, ctxt))
				return 0;
			was_connected = 1;
		}

		/* paranoid infinite loop prevention */
		j = DCC_MAX_XMITS*DCC_MAX_SRVR_ADDRS*2;
		do {
#endif /* DCC_UDP_CONNECT */
#ifdef HAVE_LIBSOCKS
			if (dcc_clnt_info->flags & DCC_CLNT_INFO_FG_SOCKS)
				i = Rsendto(ctxt->soc, msg, msg_len, 0,
					    &xloge->su.sa,
					    DCC_SU_LEN(&xloge->su));
			else
#endif /* HAVE_LIBSOCKS */
				i = sendto(ctxt->soc,
#ifdef DCC_WIN32
					   (const char *)msg,
#else
					   msg,
#endif
					   msg_len, 0, &xloge->su.sa,
					   DCC_SU_LEN(&xloge->su));
#ifdef DCC_UDP_CONNECT
			/* deal with ICMP messages for a previous
			 * connected transmission */
		} while (i == SOCKET_ERROR && was_connected
			 && DCC_CONNECT_ERRORS() && --j > 0);
	}
#endif /* DCC_UDP_CONNECT */
	/* count the attempt even if it failed */
	++xlog->cur[ap - class->addrs].xmits;
	if (i == msg_len) {
		/* keep the log entry unless the log is full */
		if (xloge < xlog->last) {
			++xlog->next;
			++xlog->outstanding;
		}
		return 1;
	}
	if (i < 0) {
		dcc_pemsg(EX_IOERR, emsg, "%s(%s): %s",
			  connect_ok ? "send" : "sendto",
			  addr2str(abuf, sizeof(abuf), class,
				   class->gen, ap, 0),
			  ERROR_STR());
	} else {
		dcc_pemsg(EX_IOERR, emsg, "%s(%s)=%d instead of %d",
			  connect_ok ? "send" : "sendto",
			  addr2str(abuf, sizeof(abuf), class,
				   class->gen, ap, 0),
			  i, msg_len);
	}
	return 0;
}



/* fold a round trip time measurement into the estimate for a server
 *	ctxt	supplies the current time
 *	class	the servers to which the measurement applies
 *	xloge	the record of the transmission that was answered or given up
 *	us	the measured time in microseconds; it is clamped to
 *		    0 through DCC_MAX_RTT
 * Nothing is changed unless the shared information is locked and the
 * list of addresses is the same generation as when the request was sent. */
static void
update_rtt(DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class,
	   DCC_XLOG_ENTRY *xloge, int us)
{
	DCC_SRVR_ADDR *ap;

	if (us < 0)
		us = 0;
	else if (us > DCC_MAX_RTT)
		us = DCC_MAX_RTT;

	/* compute new RTT if the map data structure is locked,
	 * the clock did not jump,
	 * and we're talking about the same hosts */
	if (!info_locked
	    || xloge->addrs_gen != class->gen)
		return;

	ap = &class->addrs[xloge->addr_inx];
	if (ap->rtt == DCC_RTT_BAD) {
		/* a newly working server starts with this measurement
		 * and a clean history */
		ap->rtt = us;
		ap->total_xmits = 0;
		ap->total_resps = 0;
		ap->resp_mem = 0;
		ap->rtt_updated = 0;

	} else if (ctxt->now.tv_sec > (ap->rtt_updated
				       + FAST_RTT_SECS)) {
		/* adjust the RTT quickly if this is the first
		 * measurement in a long time, because the old value
		 * is stale */
		AGE_AVG(ap->rtt, us, 2, 1);
		ap->rtt_updated = ctxt->now.tv_sec;

	} else {
		/* otherwise smooth the recent measurements */
		AGE_AVG(ap->rtt, us, 9, 1);
		ap->rtt_updated = ctxt->now.tv_sec;
	}
}



/* Account for the requests sent in one exchange.
 *	Requests that are still unanswered after at least the server's
 *	current RTT are charged to the server as slow responses.
 *	Then the per-address counts of transmissions and responses are
 *	shifted into each server's response history.
 *	When `measuring`, responding servers are counted in
 *	xlog->working_addrs and servers with no answers in this cycle
 *	or in their recent history are marked bad.
 * the data must be locked */
static void
resp_rates(DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class,
	   DCC_XLOG *xlog, u_char measuring)
{
	DCC_XLOG_ENTRY *entry;
	DCC_SRVR_ADDR *addr;
	int elapsed_us;
	int inx;

	for (entry = xlog->base; entry < xlog->next; ++entry) {
		/* only requests that have not been answered matter */
		if (entry->op_nums.t != DCC_OP_NUMS_NULL) {
			addr = &class->addrs[entry->addr_inx];

			/* There is nothing to adjust for a server that has
			 * never answered.  Otherwise, if the server has had
			 * at least an RTT's worth of time, pretend its
			 * answer arrived just now and a little late. */
			if (addr->rtt != DCC_RTT_BAD) {
				elapsed_us = ctxt->now_us - entry->sent_us;
				if (elapsed_us >= addr->rtt)
					update_rtt(ctxt, class, entry,
						   elapsed_us + DCC_INIT_RTT);
			}
		}
	}

	/* maintain the response rate */
	for (inx = 0; inx < DIM(xlog->cur); ++inx) {
		addr = &class->addrs[inx];

		if (xlog->cur[inx].xmits == 0
		    || addr->rtt == DCC_RTT_BAD)
			continue;

		if (measuring) {
			if (xlog->cur[inx].resps != 0) {
				++xlog->working_addrs;
			} else if ((addr->resp_mem
				    & ((1<<DCC_MAX_XMITS)-1)) == 0) {
				/* no answers at all during this measurement
				 * cycle, so the server is bad */
				addr->rtt = DCC_RTT_BAD;
				continue;
			}
		}

		addr->total_xmits += xlog->cur[inx].xmits;
		if (addr->total_xmits > DCC_TOTAL_XMITS_MAX)
			addr->total_xmits = DCC_TOTAL_XMITS_MAX;

		/* shift one bit into the history for each transmission,
		 * answered ones first, forgetting the oldest bit */
		for (;;) {
			addr->total_resps -= (addr->resp_mem
					      >> (DCC_TOTAL_XMITS_MAX-1));
			addr->resp_mem <<= 1;
			if (xlog->cur[inx].resps != 0) {
				addr->resp_mem |= 1;
				++addr->total_resps;
				--xlog->cur[inx].resps;
			}
			if (--xlog->cur[inx].xmits == 0)
				break;
		}
	}
}



/* receive a single DCC response
 *      The contexts must be locked.
 *      The mapped or common info ought to be locked, but reception
 *      works if it is not.
 *
 *	emsg		receives the reason for a fatal error
 *	ctxt		client context with the socket
 *	class		servers to which requests were sent
 *	resp, resp_len	buffer for the response
 *	msg		the request, for matching transaction numbers
 *	xlog		log of outstanding transmissions
 *	xlogep		set to the log entry of the transmission that was
 *			    answered or found unreachable, or to 0
 *
 * Packets that are too short, have an inconsistent length, have an
 * unrecognized version, or do not match an outstanding transmission
 * are discarded and the next packet is read. */
static int      /* -1=fatal error, 0=no data, 1=unreachable, 2=ok */
clnt_recv(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class,
	  DCC_HDR *resp, int resp_len,
	  const DCC_HDR *msg, DCC_XLOG *xlog, DCC_XLOG_ENTRY **xlogep)
{
	DCC_SOCKLEN_T su_len;
	DCC_SOCKU su;
	DCC_XLOG_ENTRY *xloge, *xloge1;
	char abuf[80];
	int pkt_len;

	*xlogep = 0;
	for (;;) {
		su_len = sizeof(su);
		memset(&su, 0, sizeof(su));
#ifdef HAVE_LIBSOCKS
		if (dcc_clnt_info->flags & DCC_CLNT_INFO_FG_SOCKS)
			pkt_len = Rrecvfrom(ctxt->soc, resp, resp_len, 0,
					    &su.sa, &su_len);
		else
#endif /* HAVE_LIBSOCKS */
			pkt_len = recvfrom(ctxt->soc,
#ifdef DCC_WIN32
					   (char *)resp,
#else
					   resp,
#endif
					   resp_len, 0, &su.sa, &su_len);
		if (pkt_len < 0) {
			/* Stop looking when there are no more packets */
			if (DCC_BLOCK_ERROR())
				return 0;

			/* ignore ICMP Unreachables unless we have a
			 * single outstanding request */
			if (ctxt->conn_su.sa.sa_family != AF_UNSPEC
			    && DCC_CONNECT_ERRORS()) {
				/* forget all but the last outstanding
				 * request, which is blamed for the error */
				for (xloge1 = xlog->base, xloge = 0;
				     xloge1 < xlog->next;
				     ++xloge1) {
					if (xloge1->op_nums.t==DCC_OP_NUMS_NULL)
					    continue;
					if (xloge)
					    xloge->op_nums.t = DCC_OP_NUMS_NULL;
					xloge = xloge1;
				}
				if (!xloge) {
					if (dcc_clnt_debug)
					    dcc_trace_msg("ignore unmatched:"
							" %s", ERROR_STR());
					continue;
				}
				if (dcc_clnt_debug)
					dcc_trace_msg("note recvfrom(%s): %s",
						      dcc_su2str(&su),
						      ERROR_STR());
				/* the server is bad, but the error counts
				 * as a response to the request */
				xloge->op_nums.t = DCC_OP_NUMS_NULL;
				xlog->outstanding = 0;
				class->addrs[xloge->addr_inx].rtt = DCC_RTT_BAD;
				++xlog->cur[xloge->addr_inx].resps;
				*xlogep = xloge;
				return 1;
			}

			dcc_pemsg(EX_IOERR, emsg, "recvfrom(%s): %s",
				  su.sa.sa_family ? dcc_su2str(&su) : "",
				  ERROR_STR());
			return -1;
		}

		/* every valid packet has at least a header and signature */
		if (pkt_len < (int)(sizeof(DCC_HDR)+sizeof(DCC_SIGNATURE))) {
			trace_bad_packet(xlog, &su, "recv(%s)=%d < %d",
					 dcc_su2str(&su), pkt_len,
					 ISZ(DCC_HDR)+ISZ(DCC_SIGNATURE));
			continue;
		}
		if (pkt_len != ntohs(resp->len)) {
			trace_bad_packet(xlog, &su,
					 "recv(%s)=%d but hdr len=%d from %s",
					 dcc_su2str(&su), pkt_len,
					 ntohs(resp->len), dcc_su2str(&su));
			continue;
		}

		if (resp->pkt_vers < DCC_PKT_VERSION_MIN
		    || resp->pkt_vers > DCC_PKT_VERSION_MAX) {
			trace_bad_packet(xlog, &su,
					 "unrecognized version #%d from %s",
					 resp->pkt_vers, dcc_su2str(&su));
			continue;
		}

		/* We cannot use the server's apparent IP address because it
		 * might be multihomed and respond with an address other than
		 * to which we sent.  So use our records of which OP_NUMS was
		 * sent to which server address. */
		if (resp->op_nums.r != msg->op_nums.r
		    || resp->op_nums.p != msg->op_nums.p
		    || resp->op_nums.h != msg->op_nums.h) {
			if (dcc_clnt_debug)
				dcc_trace_msg("unmatched response from %s"
					      " h=%#x/%#x p=%#x/%#x r=%#x/%#x"
					      " t=%#x",
					      dcc_su2str(&su),
					      resp->op_nums.h, msg->op_nums.h,
					      resp->op_nums. p, msg->op_nums.p,
					      resp->op_nums.r, msg->op_nums.r,
					      resp->op_nums.t);
			continue;
		}

		/* find the transmission with this transaction number */
		for (xloge = xlog->base; xloge < xlog->next; ++xloge) {
			if (resp->op_nums.t == xloge->op_nums.t)
				break;
		}
		if (xloge >= xlog->next) {
			if (dcc_clnt_debug)
				dcc_trace_msg("stray response from %s"
					      " h=%#x/%#x p=%#x/%#x r=%#x/%#x"
					      " t=%#x",
					      dcc_su2str(&su),
					      resp->op_nums.h, msg->op_nums.h,
					      resp->op_nums.p, msg->op_nums.p,
					      resp->op_nums.r, msg->op_nums.r,
					      resp->op_nums.t);
			continue;
		}


#ifdef CLNT_LOSSES
		/* simulate packet loss by ignoring every fifth answer */
		if ((++clnt_losses % 5) == 0) {
			dcc_trace_msg("dropped answer from %s",
				      addr_inx2str(abuf, sizeof(abuf), class,
						   xloge->addrs_gen,
						   xloge->addr_inx, &su));
			continue;
		}
#endif
		if (xloge->passwd[0] != '\0'
		    && dcc_ck_signature(xloge->passwd, sizeof(xloge->passwd),
					resp, pkt_len))
			break;		/* good signature */

		if (dcc_ck_signature((char *)&xloge->op_nums,
				     sizeof(xloge->op_nums),
				     resp, pkt_len)) {
			/* server did not sign with our password,
			 * but with our transaction numbers */
			if (xloge->passwd[0] != '\0')
				dcc_error_msg("%s rejected our password"
					      " for ID %d",
					      addr_inx2str(abuf, sizeof(abuf),
							class,
							xloge->addrs_gen,
							xloge->addr_inx,
							&su),
					      xloge->id);
			break;
		}

		/* complain about the bad signature and look for
		 * another packet */
		dcc_error_msg("badly signed %s response from %s",
			      dcc_hdr_op2str(msg),
			      addr_inx2str(abuf, sizeof(abuf), class,
					   xloge->addrs_gen, xloge->addr_inx,
					   &su));
	}

	/* Found a record of this transmission */
	xloge->op_nums.t = DCC_OP_NUMS_NULL;    /* don't find it again */
	if (xlog->outstanding != 0)
		--xlog->outstanding;
	++xlog->cur[xloge->addr_inx].resps;

	*xlogep = xloge;
	return 2;
}



/* wait for an answer
 *	Wait until the socket is ready or the time expires, using poll()
 *	if USE_POLL is defined and select() otherwise.
 *	The result of poll() or select() is returned when it is not
 *	negative.
 *	When the call fails and DCC_SELECT_NERROR() is true (presumably an
 *	interruption by a signal -- confirm against its definition), the
 *	wait is restarted unless usec<0, in which case 0 is returned.
 *	Any other failure puts a message in emsg and returns -1. */
int					/* -1=error, 0=timeout, 1=ready */
dcc_select_poll(DCC_EMSG emsg,
		SOCKET fd,
		u_char rd,		/* 1=read 0=write */
		int usec)		/* <0=forever until signal */
{
#ifdef USE_POLL
	struct pollfd fds;
	int nfds;
	int delay;

	/* poll() wants milliseconds; round up so that a short delay
	 * does not become no delay */
	if (usec < 0)
		delay = -1;
	else
		delay = (usec+999)/1000;

	for (;;) {
		fds.fd = fd;
		/* At least some versions of Linux have POLLRDNORM etc. in
		 * asm/poll.h, but with definitions of POLLIN, POLLPRI, etc.
		 * that conflict with their definitions in sys/poll.h.
		 * Perhaps it is not necessary to check for high or
		 * low priority data, but the poll() documentation on
		 * some systems says that asking about POLLIN does not
		 * say anything about other data */
#ifdef POLLRDNORM
		if (rd)
			fds.events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
		else
			fds.events = POLLOUT| POLLWRNORM | POLLWRBAND | POLLPRI;
#else
		if (rd)
			fds.events = POLLIN;
		else
			fds.events = POLLOUT;
#endif
		fds.revents = 0;
		nfds = poll(&fds, 1, delay);
		if (nfds >= 0)
			return nfds;
		if (!DCC_SELECT_NERROR()) {
			dcc_pemsg(EX_OSERR, emsg, "poll(): %s", ERROR_STR());
			return -1;
		}
		if (usec < 0)		/* stop forever on a signal */
			return 0;
	}
#else
	struct timeval delay, *delayp;
	fd_set fds;
	int nfds;

	/* a null timeout makes select() wait indefinitely */
	if (usec < 0) {
		delayp = 0;
	} else {
		delay.tv_usec = usec % DCC_USECS;
		delay.tv_sec = usec / DCC_USECS;
		delayp = &delay;
	}

	FD_ZERO(&fds);
	for (;;) {
		/* set the bit again in case a failed select() changed it */
		FD_SET(fd, &fds);
		if (rd)
			nfds = select(fd+1, &fds, 0, 0, delayp);
		else
			nfds = select(fd+1, 0, &fds, 0, delayp);
		if (nfds >= 0)
			return nfds;
		if (!DCC_SELECT_NERROR()) {
			dcc_pemsg(EX_OSERR, emsg, "select(): %s", ERROR_STR());
			return -1;
		}
		if (usec < 0)		/* stop forever on a signal */
			return 0;
	}
#endif
}



/* Make initial estimates of the RTT to all known servers
 *      The RTT's help the client pick a server that will respond quickly and
 *      reliably and to know when to retransmit a request that is lost due
 *      to network congestion or bit rot.
 *  The contexts, shared information, and resolving must all be locked on entry.
 *  The shared information is unlocked on failure but the contexts are locked
 *
 *	emsg	receives the reason for failure
 *	ctxt	client context with the socket and the current time
 *	class	the servers to measure
 *
 *  Both the contexts and the shared information are unlocked while waiting
 *  for responses and locked again afterward. */
static u_char				/* 0=failed, 1=at least 1 good server */
measure_rtt(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class)
{
	DCC_SRVR_ADDR *ap;
	DCC_NOP nop;
	union {
	    DCC_HDR     hdr;
	    DCC_OK      ok;
	    DCC_ERROR   error;
	} resp;
	int rtt, us, delay_us, next_xmit, end_us;
	int nfds, xmit_num;
	int tgt_addrs;
	DCC_XLOG xlog;
	DCC_XLOG_ENTRY xlog_entries[DCC_MAX_SRVR_ADDRS*DCC_MAX_XMITS];
	DCC_XLOG_ENTRY *xloge;
	char abuf[80];
	u_char vers;
	int i;

	/* Send NOP's to all addresses and wait for responses to
	 * measure each server's health and RTT.
	 * Treat all addresses as if they are of independent hosts */

	memcpy(&nop.hdr, &dcc_clnt_info->proto_hdr, sizeof(nop.hdr));
	/* servers ignore the version on NOPs */
	nop.hdr.pkt_vers = DCC_PKT_VERSION_MIN;
	nop.hdr.op_nums.p = getpid();
	nop.hdr.op = DCC_OP_NOP;
	nop.hdr.op_nums.r = ++dcc_clnt_info->proto_hdr.op_nums.r;

	if (!get_now(emsg, ctxt)) {
		dcc_info_unlock(0);
		return 0;
	}
	if (emsg)
		*emsg = '\0';

	/* Figure out the earliest when we could be finished
	 * by finding the largest current RTT */
	rtt = DCC_INIT_RTT;
	for (ap = class->addrs; ap <= LAST(class->addrs); ++ap) {
		if (ap->family == 0)
			continue;
		/* consider only servers that answered most recently */
		if (!(ap->resp_mem & 1))
			continue;
		us = ap->rtt - ap->srvr_wait;
		if (rtt < us)
			rtt = us;
	}
	end_us = MIN_RETRANS_TIME(rtt, 1);
	end_us += ctxt->now_us;

	tgt_addrs = class->num_addrs;
	if (tgt_addrs <= 0) {
		dcc_pemsg(EX_NOHOST, emsg, "no server addresses");
		dcc_info_unlock(0);
		return 0;
	}
	/* stop waiting for responses when we have enough working servers */
	if (!dcc_all_srvrs && tgt_addrs > 4)
		tgt_addrs = 4;

	memset(&xlog, 0, sizeof(xlog));
	xlog.base = xlog.next = xlog_entries;
	xlog.last = LAST(xlog_entries);
	delay_us = 0;
	next_xmit = 0;
	xmit_num = 0;
	/* wait for the responses to the NOPs and retransmit as needed */
	for (;;) {
		/* wait quietly until time to retransmit */
		if (delay_us <= 0) {
			if (xmit_num >= DCC_MAX_XMITS)
				break;
			/* NOTE(review): in the code visible here,
			 * xlog.working_addrs is incremented only by
			 * resp_rates(), which is called after this loop,
			 * so this test may always be true -- confirm */
			if (xlog.working_addrs < tgt_addrs) {
				/* count the silent servers, but only far
				 * enough to tell one from several */
				int tgts = 0;
				for (i = 0, ap = class->addrs;
				     ap <= LAST(class->addrs);
				     ++i, ++ap) {
					if (ap->family == 0
					    || xlog.cur[i].resps != 0)
					    continue;
					if (++tgts > 1)
					    break;
				}
				if (tgts > 0) {
					/* connect() is ok only with
					 * a single target */
					tgts = (tgts == 1);
					for (i = 0, ap = class->addrs;
					     ap <= LAST(class->addrs);
					     ++i, ++ap) {
					    if (ap->family == 0
						|| xlog.cur[i].resps != 0)
						continue;
					    clnt_xmit(emsg, ctxt, &xlog,
						      class, ap,
						      &nop.hdr, sizeof(nop),
						      tgts);
					}
				}
			}

			/* stop if nothing to wait for */
			if (!xlog.outstanding)
				break;
			delay_us = dcc_retrans_time(DCC_INIT_RTT, ++xmit_num);
			next_xmit = delay_us + ctxt->now_us;
			if (next_xmit > end_us
			    && xlog.working_addrs >= tgt_addrs) {
				next_xmit = end_us;
				delay_us = next_xmit - ctxt->now_us;
				if (delay_us <= 0)
					break;
			}
		}

		/* let other users of the locks proceed while we wait */
		if (!dcc_info_unlock(emsg))
			return 0;
		dcc_ctxts_unlock();
		nfds = dcc_select_poll(emsg, ctxt->soc, 1, delay_us);
		if (nfds < 0) {
			dcc_ctxts_lock();
			return 0;
		}
		i = get_now(emsg, ctxt);
		dcc_ctxts_lock();
		/* give up someone changed the local clock */
		if (!i)
			return 0;
		if (!dcc_info_lock(emsg))
			return 0;

		/* drain everything that has arrived */
		if (nfds > 0) for (;;) {
			i = clnt_recv(emsg, ctxt, class,
				      &resp.hdr, sizeof(resp),
				      &nop.hdr, &xlog, &xloge);
			if (i <= 0)
				break;

			if (i == 1)     /* otherwise ignore Unreachable */
				continue;

			/* record the results of a probe, and notice
			 * if the server is the best so far */
			ap = &class->addrs[xloge->addr_inx];

			if (resp.hdr.op != DCC_OP_OK) {
				if (dcc_clnt_debug)
					dcc_trace_msg("RTT NOP answered"
						      " with %s by %s",
						      dcc_hdr_op2str(&resp.hdr),
						      addr2str(abuf,
							  sizeof(abuf),
							  class,
							  xloge->addrs_gen,
							  ap, 0));
				ap->rtt = DCC_RTT_BAD;
				continue;
			}

			/* use the newest protocol version that both
			 * we and the server understand */
			vers = resp.ok.max_pkt_vers;
			if (vers >= DCC_PKT_VERSION_MAX)
				vers = DCC_PKT_VERSION_MAX;
			else if (vers < DCC_PKT_VERSION_MIN)
				vers = DCC_PKT_VERSION_MIN;
			ap->srvr_pkt_vers = vers;
			ap->srvr_id = ntohl(resp.hdr.sender);
			memcpy(ap->brand, resp.ok.brand, sizeof(ap->brand));
			ap->srvr_wait = ntohs(resp.ok.qdelay_ms)*1000;

			update_rtt(ctxt, class, xloge,
				   ctxt->now_us - xloge->sent_us
				   + ap->srvr_wait);
		}

		/* figure out how long to wait next */
		if (xlog.outstanding == 0)
			delay_us = 0;
		else
			delay_us = next_xmit - ctxt->now_us;
		if (xlog.working_addrs >= tgt_addrs) {
			us = end_us - ctxt->now_us;
			if (us <= 0)
				break;
			if (delay_us > us)
				delay_us = us;
		}
	}

	resp_rates(ctxt, class, &xlog, 1);

	if (!pick_srvr(emsg, class)) {
		fail_more(class);
		dcc_info_unlock(0);
		return 0;
	}

	/* maintain long term average that is used to switch back to
	 * good servers that temporarily go bad */
	if (class->thold_rtt == DCC_RTT_BAD) {
		class->avg_thold_rtt = DCC_RTT_BAD;
	} else if (class->avg_thold_rtt == -DCC_RTT_BAD) {
		/* -DCC_RTT_BAD marks an average with no history */
		class->avg_thold_rtt = class->base_rtt;
	} else {
		AGE_AVG(class->avg_thold_rtt, class->base_rtt, 9, 1);
	}

	class->measure = ctxt->now.tv_sec+FAST_RTT_SECS;

	return 1;
}



/* Get and write-lock common info
 *      The contexts must be locked.
 *      The contexts remain locked on failure.  The shared information
 *	    is locked only on success.
 *
 *	emsg	receives the reason for failure
 *	ctxt	client context; its socket is opened if necessary
 *	flags	DCC_CLNT_FG_* bits:
 *		    DCC_CLNT_FG_GREY selects the class of servers
 *		    DCC_CLNT_FG_NO_FAIL skips the check for recent failures
 *		    DCC_CLNT_FG_NO_SRVR_OK lets the call succeed without
 *			a working server
 *
 *      Server names are resolved again and RTTs are measured again
 *      as needed before returning. */
u_char					/* 0=failed 1=ok */
dcc_clnt_rdy(DCC_EMSG emsg,
	     DCC_CLNT_CTXT *ctxt,
	     u_char flags)		/* DCC_CLNT_FG_* */
{
	DCC_SRVR_CLASS *class;
	DCC_SRVR_ADDR *cur_addr;
	int inx, rtt;
	u_char srvr_ok, locked;

	if (!dcc_info_lock(emsg))
		return 0;

	get_start_time(ctxt);

	/* just fail if things were broken and it's too soon to try again */
	class = DCC_GREY2CLASS(flags & DCC_CLNT_FG_GREY);
	if (!(flags & DCC_CLNT_FG_NO_FAIL)
	    && !ck_fail_time(emsg, ctxt, class)) {
		dcc_info_unlock(emsg);
		return 0;
	}

	/* Try to bind a socket with IPv6 first if allowed.
	 * If that doesn't work, try IPv4 */
	if (!ctxt->use_ipv6 != !(dcc_clnt_info->flags & DCC_CLNT_INFO_FG_IPV6)
	    && ctxt->soc != INVALID_SOCKET) {
		/* the socket is of the wrong family */
		if (SOCKET_ERROR == closesocket(ctxt->soc)
		    && dcc_clnt_debug)
			dcc_trace_msg("closesocket(ctxt): %s", ERROR_STR());
		ctxt->soc = INVALID_SOCKET;
	}

	/* dcc_clnt_soc_open() unlocks the shared information on failure */
	if (ctxt->soc == INVALID_SOCKET
	    && !dcc_clnt_soc_open(emsg, ctxt))
		return 0;

	if (flags & DCC_CLNT_FG_NO_SRVR_OK) {
		/* assume server is ok if the caller doesn't care */
		srvr_ok = 1;
	} else {
		inx = class->act_inx;
		if (inx >= class->num_addrs) {
			srvr_ok = pick_srvr(emsg, class);
		} else {
			/* try to pick a new server if the current server
			 * has become slow or unreliable */
			cur_addr = &class->addrs[inx];
			rtt = (class->nms[cur_addr->nm_inx].rtt_adj
			       + cur_addr->rtt);
			if (rtt <= class->thold_rtt)
				srvr_ok = 1;
			else
				srvr_ok = pick_srvr(emsg, class);
		}
	}

	/* Check for new IP addresses occasionally
	 * If we cannot awaken a separate thread, do it ourself */
	if (srvr_ok
	    && DCC_IS_TIME(ctxt->now.tv_sec, class->resolve,
			   DCC_RE_RESOLVE)
	    && !dcc_clnt_wake_resolve())
		srvr_ok = 0;

	locked = 0;
	if (!srvr_ok) {
		/* NOTE(review): this return leaves the shared information
		 * locked unless dcc_resolve_lock() releases it on failure
		 * -- confirm against the contract above */
		if (!dcc_resolve_lock(emsg))
			return 0;
		locked = 1;
		/* if after waiting for the lock,
		 * do the work if it is still needed */

		/* just fail if things became badly broken while we waited
		 * and it's too soon to try again  */
		if (!get_now(emsg, ctxt)
		    || (!(flags & DCC_CLNT_FG_NO_FAIL)
			&& !ck_fail_time(emsg, ctxt, class))) {
			dcc_resolve_unlock(0);
			dcc_info_unlock(0);
			return 0;
		}

		inx = class->act_inx;
		if (inx < class->num_addrs
		    && !DCC_IS_TIME(ctxt->now.tv_sec, class->resolve,
				    DCC_RE_RESOLVE)) {
			/* everything was fixed while we waited for the lock */
			srvr_ok = 1;
		} else {
			/* check for new A RRs
			 * dcc_clnt_resolve() has already unlocked the
			 * shared information when it fails, so do not
			 * unlock it again and risk replacing its message */
			if (!dcc_clnt_resolve(emsg, ctxt, class)) {
				dcc_resolve_unlock(0);
				return 0;
			}
		}
	}
	/* srvr_ok == 0 if we should take a measurement */

	/* We might have switched to the current server when our
	 * best server became slow.
	 * If it has been a while, see if our best server is back. */
	if (srvr_ok
	    && DCC_IS_TIME(ctxt->now.tv_sec, class->measure, FAST_RTT_SECS)
	    && !(flags & DCC_CLNT_FG_NO_SRVR_OK)) {
		inx = class->act_inx;
		if (inx >= class->num_addrs) {
			srvr_ok = 0;
		} else {
			cur_addr = &class->addrs[inx];
			rtt = (class->nms[cur_addr->nm_inx].rtt_adj
			       + cur_addr->rtt);
			if (rtt > class->avg_thold_rtt)
				srvr_ok = 0;
		}
	}

	/* measure the RTTs to all of the servers and pick one */
	if (!srvr_ok) {
		if (!locked) {
			if (!dcc_resolve_lock(emsg))
				return 0;
			locked = 1;
		}
		/* measure_rtt() unlocks the shared information on failure */
		if (!measure_rtt(emsg, ctxt, class)
		    && !(flags & DCC_CLNT_FG_NO_SRVR_OK)) {
			dcc_resolve_unlock(0);
			return 0;
		}
	}

	if (locked && !dcc_resolve_unlock(emsg)) {
		dcc_info_unlock(0);
		return 0;
	}
	return 1;
}



/* Send an operation to the server and get a response.
 *	The operation and response buffers must be distinct, because the
 *	    response buffer is changed before the last use of the operation
 *	    buffer.
 *
 *	emsg	    null or a buffer for an error message; cleared on entry
 *	ctxt	    client context supplying the socket, the clock, and the
 *			transmission log
 *	flags	    DCC_CLNT_FG_* bits.  They are passed to dcc_clnt_rdy(),
 *			and DCC_CLNT_FG_GREY selects the server class.
 *	act_inxp    null or pointer to the index of the server to use.
 *			The active server of the class is used when it is
 *			null or the index is negative or too large.
 *	srvr_idp    null or set to the ID of the server used.
 *			It is set to DCC_ID_INVALID on most failures, but it
 *			keeps the server's ID when there was no answer and
 *			pick_srvr() returned 2, and when get_now() fails
 *			after waiting.
 *	msg	    request; its header is overwritten here from the shared
 *			prototype header before the version, process ID, and
 *			operation are stored
 *	msg_len	    length of the request given to clnt_xmit()
 *	op	    operation stored in the request header
 *	resp	    receives a copy of the last answer
 *	resp_max_len size of resp; clamped to the size of the local
 *			receive buffer
 *	resp_su	    null or set to the address of the server used
 */
u_char					/* 0=failed 1=ok */
dcc_clnt_op(DCC_EMSG emsg,
	    DCC_CLNT_CTXT *ctxt,
	    u_char flags,
	    const int *act_inxp,	/* null or ptr to server index or -1 */
	    DCC_SRVR_ID *srvr_idp,	/* ID of server used */
	    DCC_HDR *msg, int msg_len, DCC_OPS op,
	    DCC_HDR *resp, int resp_max_len, DCC_SOCKU *resp_su)
{
	DCC_SRVR_CLASS *class;
	DCC_SRVR_ADDR *cur_addr;
	int addrs_gen;			/* class->gen when we started */
	/* Answers are received into this buffer before being copied
	 * to the caller's buffer.  buf.c is also used as scratch space
	 * for addr2str() in error messages. */
	union {
	    DCC_HDR     hdr;
	    DCC_QUERY_RESP query_resp;
	    DCC_ADMN_RESP admin_resp;
	    char	c[80];
	} buf;
	DCC_XLOG_ENTRY *xloge;		/* log entry matched by clnt_recv() */
	int act_inx, xmit_num;
	int next_xmit, us, nfds;
	u_char unreachable, gotit;
	int i;

	if (emsg)
		*emsg = '\0';
	/* The contexts lock is held from here until it is explicitly
	 * released while waiting for an answer. */
	dcc_ctxts_lock();
	/* map the shared info if that has not yet been done */
	if (!dcc_clnt_info
	    && !dcc_map_info(emsg, 0, -1)) {
		dcc_ctxts_unlock();
		if (srvr_idp)
			*srvr_idp = DCC_ID_INVALID;
		return 0;
	}
	/* get & lock common info */
	if (!dcc_clnt_rdy(emsg, ctxt, flags)) {
		dcc_ctxts_unlock();
		if (srvr_idp)
			*srvr_idp = DCC_ID_INVALID;
		return 0;
	}
	class = DCC_GREY2CLASS(flags & DCC_CLNT_FG_GREY);

	/* answers land in buf first, so never ask for more than it holds */
	if (resp_max_len > ISZ(buf))
		resp_max_len = ISZ(buf);

	/* use server that the caller wants,
	 * if the caller specified the valid index of a server */
	if (!act_inxp
	    || (act_inx = *act_inxp) < 0
	    || act_inx >= class->num_addrs) {
		act_inx = class->act_inx;
	}
	/* NOTE(review): class->act_inx is not range checked here;
	 * presumably dcc_clnt_rdy() left it valid -- confirm for callers
	 * that set DCC_CLNT_FG_NO_SRVR_OK. */
	cur_addr = &class->addrs[act_inx];
	if (srvr_idp)
		*srvr_idp = cur_addr->srvr_id;
	if (resp_su)
		dcc_mk_su(resp_su,
			  cur_addr->family, &cur_addr->ip, cur_addr->port);
	/* remember the generation of the address table so that a change
	 * made by someone else while we wait can be detected */
	addrs_gen = class->gen;

	/* advance the shared op_nums.r counter and start the request
	 * header as a copy of the shared prototype header */
	++dcc_clnt_info->proto_hdr.op_nums.r;
	memcpy(msg, &dcc_clnt_info->proto_hdr, sizeof(*msg));
	/* refuse to send with a protocol version outside the supported
	 * range */
	if (cur_addr->srvr_pkt_vers > DCC_PKT_VERSION_MAX
	    || cur_addr->srvr_pkt_vers < DCC_PKT_VERSION_MIN) {
		dcc_pemsg(EX_DATAERR, emsg, "impossible pkt_vers %d for %s",
			  cur_addr->srvr_pkt_vers,
			  addr2str(buf.c, sizeof(buf.c), class,
				   addrs_gen, cur_addr, 0));
		dcc_info_unlock(0);
		dcc_ctxts_unlock();
		if (srvr_idp)
			*srvr_idp = DCC_ID_INVALID;
		return 0;
	}
	msg->pkt_vers = cur_addr->srvr_pkt_vers;
	msg->op_nums.p = getpid();
	msg->op = op;
	gotit = 0;
	unreachable = 0;

	/* The measured RTT's to servers is to help the client pick a server
	 * that will respond quickly and reliably and to know when to
	 * retransmit a request that is lost due to network congestion or
	 * bit rot.
	 *
	 * It is desirable for a client to try to concentrate its reports to
	 * a single server.  That makes detecting spam by this and other
	 * clients quicker.
	 *
	 * A client should retransmit when its initial transmission is lost
	 * due to bit rot or congestion.  In case the loss is due to
	 * congestion, it should retransmit only a limited number of
	 * times and with increasing delays between retransmissions.
	 *
	 * It is more important that some requests from clients reach
	 * a DCC server than others.  Most DCC checksum reports are not about
	 * spam, and so it is best to not spend too much network bandwidth
	 * retransmitting checksum reports or to delay the processing of the
	 * messages. Administrative commands must be tried harder.
	 * Therefore, let the caller of this routine decide whether to try
	 * again, while this routine merely increases the measured RTT after
	 * failures. */

	/* start this operation with an empty transmission log */
	memset(&ctxt->xlog, 0, sizeof(ctxt->xlog));
	ctxt->xlog.base = ctxt->xlog.next = ctxt->xlog_entries;
	ctxt->xlog.last = LAST(ctxt->xlog_entries);
	xmit_num = 0;
	/* the first transmission is due immediately */
	next_xmit = ctxt->now_us;

	/* Transmit, wait for a response, and retransmit as needed.
	 * The initial transmission is done as if it were a retransmission.
	 * Both locks are held at the top of every iteration and at every
	 * break except the one taken when dcc_select_poll() fails. */
	for (;;) {
		/* microseconds until the next transmission is due */
		us = next_xmit - ctxt->now_us;
		if (us <= 0) {
			/* give up after the maximum number of tries */
			if (xmit_num >= DCC_MAX_XMITS)
				break;

			/* because of the flooding algorithm among DCC servers,
			 * it is important that only a single server receive
			 * reports of the checksums for a mail message.
			 * That implies that retransmissions of reports must
			 * go to the original server, even if some other local
			 * client has re-resolved hostnames or switched
			 * to a better server.
			 * And that means we should not retransmit
			 * if the server address table is changed. */
			if (addrs_gen != class->gen)
				break;

			if (!clnt_xmit(emsg, ctxt, &ctxt->xlog,
				       class, cur_addr, msg, msg_len, 1))
				break;
			/* schedule the next try from the RTT of the server
			 * and the number of transmissions so far */
			us = dcc_retrans_time(cur_addr->rtt, ++xmit_num);
			next_xmit = us + ctxt->now_us;
		}

		/* release the mapped info while we wait for an answer */
		if (!dcc_info_unlock(emsg)) {
			dcc_ctxts_unlock();
			if (srvr_idp)
				*srvr_idp = DCC_ID_INVALID;
			return 0;
		}
		dcc_ctxts_unlock();
		/* wait up to us microseconds with neither lock held */
		nfds = dcc_select_poll(emsg, ctxt->soc, 1, us);
		if (nfds < 0) {
			/* note error, but we may already have an answer */
			/* NOTE(review): only the contexts lock is retaken
			 * here; dcc_info_lock() is not called, yet the code
			 * after the loop updates the class and calls
			 * dcc_info_unlock().  Confirm that this is intended
			 * and that dcc_info_unlock() tolerates it. */
			dcc_ctxts_lock();
			class = DCC_GREY2CLASS(flags & DCC_CLNT_FG_GREY);
			break;
		}
		/* Neither lock is held here, so there is nothing to release.
		 * NOTE(review): unlike most other failures, *srvr_idp is not
		 * set to DCC_ID_INVALID on this path -- confirm. */
		if (!get_now(emsg, ctxt))
			return 0;       /* simply give up if time jumped */

		/* recover the lock so that we can record the result of the
		 * newly arrived answer in the shared and mapped file */
		dcc_ctxts_lock();
		/* the class pointer is computed again after relocking;
		 * presumably the mapping can change while unlocked
		 * -- TODO confirm */
		class = DCC_GREY2CLASS(flags & DCC_CLNT_FG_GREY);
		if (!dcc_info_lock(emsg)) {
			dcc_ctxts_unlock();
			if (srvr_idp)
				*srvr_idp = DCC_ID_INVALID;
			return 0;
		}

		if (nfds > 0) {
			/* Drain everything that has arrived.
			 * clnt_recv() results as used here:
			 *	<0 stops the whole operation,
			 *	0 ends this batch,
			 *	1 is an ICMP Unreachable,
			 *	anything else is an answer in buf with its
			 *	    log entry in xloge. */
			for (;;) {
				i = clnt_recv(emsg, ctxt, class, &buf.hdr,
					      min(ISZ(buf), resp_max_len),
					      msg, &ctxt->xlog, &xloge);
				if (i <= 0)
					break;
				if (i == 1) {
					/* stop delaying after the first
					 * ICMP Unreachable message,
					 * but collect everything that has
					 * already arrived */
					unreachable = 1;
					continue;
				}

				/* Record the time since the matching
				 * transmission.  cur_addr->srvr_wait is
				 * added for operations other than reports
				 * and queries. */
				update_rtt(ctxt, class, xloge,
					   ctxt->now_us - xloge->sent_us
					   + ((xloge->op != DCC_OP_REPORT
					       && xloge->op != DCC_OP_QUERY)
					      ? cur_addr->srvr_wait : 0));

				/* save the last answer we get */
				/* NOTE(review): the length copied is taken
				 * from the header of the answer; presumably
				 * clnt_recv() has checked it against the
				 * buffer size it was given -- confirm. */
				memcpy(resp, &buf, ntohs(buf.hdr.len));
				gotit = 1;
			}
			/* finished after an error, an ICMP Unreachable, or
			 * an answer; otherwise wait or retransmit */
			if (i < 0 || unreachable || gotit)
				break;
		}
	}

	/* penalize server for lost packets */
	resp_rates(ctxt, class, &ctxt->xlog, 0);

	/* fail if the server did not answer at all */
	if (!gotit) {
#if 0
		system("./abort_dccd");
#endif
		/* trace any earlier message before it is replaced */
		if (dcc_clnt_debug
		    && emsg && *emsg != '\0')
			dcc_trace_msg("%s", emsg);
		dcc_pemsg(EX_TEMPFAIL, emsg, "no answer from %s after %d ms",
			  addr2str(buf.c, sizeof(buf.c), class,
				   addrs_gen, cur_addr, 0),
			  ctxt->now_us/1000);
		/* Since we got no answer at all, look for a different server.
		 * If we can't find any server or a different server,
		 * then don't try again for a while to not delay sendmail.
		 * If we find another server, then return a valid server-ID
		 * to let the caller know that it can try again immediately */
		if (pick_srvr(0, class) != 2) {
			fail_more(class);
			if (srvr_idp)
				*srvr_idp = DCC_ID_INVALID;
		}
		dcc_info_unlock(0);
		dcc_ctxts_unlock();
		return 0;
	}

	/* we have an answer; release both locks before returning */
	if (!dcc_info_unlock(emsg)) {
		dcc_ctxts_unlock();
		if (srvr_idp)
			*srvr_idp = DCC_ID_INVALID;
		return 0;
	}
	dcc_ctxts_unlock();

	/* When debugging, trace any message left in emsg despite the
	 * success and clear it.  It is left alone when not debugging. */
	if (dcc_clnt_debug
	    && emsg && *emsg != '\0') {
		dcc_trace_msg("%s", emsg);
		*emsg = '\0';
	}
	return 1;
}
