/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include "udm_config.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <ctype.h>

#include "udm_common.h"
#include "udm_utils.h"
#include "udm_proto.h"
#include "udm_url.h"
#include "udm_hrefs.h"
#include "udm_server.h"
#include "udm_xmalloc.h"
#include "udm_host.h"
#include "udm_vars.h"
#include "udm_wild.h"
#include "udm_match.h"
#include "udm_db.h"

#define DEFAULT_PROXY_PORT	3128
#define ERRSTRSIZ 1000

/*
 * UdmServerAdd - add or replace a server entry in A->Conf->Servers.
 *
 *   A    agent whose configuration (A->Conf) owns the server list
 *   srv  template entry; its Match.pattern, Vars, command, ordre, weight
 *        and MaxHops are copied.  Its Match.reg / Match.arg pointers are
 *        moved into the stored entry and reset to NULL in srv.  On return
 *        srv->site_id holds the site_id of the stored entry.
 *
 * For UDM_MATCH_BEGIN patterns the URL is canonized, validated and trimmed
 * according to the "Follow" variable before it is stored.  For
 * UDM_MATCH_REGEX patterns the expression is compiled first.  Every other
 * match type stores the pattern verbatim.
 *
 * If an entry with the same resulting pattern already exists it is
 * overwritten, otherwise a new entry is appended (the array grows in
 * steps of 16 entries).
 *
 * Returns the result of UdmSrvAction(..., UDM_SRV_ACTION_ADD), or
 * UDM_ERROR on allocation failure, malformed URL or bad regex.  For the
 * last two a message is left in Conf->errstr.
 */
__C_LINK int __UDMCALL UdmServerAdd(UDM_AGENT *A, UDM_SERVER *srv){
	int		res;
	char		*urlstr = NULL;		/* working copy of the pattern  */
	char		*pattern = NULL;	/* copy handed to stored entry  */
	UDM_SERVER	*dst = NULL;		/* entry being created/replaced */
	size_t		i;
	UDM_SERVERLIST	S;
	UDM_ENV		*Conf = A->Conf;

	if(srv->Match.match_type==UDM_MATCH_BEGIN){
		UDM_URL	from;
		size_t	len;
		char	*s;

		/* Copy URL to temp string to keep srv->Match.pattern unchanged */
		len = 3*strlen(srv->Match.pattern) + 4;
		if ((urlstr = (char*)UdmMalloc(len)) == NULL) return UDM_ERROR;
		UdmURLCanonize(srv->Match.pattern, urlstr, len);

		UdmURLInit(&from);

		/* Check whether valid URL is passed */
		if((res=UdmURLParse(&from,urlstr))){
			switch(res){
				case UDM_URL_LONG:
					udm_snprintf(Conf->errstr, sizeof(Conf->errstr), "URL too long");
					break;
				case UDM_URL_BAD:
				default:
					udm_snprintf(Conf->errstr, sizeof(Conf->errstr), "Badly formed URL");
					break;
			}
			UDM_FREE(urlstr);
			UdmURLFree(&from);
			return(UDM_ERROR);
		}
		if((from.hostinfo) && (from.filename == NULL)) {
			/* Add trailing slash                    */
			/* http://localhost -> http://localhost/ */
			udm_snprintf(urlstr, len, "%s://%s%s", UDM_NULL2EMPTY(from.schema), from.hostinfo, UDM_NULL2EMPTY(from.path));
		}

		switch(UdmVarListFindInt(&srv->Vars,"Follow",UDM_FOLLOW_PATH)){
			case UDM_FOLLOW_PATH:
				/* Cut before '?' and after last '/' */
				if((s=strchr(urlstr,'?')))
					*s='\0';
				if((s=strrchr(urlstr,'/')))
					*(s+1)='\0';
				break;

			case UDM_FOLLOW_SITE:
				if (from.hostinfo != NULL) {
					/* Cut after hostinfo */
					udm_snprintf(urlstr, len, "%s://%s/", UDM_NULL2EMPTY(from.schema), from.hostinfo);
				}else{
					/* Cut after first '/' */
					if((s=strchr(urlstr,'/')))
						*(s+1)='\0';
				}
				break;

			case UDM_FOLLOW_NO:
			case UDM_FOLLOW_WORLD:
			default:
				break;
		}
		/* Cut after the server name to remove group names: group   */
		/* names are not present in message URLs.  The scan starts  */
		/* after the 7 characters of "news://", so only do it when  */
		/* the string is at least that long; otherwise strchr()     */
		/* would start beyond the terminating NUL.                  */
		if (!strcmp(UDM_NULL2EMPTY(from.schema), "news") && strlen(urlstr) >= 7) {
			if((s=strchr(urlstr+7,'/')))
				*(s+1)='\0';
		}
		UdmURLFree(&from);
	}else
	if(srv->Match.match_type==UDM_MATCH_REGEX){
		char regerrstr[ERRSTRSIZ]="";
		if(UDM_OK!=UdmMatchComp(&srv->Match,regerrstr,sizeof(regerrstr)-1)){
			/* urlstr is still NULL here: report the offending pattern itself */
			udm_snprintf(Conf->errstr,sizeof(Conf->errstr),"Wrong regex in config file: %s: %s", UDM_NULL2EMPTY(srv->Match.pattern),regerrstr);
			return(UDM_ERROR);
		}
		urlstr= UdmStrdup(srv->Match.pattern);
	}
	else
	{
		urlstr= UdmStrdup(srv->Match.pattern);
	}

	if (!urlstr) return UDM_ERROR;

	/* Make the copy that the stored entry will own before touching  */
	/* the list, so that a failed allocation leaves the list intact. */
	if ((pattern = (char*)UdmStrdup(urlstr)) == NULL) {
		UDM_FREE(urlstr);
		return UDM_ERROR;
	}

	/* Look for an existing entry with the same pattern */
	for (i = 0; i < Conf->Servers.nservers; i++) {
		if (!strcmp(Conf->Servers.Server[i].Match.pattern, urlstr)) {
			dst = &Conf->Servers.Server[i];
			break;
		}
	}

	if (dst != NULL) {
		/* Replacing: drop the old pattern string.                      */
		/* NOTE(review): the old Match.reg / Match.arg of the replaced  */
		/* entry are overwritten below without being released here --   */
		/* confirm whether that can leak a compiled regex.              */
		UDM_FREE(dst->Match.pattern);
	}else{
		if(Conf->Servers.nservers>=Conf->Servers.mservers){
			/* Grow through a temporary so the old array pointer is */
			/* not lost if the reallocation reports failure.        */
			UDM_SERVER *grown=(UDM_SERVER *)UdmXrealloc(Conf->Servers.Server,(Conf->Servers.mservers+16)*sizeof(UDM_SERVER));
			if (grown == NULL) {
				UDM_FREE(pattern);
				UDM_FREE(urlstr);
				return UDM_ERROR;
			}
			Conf->Servers.Server=grown;
			Conf->Servers.mservers+=16;
		}
		dst = &Conf->Servers.Server[Conf->Servers.nservers];
		Conf->Servers.nservers++;
		UdmServerInit(dst);
	}

	UdmVarListReplaceLst(&dst->Vars,&srv->Vars,NULL,"*");

	dst->Match.pattern = pattern;
	dst->Match.nomatch=srv->Match.nomatch;
	dst->Match.case_sense=srv->Match.case_sense;
	dst->Match.match_type=srv->Match.match_type;
	/* Move the compiled match data from the template into the stored entry */
	dst->Match.reg = srv->Match.reg;
	dst->Match.arg = srv->Match.arg;
	srv->Match.reg = NULL;
	srv->Match.arg = NULL;
	dst->command = srv->command;
	dst->ordre = srv->ordre;
	dst->weight = srv->weight;
	dst->MaxHops = srv->MaxHops;

	S.Server = dst;
	res=UdmSrvAction(A, &S, UDM_SRV_ACTION_ADD);
	srv->site_id = dst->site_id;

	UDM_FREE(urlstr);
	return(res);
}

/*
 * Release what a server entry holds: its match descriptor (through
 * UdmMatchFree) and its variable list (through UdmVarListFree).
 * The UDM_SERVER structure itself is not freed here.
 */
void UdmServerFree(UDM_SERVER *Server)
{
	UdmMatchFree(&(Server->Match));
	UdmVarListFree(&(Server->Vars));
}

/*
 * Empty a server list: every entry is passed to UdmServerFree(), then
 * the entry array is released and both counters are reset to zero.
 * The UDM_SERVERLIST structure itself is not freed here.
 */
void UdmServerListFree(UDM_SERVERLIST *List)
{
	size_t idx = 0;

	/* Release the contents of each stored entry */
	while (idx < List->nservers) {
		UdmServerFree(&List->Server[idx]);
		idx++;
	}

	/* Reset the bookkeeping, then drop the array */
	List->mservers = 0;
	List->nservers = 0;
	UDM_FREE(List->Server);
}

/*
 * UdmServerFind - find the first entry of List that applies to url.
 *
 *   Conf     environment; its Hosts cache is used to resolve the URL's
 *            host when a UDM_MATCH_SUBNET entry has to be tested
 *   List     server entries, examined in order; the first hit wins
 *   url      URL to look up
 *   aliastr  may be NULL.  Otherwise, when the matching entry has an
 *            "Alias" variable, *aliastr receives a buffer obtained from
 *            UdmMalloc() and filled by UdmMatchApply() (it is NULL if
 *            that allocation failed).  This function never frees it.
 *            *aliastr is left untouched when nothing matches or the
 *            matching entry has no "Alias" variable.
 *
 * An entry applies when its "Follow" variable is UDM_FOLLOW_WORLD or
 * when UdmMatchExec() returns 0 for it.
 *
 * Returns the matching entry, or NULL when none applies.
 */
UDM_SERVER * UdmServerFind(UDM_ENV *Conf, UDM_SERVERLIST *List, const char *url, char **aliastr) {
#define NS 10

	size_t		i;
	UDM_SERVER	*Res=NULL;
	char            net[32];	/* dotted IPv4 address of the URL's host, filled lazily */

	/* NOTE(review): this function used to build a copy of url cut    */
	/* just after the '/' of a trailing "/robots.txt", but the copy   */
	/* was never passed to the matcher (and its allocation was used   */
	/* unchecked).  It has been dropped; matching still uses the full */
	/* url exactly as before.  Confirm whether robots.txt URLs were   */
	/* meant to be matched through the trimmed form instead.          */

	net[0] = '\0';
	for(i=0;i<List->nservers;i++){
		UDM_SERVER	*srv=&List->Server[i];
		UDM_MATCH_PART	P[NS];
		const char	*alias=UdmVarListFindStr(&srv->Vars,"Alias",NULL);
		int             follow = UdmVarListFindInt(&srv->Vars, "Follow", UDM_FOLLOW_PATH);

		/* Subnet entries are matched against the host address:  */
		/* resolve it once, on the first subnet entry seen.      */
		if (srv->Match.match_type == UDM_MATCH_SUBNET && *net == '\0') {
			UDM_CONN conn;
			UDM_URL  URL;

			UdmURLInit(&URL);
			if(UdmURLParse(&URL, url))
			{
				/* Unparsable URL: this entry cannot be tested */
				UdmURLFree(&URL);
				continue;
			}
			/* Start from a zeroed connection so the lookup never */
			/* sees indeterminate fields.                         */
			memset(&conn, 0, sizeof(conn));
			conn.hostname = URL.hostname;
			conn.port=80;
			if (UdmHostLookup(&Conf->Hosts, &conn) != -1)
			{
				const unsigned char *h=(const unsigned char*)(&conn.sin.sin_addr);
				snprintf(net, sizeof(net), "%d.%d.%d.%d", h[0], h[1], h[2], h[3]);
			}
			UdmURLFree(&URL);
		}

		if(follow == UDM_FOLLOW_WORLD || !UdmMatchExec(&srv->Match, url, net, NS, P) ) {
			Res=srv;
			if((aliastr != NULL) && (alias != NULL)) {
				size_t aliastrlen = 128 + strlen(url) + strlen(alias) + strlen(srv->Match.pattern);
				*aliastr = (char*)UdmMalloc(aliastrlen);
				if (*aliastr != NULL)
					UdmMatchApply(*aliastr, aliastrlen, url, alias, &srv->Match, NS, P);
			}
			break;
		}
	}
	return(Res);
}

#if 0
/* Everything up to the matching #endif is compiled out. */

/* Sort comparator for UDM_SERVER entries: longer url first; entries   */
/* whose url lengths are equal are ordered by rec_id, larger first.    */
static int cmpserver(const void *s1,const void *s2){
	int res;
	
	/* Fall back to rec_id only when the length difference is zero */
	if(!(res=strlen(((const UDM_SERVER*)s2)->url)-strlen(((const UDM_SERVER*)s1)->url)))
		res=(((const UDM_SERVER*)s2)->rec_id)-(((const UDM_SERVER*)s1)->rec_id);
	return(res);
}
/* Sort the entries of List in place with UdmSort() and cmpserver(). */
void UdmServerListSort(UDM_SERVERLIST *List){
	/*  Long names should be found first   */
	/*  to allow different options         */
	/*  for a server and its subdirectories */
	UdmSort(List->Server,List->nservers,sizeof(UDM_SERVER),cmpserver);
}
#endif

/*
 * Load a spider parameter block with the default values.
 * Always returns UDM_OK.
 */
int UdmSpiderParamInit(UDM_SPIDERPARAM *Spider)
{
	/* Switches that are on by default */
	Spider->index      = 1;
	Spider->use_robots = 1;
	Spider->use_clones = 1;

	/* Crawl scope */
	Spider->follow  = UDM_FOLLOW_PATH;
	Spider->maxhops = UDM_DEFAULT_MAX_HOPS;

	/* Timing and network error limits */
	Spider->period               = UDM_DEFAULT_REINDEX_TIME;
	Spider->read_timeout         = UDM_READ_TIMEOUT;
	Spider->doc_timeout          = UDM_DOC_TIMEOUT;
	Spider->max_net_errors       = UDM_MAXNETERRORS;
	Spider->net_error_delay_time = UDM_DEFAULT_NET_ERROR_DELAY_TIME;

	return UDM_OK;
}
/*
 * Reset a server entry: every byte is cleared, then the non-zero
 * defaults are applied.  Always returns 0.
 */
__C_LINK int __UDMCALL UdmServerInit(UDM_SERVER * srv){
	memset(srv, 0, sizeof(UDM_SERVER));

	srv->MaxHops          = 255;             /* default MaxHops value */
	srv->weight           = 1;               /* default ServerWeight  */
	srv->Match.match_type = UDM_MATCH_BEGIN;

	return 0;
}

/*
 * UdmServerGetSiteId - obtain the site id to use for a URL.
 *
 *   Indexer  agent passed on to UdmSrvAction()
 *   srv      server entry the URL belongs to
 *   url      parsed URL; only its schema and hostname are used
 *
 * When srv is a plain (not negated) UDM_MATCH_BEGIN entry whose "Follow"
 * variable is UDM_FOLLOW_SITE, srv->site_id is returned directly.
 * Otherwise a lower-cased "schema://hostname/" pattern is built and
 * looked up through UdmSrvAction(..., UDM_SRV_ACTION_ID), using a
 * temporary entry with command 'S' whose parent is srv->site_id.
 *
 * Returns the site id, or 0 on allocation failure or when UdmSrvAction()
 * does not return UDM_OK.
 */
urlid_t UdmServerGetSiteId(UDM_AGENT *Indexer, UDM_SERVER *srv, UDM_URL *url) {
  const char *schema = UDM_NULL2EMPTY(url->schema);
  const char *hostname = UDM_NULL2EMPTY(url->hostname);
  char *urlstr;
  char *p;
  size_t urllen;
  UDM_SERVERLIST sl;
  UDM_SERVER S;
  int rc;
  int follow=UdmVarListFindInt(&srv->Vars,"Follow",UDM_FOLLOW_PATH);
  
  if ((srv->Match.match_type == UDM_MATCH_BEGIN) &&
  	(srv->Match.nomatch == 0) &&
        (follow == UDM_FOLLOW_SITE)) {
    return srv->site_id;
  }

  /* "://" + "/" + terminating NUL need 5 bytes; 10 leaves some slack */
  urllen = strlen(schema) + strlen(hostname) + 10;
  if((urlstr = (char*)UdmMalloc(urllen)) == NULL) {
    return 0;
  }
  snprintf(urlstr, urllen, "%s://%s/", schema, hostname);

  /* Lower-case in place.  tolower() is only defined for values that  */
  /* fit in an unsigned char (or EOF), so convert each byte first.    */
  for (p = urlstr; *p != '\0'; p++) {
    *p = (char)tolower((unsigned char)*p);
  }

  /* Temporary entry describing the site */
  bzero((void*)&S, sizeof(S));
  sl.Server = &S;
  S.Match.pattern     = urlstr;
  S.Match.match_type  = UDM_MATCH_BEGIN;
  S.Match.nomatch     = 0;
  S.command = 'S';
  S.ordre = srv->ordre;
  S.parent = srv->site_id;
  S.weight = srv->weight;
  rc = UdmSrvAction(Indexer, &sl, UDM_SRV_ACTION_ID);
  UDM_FREE(urlstr);
  return (rc == UDM_OK) ? S.site_id : 0;
}
