#!/usr/bin/perl -w

=begin COPYRIGHT

jdresolve - Resolves IP addresses into hostnames 

Copyright (c) 1999 John Douglas Rowell

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

=end COPYRIGHT
=cut

use strict;
use Net::DNS;
use IO::Select;
use Getopt::Long;
use POSIX qw(strftime);
use DB_File;

# Program identity, interpolated into the usage and version banners.
my $APPNAME = 'jdresolve';
my $VERSION = '0.5.2';
my $AUTHOR  = 'John Douglas Rowell';
my $YEAR    = 1999;
my $BUGS    = 'bugs@jdrowell.com';

# Option defaults. GetOptions overwrites these through the references
# collected in %optctl.
my %opts = (
	stdin     => 0,
	help      => 0,
	version   => 0,
	nostats   => 0,
	recursive => 0,
	debug     => 0,
	mask      => '%i.%c',
	linecache => 10000,
	timeout   => 30,
	sockets   => 32,
	database  => '',
);

# Destination table for GetOptions: the bare "-" option maps to stdin, and
# every option except --database is reachable both by its long name and by
# its first letter, with both spellings sharing one destination.
my %optctl = ( '' => \$opts{stdin} );
for my $long (qw(help version nostats recursive debug mask linecache timeout sockets)) {
	my $dest = \$opts{$long};
	$optctl{$long} = $dest;
	$optctl{ substr($long, 0, 1) } = $dest;
}
$optctl{database} = \$opts{database};

# Option specifications, in the order they are handed to GetOptions.
my @optlst = (
	'',
	'h',   'help',
	'v',   'version',
	'n',   'nostats',
	'r',   'recursive',
	'd=i', 'debug=i',
	'm=s', 'mask=s',
	'l=i', 'linecache=i',
	't=i', 'timeout=i',
	's=i', 'sockets=i',
	'database=s',
);

# Usage synopsis: shown on its own when no log file is named, and as part
# of the --help output.
my $usage = <<"END_USAGE";
Usage: $APPNAME [-hvnr] [--help] [--version] [--nostats] [--recursive] [-d <level>] [--debug=<level>] [-m <mask>] [--mask=<mask>] [-l <line cache>] [--linecache=<line cache>] [-t <timeout>] [--timeout=<timeout>] [-s <number of sockets>] [--sockets=<number of sockets>] [--database=<db path>] <LOG FILE>

Report bugs to $BUGS
END_USAGE

# Version and licence banner: shown by --version and at the top of --help.
my $version = <<"END_VERSION";
$APPNAME $VERSION
Copyright (C) $YEAR $AUTHOR
$APPNAME comes with ABSOLUTELY NO WARRANTY.
You may redistribute copies of $APPNAME
under the terms of the GNU General Public License.
For more information about these matters, see the file named COPYING.
END_VERSION

# Parse the command line. GetOptions prints its own warning and returns
# false when it meets an unknown option or a malformed value; stop there
# instead of running with a partially applied configuration.
if (!GetOptions(\%optctl, @optlst)) {
	print STDERR <<"-x-";
$usage
try $APPNAME --help for help
-x-
	exit 1;
}

# --help: version banner, usage synopsis and a description of every option.
if ($opts{help}) {
	print <<"-x-";
$version
$usage
Resolves IP addresses to hostnames. Any file that has lines starting with IP
addresses/hostnames can be resolved, from simple IP lists to Apache combined
log files and more.

   --help or -h
      help (this text).
   --version or -v
      display version information.
   --nostats or -n
      don't display stats after processing
   --recursive or -r
      recurse into C, B and A classes when there is no PTR.
      default is no recursion
   --debug=<level> or -d <level>
      debug mode (no file output, just statistics during run).
      verbosity level range: 1-2
   --timeout=<timeout> or -t <timeout>
      timeout in seconds (for each host resolution).
      default is 30 seconds
   --sockets=<sockets> or -s <sockets>
      maximum number of concurrent sockets to use.
      (use ulimit -a to check the max allowed for your operating system)
      default is 32
   --database=<db path>
      path to database that holds resolved hosts/classes
   --mask=<mask> or -m <mask>
      <mask> accepts %i for IP and %c for class owner.
      ex: "somewhere.in.%c" or "%i.in.%c"
      default is "%i.%c"
   --linecache=<lines> or -l <lines>
      numbers of lines to cache in memory.
      default is 10000
   <LOG FILE>
      the log filename or '-' for STDIN

-x-
	exit;
}

# --version: banner only.
if ($opts{version}) {
	print <<"-x-";
$version
try $APPNAME --help for help
-x-
	exit;
}

# Nothing to read: neither a log file argument nor the bare '-' option.
if (!defined $ARGV[0] and !$opts{stdin}) {
	print <<"-x-";
$usage
try $APPNAME --help for help
-x-
	exit;
}

# Input source: the bare '-' option selects STDIN, otherwise the first
# remaining argument names the log file.
my $filename = $ARGV[0];
$filename = '-' if $opts{stdin};

# Dotted quad anchored at the start of the string; captures the four octets.
my $ipmask = '^(\d+)\.(\d+)\.(\d+)\.(\d+)';

my $res = Net::DNS::Resolver->new;	# resolver used for the background queries
my $sel = IO::Select->new;		# sockets of the queries currently in flight

my %hosts;	# per-IP record: NAME ('-1' pending, '-2' failed, else hostname) and COUNT of cached lines
my %class;	# same layout, keyed by reversed network prefix ("c.b.a", "b.a", "a")
my %q;		# hosts/networks still waiting to be sent
my %socks;	# in-flight queries by socket fileno: TIME, QUERY, TYPE
my @lines;	# input lines read but not yet printed
my %DB;		# tied to the DB_File database when --database is given

# Counters reported by printstats (and by printresults in debug mode).
my %stats = (
	SENT      => 0,
	RECEIVED  => 0,
	BOGUS     => 0,
	RESOLVED  => 0,
	HRESOLVED => 0,
	TIMEOUT   => 0,
	STARTTIME => time(),
	MAXTIME   => 0,
	TOTTIME   => 0,
	TOTLINES  => 0,
	TOTHOSTS  => 0,
);

# Open the input with an explicit read mode. With two-argument open a file
# name beginning with '>' or ending in '|' would be taken as an output mode
# or a command, and surrounding whitespace would be stripped; STDIN is
# therefore duplicated by hand instead of relying on the magic '-' name.
if ($filename eq '-') {
	open(FILE, '<&', \*STDIN) or die "error trying to open log file '$filename': $!";
} else {
	-f $filename or die "can't find log file '$filename'";
	open(FILE, '<', $filename) or die "error trying to open log file '$filename': $!";
}

# Optional persistent store of names resolved in this and earlier runs.
if ($opts{database} ne '') {
	tie(%DB, 'DB_File', $opts{database}) or die "can't open database '$opts{database}': $!";
}

# Main loop: top up the line cache, start as many queries as there are free
# sockets, collect answers and timeouts, then print every leading line whose
# host is no longer pending. Ends once the cache stays empty after a refill.
while (1) {
	getlines();
	@lines or last;
	makequeries();
	checkresponse();
	checktimeouts();
	printresults();
}

printstats();


# Account for one more cached line that starts with $ip. A host seen for
# the first time gets a record marked pending ('-1') and is queued for
# lookup; otherwise its line count is bumped. With --recursive the same
# bookkeeping is done for the three enclosing networks, keyed by their
# octets in reverse order ("c.b.a", "b.a", "a").
sub addhost {
	my $ip = shift;

	if (exists $hosts{$ip}) {
		$hosts{$ip}{COUNT}++;
	} else {
		$hosts{$ip} = { NAME => '-1', COUNT => 1 };
		$q{$ip} = 0;
		$stats{TOTHOSTS}++;
	}

	return unless $opts{recursive};

	$ip =~ /$ipmask/;
	my @nets = ("$3.$2.$1", "$2.$1", "$1");

	for my $net (@nets) {
		if (exists $class{$net}) {
			$class{$net}{COUNT}++;
		} else {
			$class{$net} = { NAME => '-1', COUNT => 1 };
			$q{$net} = 0;
		}
	}
}

# Release one cached line that referenced $ip. When the last reference
# goes, a resolved name is written to the database (if one is in use), the
# host is counted as resolved when getresolved() yields a name, and the
# record is dropped. With --recursive the enclosing networks are released
# the same way.
sub removehost {
	my $ip = shift;
	my $persist = $opts{database} ne '';

	$hosts{$ip}{COUNT}--;
	if ($hosts{$ip}{COUNT} < 1) {
		my $name = $hosts{$ip}{NAME};
		if ($name ne '-1' and $name ne '-2' and $persist) {
			$DB{$ip} = $name;
		}

		my $resolved = getresolved($ip);
		$stats{HRESOLVED}++ unless $resolved eq '-1' or $resolved eq '-2';

		delete $hosts{$ip};
	}

	return unless $opts{recursive};

	$ip =~ /$ipmask/;
	my @nets = ("$3.$2.$1", "$2.$1", "$1");

	for my $net (@nets) {
		next unless --$class{$net}{COUNT} < 1;

		my $owner = $class{$net}{NAME};
		if ($owner ne '-1' and $owner ne '-2' and $persist) {
			$DB{$net} = $owner;
		}
		delete $class{$net};
	}
}

# Best name currently known for $ip.
# Returns -1 while an answer that could still name the host is outstanding,
# '-2' when every source has failed, and otherwise the name to print.
# Sources, in order: the host's own PTR answer, the database entry for the
# host, and (only with --recursive) the owner of the narrowest enclosing
# network - first from DNS, then from the database - formatted by maskthis().
sub getresolved {
	my $ip = shift;
	my $name = $hosts{$ip}{NAME};

	return -1 if $name eq '-1';
	return $name if $name ne '-2';
	return $DB{$ip} if exists $DB{$ip};

	return '-2' unless $opts{recursive};

	$ip =~ /$ipmask/;
	my @nets = ("$3.$2.$1", "$2.$1", "$1");

	# Any network query still pending means the final answer is not known yet.
	for my $net (@nets) {
		return '-1' if $class{$net}{NAME} eq '-1';
	}

	# Narrowest network with a resolved owner wins.
	for my $net (@nets) {
		my $owner = $class{$net}{NAME};
		return maskthis($ip, $owner) if $owner ne '-2';
	}

	# Fall back to owners remembered in the database.
	for my $net (@nets) {
		return maskthis($ip, $DB{$net}) if exists $DB{$net};
	}

	return '-2'; # totally unresolved
}

# Build the substitute name for a host that has no PTR record of its own:
# a copy of the --mask template with the first %i replaced by $ip and then
# the first %c replaced by $domain.
sub maskthis {
	my ($ip, $domain) = @_;
	my %value_for = ('%i' => $ip, '%c' => $domain);

	my $masked = $opts{mask};
	for my $token ('%i', '%c') {
		$masked =~ s/$token/$value_for{$token}/;
	}

	return $masked;
}

# Top up the line cache from FILE until it holds --linecache lines or the
# input is exhausted. Every line read is counted and cached; lines that
# start with a dotted quad followed by whitespace also register their host
# through addhost().
sub getlines {
	eof(FILE) and return;

	while (@lines < $opts{linecache}) {
		my $line = <FILE>;
		# Test definedness, not truth: a final line consisting of "0" with no
		# trailing newline is false but is still a line of input.
		defined $line or last;

		$stats{TOTLINES}++;
		push @lines, $line;
		$line =~ /^$ipmask\s/ or next;
		addhost(($line =~ /^(\S+)/));
	}
}

# Start as many queued lookups as there are free sockets (--sockets minus
# the queries already in flight). Keys that look like a full dotted quad are
# sent as host (PTR) queries, everything else as network (SOA) queries; a
# key leaves the queue as soon as its query has been sent.
sub makequeries {
	my @pending = keys %q;
	my $free = $opts{sockets} - $sel->count;

	for (1 .. $free) {
		my $query = shift @pending;
		# Stop only when the queue is empty. A truth test would also stop on
		# the key "0" (the /8 network of a 0.x.x.x address), which would then
		# never be sent and would keep its lines pending forever.
		defined $query or last;

		if ($query =~ /$ipmask/) {
			query($query, 'H');
		} else {
			query($query, 'C');
		}
		delete $q{$query};
	}
}

# Collect the answers that have arrived, waiting up to 5 seconds for at
# least one socket to become readable. For each answered query the socket
# is retired and the matching host or network record receives either the
# resolved name or the failure marker '-2'; the statistics are updated
# along the way.
sub checkresponse {
	for my $sock ($sel->can_read(5)) {
		my $resolved = 0;
		my $fileno = fileno($sock);
		my $query = $socks{$fileno}{QUERY};
		my $type = $socks{$fileno}{TYPE};
		my $dnstype = ($type eq 'H') ? 'PTR' : 'SOA';
		my $timespan = time() - $socks{$fileno}{TIME};
		$stats{TOTTIME} += $timespan;

		my $packet = $res->bgread($sock);
		$stats{RECEIVED}++;
		$sel->remove($sock);
		delete $socks{$fileno};

		if ($packet) {
			for my $rr ($packet->answer) {
				$rr->type ne $dnstype and next;

				# Read the record through its accessor methods: the object's
				# internal hash fields are not a stable interface and need not
				# hold plain strings.
				if ($type eq 'H') {
					$resolved = 1;
					$hosts{$query}{NAME} = $rr->ptrdname;
				} else {
					# The network owner is the primary name server's name
					# minus its first label.
					my (undef, $domain) = $rr->mname() =~ /([^\.]+)\.(.*)/;
					if (defined $domain) {
						# The record may already have been released while the
						# query was in flight.
						defined $class{$query} and $class{$query}{NAME} = $domain;
						$resolved = 1;
					}
				}
			}
		}

		if ($resolved) {
			$stats{RESOLVED}++;
			$timespan > $stats{MAXTIME} and $stats{MAXTIME} = $timespan;
		} else {
			$stats{BOGUS}++;
			if ($type eq 'H') { $hosts{$query}{NAME} = '-2' }
			else { defined $class{$query} and $class{$query}{NAME} = '-2' }
		}
	}
}

# Give up on every in-flight query older than --timeout seconds: count it,
# mark its host or network as failed ('-2') and retire the socket.
sub checktimeouts {
	my $now = time();

	for my $sock ($sel->handles) {
		my $fileno = fileno($sock);
		my $query = $socks{$fileno}{QUERY};
		my $age = $now - $socks{$fileno}{TIME};

		next unless $age > $opts{timeout};

		$stats{TIMEOUT}++;
		$stats{TOTTIME} += $age;

		if ($socks{$fileno}{TYPE} eq 'H') {
			$hosts{$query}{NAME} = '-2';
		} elsif (defined $class{$query}) {
			# The network record may already have been released.
			$class{$query}{NAME} = '-2';
		}

		$sel->remove($sock);
		delete $socks{$fileno};
	}
}

# Print cached lines in input order for as long as the line at the front is
# ready. Lines that do not start with an IP pass through untouched; a line
# whose host is still pending stops the run so that order is preserved; any
# other line has its leading IP replaced by the resolved name (or is left
# as is when resolution failed) and its host reference released.
# With --debug >= 1 a progress summary goes to STDERR first.
sub printresults {
	if ($opts{debug} >= 1) {
		print STDERR "Sent: $stats{SENT}, Received: $stats{RECEIVED}, Resolved: $stats{RESOLVED}, Bogus: $stats{BOGUS}, Timeout: $stats{TIMEOUT}\n";
	}

	while (@lines) {
		my $line = $lines[0];

		unless ($line =~ /^$ipmask\s/) {
			print($line);
			shift @lines;
			next;
		}

		my ($ip) = $line =~ /^(\S+)/;
		my $resolved = getresolved($ip);
		last if $resolved eq '-1';

		if ($resolved ne '-2') {
			$line =~ s/^(\S+)/$resolved/;
		}

		print $line;
		shift @lines;
		removehost($ip);
	}
}

# Write the end-of-run summary to STDERR unless --nostats was given.
# Every ratio falls back to 0 when its denominator is 0.
sub printstats {
	return if $opts{nostats};

	my $elapsed    = time() - $stats{STARTTIME};
	my $hosts      = $stats{TOTHOSTS};
	my $unresolved = $hosts - $stats{HRESOLVED};

	my $rate     = $elapsed ? ($stats{TOTLINES} / $elapsed) : 0;
	my $pct_ok   = $hosts ? ($stats{HRESOLVED} / $hosts * 100) : 0;
	my $pct_fail = $hosts ? ($unresolved / $hosts * 100) : 0;
	my $avg      = $stats{SENT} ? ($stats{TOTTIME} / $stats{SENT}) : 0;

	my $clock = strftime("%H:%M:%S", gmtime($elapsed));

	my $report = join '',
		"     Total Lines: $stats{TOTLINES}\n",
		"     Total Time : $clock (", sprintf("%.2f", $rate), " lines/s)\n",
		"     Total Hosts: $hosts\n",
		"  Resolved Hosts: $stats{HRESOLVED} (", sprintf("%.2f", $pct_ok), "%)\n",
		"Unresolved Hosts: $unresolved (", sprintf("%.2f", $pct_fail), "%)\n",
		"Average DNS time: ", sprintf("%.4f", $avg), "s per request\n",
		"    Max DNS time: $stats{MAXTIME}s (consider this value for your timeout)\n";

	print STDERR $report;
}

# Send one background DNS query and register its socket.
#   $find - an IP address (type 'H') or a reversed network prefix (type 'C')
#   $type - 'H' asks for the PTR record of $find, anything else asks for
#           the SOA record of "$find.in-addr.arpa"
# Returns the fileno of the socket, which is also the key of the %socks
# entry recording when the query was sent and what it asked for.
# Dies when the resolver cannot hand out a socket.
sub query {
	my ($find, $type) = @_;

	my $is_host = ($type eq 'H');
	my $name    = $is_host ? $find : ("$find.in-addr.arpa");
	my $rrtype  = $is_host ? 'PTR' : 'SOA';

	my $sock = $res->bgsend($name, $rrtype);
	die "Error opening socket for bgsend. Are we out of sockets?" unless $sock;

	$stats{SENT}++;
	$sel->add($sock);

	my $fileno = fileno($sock);
	$socks{$fileno} = {
		TIME  => time(),
		QUERY => $find,
		TYPE  => $type,
	};

	return $fileno;
}

