#!/usr/bin/perl

use strict;
use warnings;

use Cwd;
use Data::Dumper;
use File::Tail;
use Getopt::Std;

# Unbuffered STDOUT, so each summary row appears as soon as it is printed.
$| = 1;
$Data::Dumper::Sortkeys = 1;

# -l selects the long display: wider geoip, HELO, MAIL FROM and RCPT TO fields.
our $opt_l = 0;
getopts('l');

my %plugins;           # plugin name => { id, abb3, abb5, aliases } (registry)
my %plugin_aliases;    # alias name  => canonical plugin name
my %seen_plugins;      # column name => how often it has carried a value
my %pids;              # pid         => fields collected for that connection
my %hide_plugins = (hostname => 1);    # columns that are never displayed

# Locate the qpsmtpd install; the log path and the registry hang off it.
my $qpdir = get_qp_dir();
my $file  = $qpdir . '/log/main/current';
populate_plugins_from_registry();

# Columns are displayed in the order of the numeric ids from the registry.
my @sorted_plugins = sort {
    $plugins{$a}{id} <=> $plugins{$b}{id}
} keys %plugins;

# Follow the log file, polling once a second.
# NOTE(review): debug => 1 is kept from the original; confirm the extra
# File::Tail diagnostics are actually wanted.
my $fh = File::Tail->new(
    name        => $file,
    interval    => 1,
    maxinterval => 1,
    debug       => 1,
    tail        => 1000,
);

my $printed = 0;    # row counter; a header is printed at 0 and every 20 rows
my $has_cleanup;    # true once a "cleaning up" line has been parsed

# %formats holds the default column formats; %formats3 adds three character
# columns for the remaining plugins and is what the printer consults.
my %formats  = get_default_field_widths();
my %formats3 = %formats;
$formats3{$_} = "%-3.3s"
  for qw/ badrcptto check_badrcptto
  qmail_deliverable rcpt_ok check_basicheaders headers uribl bogus_bounce
  check_bogus_bounce domainkeys dkim dmarc spamassassin dspam
  virus::clamdscan /;

# Main loop: follow the log, collect per-pid state from each recognised line
# and print one summary row when a connection closes, is cleaned up or is RSET.
while (defined(my $line = $fh->read)) {
    chomp $line;

    # Strip binary/unprintable bytes. The class keeps printable ASCII (space
    # through tilde) plus tab, which the field splitting treats as whitespace.
    # The previous pattern, [^[ -~]], was a class followed by a literal ']' and
    # therefore only removed an unprintable byte that was followed by ']'.
    $line =~ s/[^\t -~]//g;
    next if !$line;

    my ($type, $pid, $hook, $plugin, $message) = parse_line($line);
    next if !$type;
    next if $type =~ /^(?:info|unknown|response|tcpserver)$/;
    next if $type eq 'init';    # doesn't occur in all deployment models
    next if !defined $pid;      # nothing to key the connection state on

    if (!$pids{$pid}) {         # haven't seen this pid
        next if $type ne 'connect';    # ignore unless connect

        # The connect message is split into a host and a second token whose
        # first and last characters are dropped to obtain the IP.
        my ($host, $ip) = split /\s/, $message;
        $ip = (defined $ip && length $ip) ? substr($ip, 1, -1) : undef;

        foreach (keys %seen_plugins, qw/ helo_host from to /) {
            $pids{$pid}{$_} = '';    # define them
        }
        $pids{$pid}{ip} = $ip;
        $pids{$pid}{hostname} = $host
          if defined $host && $host ne 'Unknown';
    }

    if ($type eq 'close') {
        next if $has_cleanup;          # it'll get handled later
        print_auto_format($pid, $line);
        delete $pids{$pid};
    }
    elsif ($type eq 'cleanup') {
        print_auto_format($pid, $line);
        delete $pids{$pid};
    }
    elsif ($type eq 'plugin') {
        handle_plugin($message, $plugin, $pid, $line);
    }
    elsif ($type eq 'reject')  { }
    elsif ($type eq 'connect') { }
    elsif ($type eq 'dispatch') {
        handle_dispatch($message, $pid, $line);
    }
    else {
        # Types such as 'err' carry no hook or plugin; print them as empty
        # fields (same output as before) without uninitialized-value warnings.
        print join(' ',
                   map { defined $_ ? $_ : '' }
                     $type, $pid, $hook, $plugin, $message),
          "\n";
    }
}

# Return the default printf format for each column as a flat key/value list.
# The geoip column is wider when the -l option ($opt_l) is set.
sub get_default_field_widths {

    # columns rendered three characters wide
    my @narrow = qw/ dnsbl rhsbl relay karma fcrdns earlytalker check_earlytalker helo
        tls auth::auth_vpopmail auth::auth_vpopmaild auth::auth_vpopmail_sql
        auth::auth_checkpassword badmailfrom check_badmailfrom
        sender_permitted_from resolvable_fromhost dont_require_anglebrackets
        queue::qmail-queue queue::smtp-forward /;

    my %widths;
    @widths{@narrow} = ("%-3.3s") x @narrow;

    # columns with their own widths
    $widths{ip}                          = "%-15.15s";
    $widths{hostname}                    = "%-20.20s";
    $widths{'ident::p0f'}                = "%-10.10s";
    $widths{distance}                    = "%5.5s";
    $widths{count_unrecognized_commands} = "%-5.5s";
    $widths{unrecognized_commands}       = "%-5.5s";
    $widths{connection_time}             = "%-4.4s";

    if ($opt_l) {
        $widths{'ident::geoip'} = "%-20.20s";
    }
    else {
        $widths{'ident::geoip'} = "%-6.6s";
    }

    return %widths;
}

# Record a plugin's log message for a connection.
#   $message - text the plugin logged
#   $plugin  - plugin name
#   $pid     - key of the connection in %pids
#   $line    - the raw log line (not used here)
# A stored message that already starts with pass/fail/skip is kept; anything
# else is replaced by the newest message. For ident::geoip the location and
# distance are stored separately when the message contains "<n> km".
sub handle_plugin {
    my ($message, $plugin, $pid, $line) = @_;

    return if $plugin eq 'naughty';   # housekeeping only

    my $recorded = $pids{$pid}{$plugin};
    if (!$recorded || $recorded !~ /^(?:pass|fail|skip)/i) {
        # first entry for this plugin, or the earlier one wasn't a verdict
        $pids{$pid}{$plugin} = $message;
    }

    return if $plugin ne 'ident::geoip';

    if (length($message) < 3) {
        # very short geoip messages: shrink the column to three characters
        $formats{'ident::geoip'}  = "%-3.3s";
        $formats3{'ident::geoip'} = "%-3.3s";
        return;
    }

    my ($gip, $distance) = $message =~ /(.*?),\s+([\d]+)\skm/;
    return if !$distance;

    $pids{$pid}{$plugin}  = $gip;
    $pids{$pid}{distance} = $distance;
    return;
}

# Act on a "dispatching <COMMAND>" log message for the connection $pid.
# MAIL FROM, RCPT TO and EHLO/HELO store the envelope sender, recipient and
# HELO host in %pids; RSET prints the row collected so far. DATA, QUIT,
# STARTTLS and any other command are ignored.
sub handle_dispatch {
    my ($message, $pid, $line) = @_;

    if ($message =~ /^dispatching MAIL FROM/i) {
        my $sender = $message =~ /<(.*?)>/ ? $1 : undef;
        $pids{$pid}{from} = $sender || '';
        return;
    }

    if ($message =~ /^dispatching RCPT TO/i) {
        my $recipient = $message =~ /<(.*?)>/ ? $1 : undef;
        $pids{$pid}{to} = $recipient || '';
        return;
    }

    if ($message =~ m/dispatching (EHLO|HELO) (.*)/) {
        $pids{$pid}{helo_host} = $2 || '';
        return;
    }

    # Everything else is a no-op (likely an unrecognized command), except
    # RSET, which flushes the row collected so far.
    print_auto_format($pid, $line) if $message eq 'dispatching RSET';
    return;
}

# Classify one log line.
# Returns the list ($type, $pid, $hook, $plugin, $message), or an empty list
# when the line has fewer than three whitespace separated fields. $hook and
# $plugin are only filled in by parse_line_plugin. Lines that match nothing
# are reported on STDOUT and returned with type 'unknown'.
sub parse_line {
    my $line = shift;
    my (undef, $pid, $message) = split /\s+/, $line, 3;
    return if !$message;    # garbage in the log file

    # lines seen many times per connection
    return parse_line_plugin($line) if substr($message, 0, 1) eq '(';
    return 'dispatch', $pid, undef, undef, $message
      if index($message, 'dispatching ') == 0;

    # 2xx/3xx SMTP replies. The class is [23]: a '|' inside brackets is a
    # literal character, not alternation.
    return 'response', $pid, undef, undef, $message
      if $message =~ /^[23]\d\d/;
    return 'tcpserver', $pid, undef, undef, undef
      if index($pid, 'tcpserver:') == 0;

    # lines seen about once per connection
    return 'init', $pid, undef, undef, $message
      if index($message, 'Accepted connection') == 0;
    if (index($message, 'Connection from') == 0) {

        # hand back only what follows "Connection from "; empty when nothing
        # follows, rather than reading past the end of the string
        my $peer = length($message) > 16 ? substr($message, 16) : '';
        return 'connect', $pid, undef, undef, $peer;
    }
    foreach my $prefix ('close ', 'click, disconnecting') {
        return 'close', $pid, undef, undef, $message
          if index($message, $prefix) == 0;
    }
    return parse_line_cleanup($line)
      if index($message, 'cleaning up') == 0;

    # lines seen less than once per connection
    return 'info', $pid, undef, undef, $message
      if $message eq 'spooling message to disk'
      || $message eq 'auth success cleared naughty';

    # 4xx/5xx SMTP replies and explicit deny messages
    return 'reject', $pid, undef, undef, $message
      if $message =~ /^[45]\d\d/;
    foreach my $prefix ('deny mail from', 'denysoft mail from') {
        return 'reject', $pid, undef, undef, $message
          if index($message, $prefix) == 0;
    }

    foreach my $prefix (
                        'Lost connection',
                        'Running as user',
                        'Loaded Qpsmtpd::',
                        'Permissions on spool_dir',
                        'Listening on ',
                        'Loaded logging/',
                       )
    {
        return 'info', $pid, undef, undef, $message
          if index($message, $prefix) == 0;
    }

    # checked last so that the recognised messages above win
    return 'err', $pid, undef, undef, $message
      if $line =~ /at [\S]+ line \d/;    # generic perl error
    print "UNKNOWN LINE: $line\n";
    return 'unknown', $pid, undef, undef, $message;
}

# Parse a plugin log line, i.e. one whose message starts with "(hook)".
# Returns ('plugin', $pid, $hook, $plugin, $message). The plugin name is
# normalised: trailing ':' removed, everything from "_3a" on dropped, "_2d"
# turned back into '-', and aliases mapped to their registry name. For the
# (queue) hook the pid is taken from "(for N)" in the message, when present,
# and the message is reported as 'pass'.
sub parse_line_plugin {
    my ($line) = @_;

# @tai 13486 (connect) ident::p0f: Windows (XP/2000 (RFC1323+, w, tstamp-))
# @tai 13681 (connect) dnsbl: fail, NAUGHTY
# @tai 15787 (connect) karma: pass, no penalty (0 naughty, 3 nice, 3 connects)
# @tai 77603 (queue) queue::qmail_2dqueue: (for 77590) Queuing to /var/qmail/bin/qmail-queue

    # Split on runs of whitespace, as parse_line does, so repeated blanks do
    # not shift the fields.
    my (undef, $pid, $hook, $plugin, $message) = split /\s+/, $line, 5;
    foreach my $field ($hook, $plugin, $message) {
        $field = '' if !defined $field;    # truncated line
    }

    $plugin =~ s/:$//;
    $plugin =~ s/_3a.*//s;                 # trim :N off the plugin log entry
    $plugin =~ s/_2d/-/g;

    $plugin = $plugin_aliases{$plugin}
      if $plugin_aliases{$plugin};         # map alias to master

    if ($hook eq '(queue)') {

        # Attribute the result to the pid named in the message; keep the
        # line's own pid when the message names none.
        my ($for_pid) = $message =~ /\(for ([\d]+)\)\s/;
        $pid = $for_pid if defined $for_pid;
        $message = 'pass';
    }

    return 'plugin', $pid, $hook, $plugin, $message;
}

# Parse a "cleaning up after <pid>" line, e.g.
#   @tai 85931 cleaning up after 3210
# Returns ('cleanup', <last field of the line>, undef, undef, $line) and
# records in $has_cleanup that cleanup lines occur in this log.
sub parse_line_cleanup {
    my ($line) = @_;

    $has_cleanup++;

    my @fields   = split /\s+/, $line;
    my $done_pid = $fields[-1];    # the pid being cleaned up, not the logger

    return ('cleanup', $done_pid, undef, undef, $line);
}

# Print the summary row for connection $pid.
# Columns are ip, hostname and distance followed by the plugins in registry
# order. A column appears once it has carried a value for any connection; the
# first time that happens the header row is forced. The HELO, MAIL FROM and
# RCPT TO values are placed just before the first displayed plugin whose name
# matches helo / from / to|rcpt|recipient. Values are removed from %pids as
# they are printed. $line is accepted but not used.
sub print_auto_format {
    my ($pid, $line) = @_;

    my $wide      = $opt_l ? 20 : 8;
    my $wide_spec = " %-$wide.${wide}s";

    # [ key in the connection record, plugin name pattern, header, keep tail ]
    my @envelope = (
        ['helo_host', qr/helo/,              'HELO',      1],
        ['from',      qr/from/,              'MAIL FROM', 1],
        ['to',        qr/to|rcpt|recipient/, 'RCPT TO',   0],
    );

    my $conn   = ($pids{$pid} ||= {});
    my $format = '';
    my (@headers, @values);

    foreach my $plugin (qw/ ip hostname distance /, @sorted_plugins) {
        if (defined $conn->{$plugin}) {
            $printed = 0 if !$seen_plugins{$plugin};    # force header print
            $seen_plugins{$plugin}++;
        }

        next if !$seen_plugins{$plugin};                # hide unused plugins
        if ($hide_plugins{$plugin}) {                   # user doesn't want to see
            delete $conn->{$plugin};
            next;
        }

        # at most one envelope column is emitted ahead of a plugin column
        foreach my $column (@envelope) {
            my ($key, $pattern, $header, $keep_tail) = @$column;
            next if !defined $conn->{$key};
            next if $plugin !~ $pattern;

            my $text = delete $conn->{$key};
            $text = substr($text, -$wide, $wide) if $keep_tail;

            $format .= $wide_spec;
            push @values,  $text;
            push @headers, $header;
            last;
        }

        $format .= ' ' . ($formats3{$plugin} || '%-10.10s');
        push @values,
          (defined $conn->{$plugin} ? show_symbol(delete $conn->{$plugin}) : '');
        push @headers, ($plugins{$plugin}{abb3} || $plugin);
    }

    $format .= "\n";

    # header on the first row, after a new column shows up, and every 20 rows
    my $want_header = !$printed || $printed % 20 == 0;
    printf("\n$format", @headers) if $want_header;
    printf($format, @values);
    #print Data::Dumper::Dumper($pids{$pid}) if keys %{$pids{$pid}};
    $printed++;
}

# Reduce a plugin message to a two character symbol for display:
#   ' o' pass / positive / TLS setup    ' -' skip
#   ' X' fail                           ' x' tolerated fail / negative
#   ' !' error
# Any other message is returned with each pair of whitespace characters
# replaced by a single space.
sub show_symbol {
    my $mess = shift;

    # whole-message matches (case sensitive)
    my %exact = (
        'TLS setup returning' => ' o',
        'pass'                => ' o',
        'skip'                => ' -',
        'fail'                => ' X',
    );
    return $exact{$mess} if exists $exact{$mess};

    # must be tested before the generic "fail" prefix below
    return ' x' if index($mess, 'fail, tolerated') == 0;

    # keyword followed by a comma, colon or whitespace (case insensitive)
    my @prefix_rules = (
        [qr/^skip[,:\s]/i     => ' -'],
        [qr/^pass[,:\s]/i     => ' o'],
        [qr/^fail[,:\s]/i     => ' X'],
        [qr/^negative[,:\s]/i => ' x'],
        [qr/^positive[,:\s]/i => ' o'],
        [qr/^error[,:\s]/i    => ' !'],
    );
    foreach my $rule (@prefix_rules) {
        my ($pattern, $symbol) = @$rule;
        return $symbol if $mess =~ $pattern;
    }

    $mess =~ s/\s\s/ /g;
    return $mess;
}

# Locate the qpsmtpd directory, i.e. the one that contains "plugins".
# Checked in order, for each of the accounts qpsmtpd and smtpd: the home
# directory itself, then its smtpd, qpsmtpd and qpsmtpd-dev subdirectories.
# Falls back to the current working directory when it has a plugins
# directory. Returns the directory path, or nothing (undef in scalar
# context) when no candidate exists.
sub get_qp_dir {
    foreach my $user (qw/ qpsmtpd smtpd /) {
        my @pwent = getpwnam($user) or next;    # no such account
        my $homedir = $pwent[7];

        return "$homedir" if -d "$homedir/plugins";

        foreach my $subdir (qw/ smtpd qpsmtpd qpsmtpd-dev /) {
            return "$homedir/$subdir" if -d "$homedir/$subdir/plugins";
        }
    }

    return Cwd::getcwd() if -d "./plugins";

    # Explicit "not found" instead of falling off the end of an if statement
    # and returning whatever the -d test happened to yield.
    return;
}

# Load $qpdir/plugins/registry.txt into %plugins and %plugin_aliases.
# Each non-comment line is split on whitespace into: id, name, abb3, abb5 and
# an optional comma separated alias list. Dies when the registry is missing
# or cannot be opened.
sub populate_plugins_from_registry {

    my $file = "$qpdir/plugins/registry.txt";
    if (!-f $file) {
        die "unable to find plugin registry\n";
    }

    # A registry that exists but is unreadable must not be treated as empty.
    open my $F, '<', $file or die "unable to open $file: $!\n";
    while (defined(my $line = <$F>)) {
        next if $line =~ /^#/;    # discard comments
        chomp $line;
        next if !$line;
        my ($id, $name, $abb3, $abb5, $aliases) = split /\s+/, $line;
        next if !defined $name;
        $plugins{$name} = {id => $id, abb3 => $abb3, abb5 => $abb5};

        next if !$aliases;
        $aliases =~ s/\s+//g;
        $plugins{$name}{aliases} = $aliases;

        # named $alias rather than $a, which is reserved for sort blocks
        foreach my $alias (split /,/, $aliases) {
            $plugin_aliases{$alias} = $name;
        }
    }
    close $F;
    return;
}

__END__

=head1 NAME

Summarize

=head2 SYNOPSIS

Parse the qpsmtpd logs and display a one line summary of each connection

=head2 EXAMPLES

 ip              dista geo    p0f        krm dbl rly dns ear HELO     hlo tls MAIL FRO bmf rbl rfh spf RCPT TO  bto qmd rok tim
 192.48.85.146    2705 NA, US FreeBSD 9.  o   o   -   o   -  tnpi.net  o   o                                                0.55
 190.194.22.35    7925 SA, AR Windows 7   X   X   -   X   o  a.net.ar  x      ogle.com  o   o   o   x  *o*g@sim  o   o   o  2.72
 192.48.85.146    2705 NA, US  -          o   o   -   o   -  tnpi.net  o                                                    0.41
 181.164.160.98   8493 SA, AR Windows 7   X   X   -   X   o  l.com.ar  x      ogle.com  o   o   o   x  trapped@  o   o   o  2.61
 188.79.146.22    8381 EU, ES Windows 7   o   X   -   o   o  zztel.es  o      ogle.com  o   o   o   x  *o**an@s  o   o   o  3.02
 188.79.146.22    8381 EU, ES Windows 7   o   X   -   o   o  zztel.es  o      ogle.com  o   o   o   x  *o**an@s  o   o   o  2.58
 188.79.146.22    8381 EU, ES Windows 7   o   X   -   o   o  zztel.es  o      ogle.com  o   o   o   x  *o**an@s  o   o   o  2.70
 190.194.22.35    7925 SA, AR Windows 7   X   X   -   X   o  a.net.ar  x      ogle.com  o   o   o   x  do*g@s*m  o   o   o  2.60

 ip              dista geo    p0f        krm dbl rly dns ear HELO     hlo tls MAIL FRO bmf rbl rfh spf RCPT TO  bto qmd rok bog hdr dky dkm dmc spm dsp clm qqm tim
 192.48.85.146    2705 NA, US FreeBSD 9.  o   o   -   o   -  tnpi.net  o   o                                                                                    1.36
 192.48.85.146    2705 NA, US  -          o   o   -   o   -  tnpi.net  o                                                                                        0.36
 66.175.56.179    2313 NA, US Linux 2.6.  o   o   -   o   -  zone.com  o   o  chem.com  o   o   o   -  d**n@the  o   o   o   o   o   -   o   -   -   -   -   o  2.86
 190.237.55.32    5411 SA, PE Windows 7   o   X   -   X   o  gtsgnvnu  x      ryrk.net  o   o   x   -  *an@s*rl  o   o   o                                      3.54
 192.48.85.146    2705 NA, US  -          o   o   -   o   -  tnpi.net  o                                                                                        0.20
 207.171.174.77   2700 NA, US             o   o   -   o   -  azon.com  o      azon.com  o   o   o   o  *a*e@s*r  o   o   o   o   o   -   o   o   o   o   o   o  7.27
 201.141.78.4     1487 NA, MX Windows XP  o   X   -   X   o  fmhufhjo  x      fdvx.net  o   o   x   -  d**@si*e  o   o   o                                      2.95
 201.141.78.4     1487 NA, MX Windows XP  X   X   -   X   o  fmhufhjo  x      fdvx.net  o   o   x   -  d**@s*rl  o   o   o                                      2.42

The display autosizes to display disposition results for as many plugins as are emitting logs. The 3 char abbreviations are listed with their full plugin names in plugins/registry.txt. The GeoIP, p0f, HELO, FROM, and RCPT fields are compressed to fit on a typical display. If you have a wider display, use the -l option to display longer lines and more detail.

Starting from left to right, in the first block, the results are interpreted as follows:

 geo - We see 2 connections from N. America, 3 from S. America, and 3 from Europe.
 p0f - One system is running FreeBSD and the rest are running Windows 7.
 krm - 3 of the connections will be rejected because of bad karma (sender history)
 dbl - 7 are from IPs on DNS blacklists, an offense worth rejecting for.
 rly - None of the IPs have relay permission.
 dns - Only three senders have Forward Confirmed Reverse DNS
 ear - two connections skipped testing (good karma), and the rest passed
 hlo - three of the senders failed to present valid HELO hostnames
 tls - one sender negotiated TLS
 bmf - none of the senders presented a from address in our badmailfrom list
 rbl - none of the sender domains are in a RHS blocking list
 rfh - resolvable_from_host: all the sender domains resolve
 spf - all but two connections fail SPF, meaning they are forging the envelope sender identity
 bto - badmailto: none of the recipients are in our badmailto list
 qmd - qmail_deliverable: the recipients are valid addresses on our system
 rok - the recipient domain is on our system
 tim - the number of seconds the connection was active

In the second block, we have two messages that were ultimately delivered.

 bog - no messages were bogus bounces
 hdr - the messages had valid headers
 dky - the messages were not DomainKeys signed
 dkm - two messages were DKIM signed and passed validation
 dmc - the message from amazon.com passed DMARC validation
 spm - spamassassin, one skipped processing, one passed
 dsp - dspam, one skipped, one passed
 clm - clamav, one skipped, one passed
 qqm - qmail queue, two messages were delivered

In the first block of entries, not a single connection made it past the DATA phase of the SMTP conversation, where the content tests kick in. Another interesting observation is that many connections purport to be from Google. Ah, you say, but does Google have Windows mail servers in Spain? If we look over to the SPF column, the lower case x is telling us that it failed SPF tests, meaning Google has explicitly told us that IP is not theirs. Instead of rejecting immediately, the SPF plugin deferred the rejection to B<naughty> to disconnect later.

=head1 AUTHOR

Matt Simerson <msimerson@cpan.org>

=cut