dspam: a batch of improvements:
expanded POD; cleaned up stray EOL spaces; added lots of logging, with standardized [ pass | fail | skip ] prefixes; added reject_type option; use split for parsing dspam headers; use SA note instead of parsing headers; added reject = agree option; store & fetch dspam results in a note
This commit is contained in:
parent
d644c24c83
commit
205120f26f
233
plugins/dspam
233
plugins/dspam
@ -12,12 +12,19 @@ train dspam.
|
||||
Adds the X-DSPAM-Result and X-DSPAM-Signature headers to messages. The latter is essential for
|
||||
training dspam and the former is useful to MDAs, MUAs, and humans.
|
||||
|
||||
Adds a transaction note to the qpsmtpd transaction. The note is a hashref
|
||||
with at least the 'class' field (Spam,Innocent,Whitelisted). It will normally
|
||||
contain probability and confidence ratings as well.
|
||||
|
||||
=head1 TRAINING DSPAM
|
||||
|
||||
Do not just enable dspam! Its false positive rate when untrained is high. The
|
||||
good news is that dspam learns very, very fast.
|
||||
|
||||
To get dspam into a useful state, it must be trained. The best way to
|
||||
train dspam is to feed it two large equal sized corpuses of spam and ham from
|
||||
your mail server. The dspam authors suggest avoiding public corpuses. I do
|
||||
this as follows:
|
||||
your mail server. The dspam authors suggest avoiding public corpuses. I train
|
||||
dspam as follows:
|
||||
|
||||
=over 4
|
||||
|
||||
@ -25,34 +32,31 @@ this as follows:
|
||||
|
||||
See the docs on the learn_from_sa feature in the CONFIG section.
|
||||
|
||||
=item daily training
|
||||
=item periodic training
|
||||
|
||||
I have a script that crawls the contents of every user's maildir each night.
|
||||
The script builds two lists of messages: ham and spam.
|
||||
|
||||
The spam message list consists of all read messages in folders named Spam
|
||||
that have changed since the last spam learning run (normally 1 day).
|
||||
I have a script that searches the contents of every user's maildir. Any read
|
||||
messages that have changed since the last processing run are learned as ham
|
||||
or spam.
|
||||
|
||||
The ham message list consists of read messages in any folder not named like
|
||||
Spam, Junk, Trash, or Deleted. This catches messages that users have read
|
||||
and left in their inbox, filed away into subfolders, and
|
||||
and left in their inbox or filed away into subfolders.
|
||||
|
||||
=item on-the-fly training
|
||||
|
||||
The dovecot IMAP server has an antispam plugin that will train dspam when
|
||||
messages are moved to/from the Spam folder.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
|
||||
=head1 CONFIG
|
||||
|
||||
=over 4
|
||||
|
||||
=item dspam_bin
|
||||
=head2 dspam_bin
|
||||
|
||||
The path to the dspam binary. If yours is installed somewhere other
|
||||
than /usr/local/bin/dspam, you'll need to set this.
|
||||
|
||||
=item learn_from_sa
|
||||
=head2 learn_from_sa
|
||||
|
||||
Dspam can be trained by SpamAssassin. This relationship between them requires
|
||||
attention to several important details:
|
||||
@ -113,18 +117,26 @@ only supports storing the signature in the headers. If you want to train dspam
|
||||
after delivery (ie, users moving messages to/from spam folders), then the
|
||||
dspam signature must be in the headers.
|
||||
|
||||
When using the dspam MySQL backend, use InnoDB tables. Dspam training
|
||||
is dramatically slowed by MyISAM table locks and dspam requires lots
|
||||
of training. InnoDB has row level locking and updates are much faster.
|
||||
|
||||
=back
|
||||
|
||||
=item reject
|
||||
=head2 reject
|
||||
|
||||
Set to a floating point value between 0 and 1.00 where 0 is no confidence
|
||||
and 1.0 is 100% confidence.
|
||||
|
||||
If dspam's confidence is greater than or equal to this threshold, the
|
||||
message will be rejected.
|
||||
message will be rejected. The default is 1.00.
|
||||
|
||||
=back
|
||||
=head2 reject_type
|
||||
|
||||
reject_type [ temp | perm ]
|
||||
|
||||
By default, rejects are permanent (5xx). Set this to temp if you want to
|
||||
defer mail instead of rejecting it with dspam.
|
||||
|
||||
=head1 MULTIPLE RECIPIENT BEHAVIOR
|
||||
|
||||
@ -139,9 +151,14 @@ ie, (Trust smtpd).
|
||||
|
||||
=head1 CHANGES
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Matt Simerson - 2012
|
||||
|
||||
=cut
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Qpsmtpd::Constants;
|
||||
use Qpsmtpd::DSN;
|
||||
@ -149,43 +166,46 @@ use IO::Handle;
|
||||
use Socket qw(:DEFAULT :crlf);
|
||||
|
||||
sub register {
|
||||
my ($self, $qp, @args) = @_;
|
||||
my ($self, $qp, %args) = @_;
|
||||
|
||||
$self->log(LOGERROR, "Bad parameters for the dspam plugin") if @_ % 2;
|
||||
|
||||
%{$self->{_args}} = @args;
|
||||
$self->{_args} = { %args };
|
||||
$self->{_args}{reject} = defined $args{reject} ? $args{reject} : 1;
|
||||
$self->{_args}{reject_type} = $args{reject_type} || 'perm';
|
||||
|
||||
$self->register_hook('data_post', 'dspam_reject')
|
||||
if $self->{_args}->{reject};
|
||||
$self->register_hook('data_post', 'dspam_reject');
|
||||
}
|
||||
|
||||
sub hook_data_post {
|
||||
my ($self, $transaction) = @_;
|
||||
|
||||
$self->log(LOGDEBUG, "check_dspam");
|
||||
return (DECLINED) if $transaction->data_size > 500_000;
|
||||
if ( $transaction->data_size > 500_000 ) {
|
||||
$self->log(LOGINFO, "skip: message too large (" . $transaction->data_size . ")" );
|
||||
return (DECLINED);
|
||||
};
|
||||
|
||||
my $username = $self->select_username( $transaction );
|
||||
my $message = $self->assemble_message($transaction);
|
||||
my $filtercmd = $self->get_filter_cmd( $transaction, $username );
|
||||
$self->log(LOGWARN, $filtercmd);
|
||||
$self->log(LOGDEBUG, $filtercmd);
|
||||
|
||||
my $response = $self->dspam_process( $filtercmd, $message );
|
||||
if ( ! $response ) {
|
||||
$self->log(LOGWARN, "No response received from dspam. Check your logs for errors.");
|
||||
$self->log(LOGWARN, "skip: no response from dspam. Check logs for errors.");
|
||||
return (DECLINED);
|
||||
};
|
||||
$self->log(LOGWARN, $response);
|
||||
|
||||
# X-DSPAM-Result: user@example.com; result="Spam"; class="Spam"; probability=1.0000; confidence=1.00; signature=N/A
|
||||
# X-DSPAM-Result: smtpd; result="Innocent"; class="Innocent"; probability=0.0023; confidence=1.00; signature=4f8dae6a446008399211546
|
||||
my ($result,$prob,$conf,$sig) = $response =~ /result=\"(Spam|Innocent)\";.*?probability=([\d\.]+); confidence=([\d\.]+); signature=(.*)/;
|
||||
my $header_str = "$result, probability=$prob, confidence=$conf";
|
||||
$self->log(LOGWARN, $header_str);
|
||||
$transaction->header->add('X-DSPAM-Result', $header_str, 0);
|
||||
$self->log(LOGDEBUG, $header_str);
|
||||
$transaction->header->replace('X-DSPAM-Result', $header_str, 0);
|
||||
|
||||
# the signature header is required if you intend to train dspam later
|
||||
# you must set Preference "signatureLocation=headers" in dspam.conf
|
||||
# the signature header is required if you intend to train dspam later.
|
||||
# In dspam.conf, set: Preference "signatureLocation=headers"
|
||||
$transaction->header->add('X-DSPAM-Signature', $sig, 0);
|
||||
|
||||
return (DECLINED);
|
||||
@ -228,16 +248,17 @@ sub dspam_process {
|
||||
#return $self->dspam_process_open2( $filtercmd, $message );
|
||||
|
||||
my ($in_fh, $out_fh);
|
||||
if (! open($in_fh, "-|")) {
|
||||
if (! open($in_fh, '-|')) {
|
||||
open($out_fh, "|$filtercmd") or die "Can't run $filtercmd: $!\n";
|
||||
print $out_fh $message;
|
||||
close $out_fh;
|
||||
exit(0);
|
||||
};
|
||||
my $response = join('', <$in_fh>);
|
||||
#my $response = join('', <$in_fh>);
|
||||
my $response = <$in_fh>;
|
||||
close $in_fh;
|
||||
chomp $response;
|
||||
|
||||
$self->log(LOGDEBUG, $response);
|
||||
return $response;
|
||||
};
|
||||
|
||||
@ -252,37 +273,107 @@ sub dspam_process_open2 {
|
||||
my $pid = open2($dspam_out, $dspam_in, $filtercmd);
|
||||
print $dspam_in $message;
|
||||
close $dspam_in;
|
||||
my $response = join('', <$dspam_out>);
|
||||
#my $response = join('', <$dspam_out>); # get full response
|
||||
my $response = <$dspam_out>; # get first line only
|
||||
waitpid $pid, 0;
|
||||
chomp $response;
|
||||
$self->log(LOGDEBUG, $response);
|
||||
return $response;
|
||||
};
|
||||
|
||||
sub dspam_reject {
|
||||
my ($self, $transaction) = @_;
|
||||
|
||||
return (DECLINED) if ! $self->{_args}->{reject};
|
||||
my $d = $self->get_dspam_results( $transaction ) or return;
|
||||
|
||||
my $status = $transaction->header->get('X-DSPAM-Result') or do {
|
||||
$self->log(LOGWARN, "dspam_reject: failed to find the dspam header");
|
||||
return (DECLINED);
|
||||
};
|
||||
my ($clas,$probability,$confidence) = $status =~ m/^(Spam|Innocent), probability=([\d\.]+), confidence=([\d\.]+)/i;
|
||||
|
||||
$self->log(LOGDEBUG, "dspam $clas, prob: $probability, conf: $confidence");
|
||||
|
||||
if ( $clas eq 'Spam' && $probability == 1 && $confidence == 1 ) {
|
||||
# default of media_unsupported is DENY, so just change the message
|
||||
if ( $self->qp->connection->relay_client ) {
|
||||
$self->log(LOGWARN, "allowing spam since user authenticated");
|
||||
return DECLINED;
|
||||
};
|
||||
return Qpsmtpd::DSN->media_unsupported('dspam says, no spam please');
|
||||
if ( ! $d->{class} ) {
|
||||
$self->log(LOGWARN, "skip: no dspam class detected");
|
||||
return DECLINED;
|
||||
};
|
||||
|
||||
return DECLINED;
|
||||
my $status = "$d->{class}, $d->{confidence} c.";
|
||||
my $reject = $self->{_args}{reject} or do {
|
||||
$self->log(LOGINFO, "skip: reject disabled ($status)");
|
||||
return DECLINED;
|
||||
};
|
||||
|
||||
if ( $reject eq 'agree' ) {
|
||||
return $self->dspam_reject_agree( $transaction, $d );
|
||||
};
|
||||
if ( $d->{class} eq 'Innocent' ) {
|
||||
$self->log(LOGINFO, "pass: $status");
|
||||
return DECLINED;
|
||||
};
|
||||
if ( $self->qp->connection->relay_client ) {
|
||||
$self->log(LOGINFO, "skip: allowing spam, user authenticated ($status)");
|
||||
return DECLINED;
|
||||
};
|
||||
if ( $d->{probability} <= $reject ) {
|
||||
$self->log(LOGINFO, "pass, $d->{class} probability is too low ($d->{probability} < $reject)");
|
||||
return DECLINED;
|
||||
};
|
||||
if ( $d->{confidence} != 1 ) {
|
||||
$self->log(LOGINFO, "pass: $d->{class} confidence is too low ($d->{confidence})");
|
||||
return DECLINED;
|
||||
};
|
||||
|
||||
# dspam is more than $reject percent sure this message is spam
|
||||
$self->log(LOGINFO, "fail: $d->{class}, ($d->{confidence} confident)");
|
||||
my $deny = $self->{_args}{reject_type} eq 'temp' ? DENYSOFT : DENY;
|
||||
return Qpsmtpd::DSN->media_unsupported($deny,'dspam says, no spam please');
|
||||
}
|
||||
|
||||
# Reject a message only when dspam and SpamAssassin both call it spam.
#
# Arguments:
#   $transaction - the qpsmtpd transaction; its 'spamassassin' note is read
#   $d           - hashref of dspam results (the class and confidence keys
#                  are used)
#
# Returns DECLINED when the SpamAssassin note (or its is_spam field) is
# missing, or when the two filters do not both say spam. Otherwise returns
# a media_unsupported DSN: DENYSOFT when the reject_type option is 'temp',
# DENY in every other case.
sub dspam_reject_agree {
    my ($self, $transaction, $d ) = @_;

    my $sa = $transaction->notes('spamassassin' );

    my $status = "$d->{class}, $d->{confidence} c";

    # without a SpamAssassin verdict there is nothing to agree with
    if ( ! $sa || ! $sa->{is_spam} ) {
        $self->log(LOGINFO, "pass: cannot agree, SA results missing ($status)");
        return DECLINED;
    };

    if ( $d->{class} eq 'Spam' && $sa->{is_spam} eq 'Yes' ) {
        $self->log(LOGINFO, "fail: agree, $status");

        # honor reject_type here too, so 'temp' defers instead of bouncing
        my $reject_type = $self->{_args}{reject_type} || 'perm';
        my $deny = $reject_type eq 'temp' ? DENYSOFT : DENY;
        return Qpsmtpd::DSN->media_unsupported($deny,'we agree, no spam please');
    };

    $self->log(LOGINFO, "pass: agree, $status");
    return DECLINED;
}
|
||||
|
||||
# Fetch the dspam verdict for a transaction as a hashref.
#
# If the transaction already carries a 'dspam' note, that note is returned
# as is. Otherwise the X-DSPAM-Result header is split on commas: the first
# field becomes the 'class' key and each remaining "key=value" field becomes
# a hash entry. The parsed hash is stored in the 'dspam' note and returned.
#
# Returns nothing (after logging a warning) when the header is absent.
sub get_dspam_results {
    my ( $self, $transaction ) = @_;

    # reuse results saved by an earlier call
    my $cached = $transaction->notes('dspam');
    return $cached if $cached;

    my $string = $transaction->header->get('X-DSPAM-Result');
    if ( ! $string ) {
        $self->log(LOGWARN, "get_dspam_results: failed to find the header");
        return;
    }

    my @fields = split(/,\s+/, $string);
    chomp @fields;    # the header value ends with a newline
    my ($class, @pairs) = @fields;

    my %d = map { my ($key, $val) = split(/=/, $_); ($key => $val) } @pairs;
    $d{class} = $class;

    my $message = $d{class};
    $message .= ", prob: $d{probability}, conf: $d{confidence}"
        if defined $d{probability} && defined $d{confidence};
    $self->log(LOGDEBUG, $message);

    $transaction->notes('dspam', \%d);
    return \%d;
}
|
||||
|
||||
sub get_filter_cmd {
|
||||
my ($self, $transaction, $user) = @_;
|
||||
|
||||
@ -291,51 +382,23 @@ sub get_filter_cmd {
|
||||
my $min_score = $self->{_args}->{learn_from_sa} or return $default;
|
||||
|
||||
#$self->log(LOGDEBUG, "attempting to learn from SA");
|
||||
my $sa_status = $transaction->header->get('X-Spam-Status');
|
||||
|
||||
if ( ! $sa_status ) {
|
||||
$self->log(LOGERROR, "dspam learn_from_sa was set but no X-Spam-Status header detected");
|
||||
return $default;
|
||||
};
|
||||
chomp $sa_status;
|
||||
my $sa = $transaction->notes('spamassassin' );
|
||||
return $default if ! $sa || ! $sa->{is_spam};
|
||||
|
||||
my ($is_spam,$score,$autolearn) = $sa_status =~ /^(yes|no), score=([\d\.\-]+)\s.*?autolearn=([\w]+)/i;
|
||||
$self->log(LOGINFO, "sa_status: $sa_status; $is_spam; $autolearn");
|
||||
|
||||
$is_spam = lc($is_spam);
|
||||
$autolearn = lc($autolearn);
|
||||
|
||||
if ( $is_spam eq 'yes' && $score < $min_score ) {
|
||||
$self->log(LOGWARN, "SA spam score of $score is less than $min_score, skipping autolearn");
|
||||
if ( $sa->{is_spam} eq 'Yes' && $sa->{score} < $min_score ) {
|
||||
$self->log(LOGNOTICE, "SA score $sa->{score} < $min_score, skip autolearn");
|
||||
return $default;
|
||||
};
|
||||
|
||||
if ( $is_spam eq 'yes' && $autolearn eq 'spam' ) {
|
||||
if ( $sa->{is_spam} eq 'Yes' && $sa->{autolearn} eq 'spam' ) {
|
||||
return "$dspam_bin --user $user --mode=tum --source=corpus --class=spam --deliver=summary --stdout";
|
||||
}
|
||||
elsif ( $is_spam eq 'no' && $autolearn eq 'ham' ) {
|
||||
elsif ( $sa->{is_spam} eq 'No' && $sa->{autolearn} eq 'ham' ) {
|
||||
return "$dspam_bin --user $user --mode=tum --source=corpus --class=innocent --deliver=summary --stdout";
|
||||
};
|
||||
|
||||
return $default;
|
||||
};
|
||||
|
||||
# Deal with copies of $header_name already present on the message.
#
# The leave_old_headers option selects the action (compared in lower case):
#   rename - (default) copy each value to an "Old-" prefixed header
#            (X-Foo becomes X-Old-Foo, Foo becomes Old-Foo), then delete
#            the original header
#   drop   - delete the original header
# Any other value leaves the headers untouched.
sub _cleanup_spam_header {
    my ($self, $transaction, $header_name) = @_;

    my $policy = $self->{_args}->{leave_old_headers};
    my $action = $policy ? lc($policy) : 'rename';

    return if $action ne 'drop' && $action ne 'rename';

    # keep a leading X- in front of the Old- marker
    my $old_header_name = $header_name =~ /^X-(.*)/s
                        ? "X-Old-$1"
                        : "Old-$header_name";

    foreach my $value ( $transaction->header->get($header_name) ) {
        if ( $action eq 'rename' ) {
            $transaction->header->add($old_header_name, $value);
        }
        $transaction->header->delete($header_name);
    }
}
|
||||
|
||||
|
97
t/plugin_tests/dspam
Normal file
97
t/plugin_tests/dspam
Normal file
@ -0,0 +1,97 @@
|
||||
#!perl -w
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Mail::Header;
|
||||
use Qpsmtpd::Constants;
|
||||
|
||||
my $r;
|
||||
|
||||
# Register each test sub in this file together with the number of
# assertions it runs.
sub register_tests {
    my ($self) = @_;

    my @plan = (
        [ 'test_get_filter_cmd',    2 ],
        [ 'test_get_dspam_results', 6 ],
        [ 'test_dspam_reject',      6 ],
    );

    $self->register_test( @{$_} ) for @plan;
}
|
||||
|
||||
# Exercise dspam_reject() with canned 'dspam' (and 'spamassassin') notes.
#
# Every case sets the reject option explicitly, so no case depends on
# whatever value an earlier case or the plugin's register() left behind.
# Each assertion carries its own description so a failure names its case.
sub test_dspam_reject {
    my $self = shift;

    my $transaction = $self->qp->transaction;

    # the DENY expectations below assume permanent rejects
    local $self->{_args}{reject_type} = 'perm';

    my @cases = (
        {   name   => 'reject disabled',
            reject => 0,
            dspam  => { class => 'Spam', probability => .99, confidence => 1 },
            expect => DECLINED,
        },
        {   name   => 'reject threshold exceeded',
            reject => .95,
            dspam  => { class => 'Spam', probability => .99, confidence => 1 },
            expect => DENY,
        },
        {   name   => 'below reject threshold',
            reject => .95,
            dspam  => { class => 'Spam', probability => .94, confidence => 1 },
            expect => DECLINED,
        },
        {   name   => 'agree, dspam and SA both say spam',
            reject => 'agree',
            sa     => { is_spam => 'Yes' },
            dspam  => { class => 'Spam', probability => .90, confidence => 1 },
            expect => DENY,
        },
        {   name   => 'agree, SA says ham',
            reject => 'agree',
            sa     => { is_spam => 'No' },
            dspam  => { class => 'Spam', probability => .96, confidence => 1 },
            expect => DECLINED,
        },
        {   name   => 'agree, dspam says innocent',
            reject => 'agree',
            sa     => { is_spam => 'Yes' },
            dspam  => { class => 'Innocent', probability => .96, confidence => 1 },
            expect => DECLINED,
        },
    );

    foreach my $case ( @cases ) {
        $self->{_args}{reject} = $case->{reject};
        $transaction->notes('spamassassin', $case->{sa} ) if $case->{sa};
        $transaction->notes('dspam', $case->{dspam} );

        # dspam_reject may return (code, message); only the code is checked
        my ($code) = $self->dspam_reject( $transaction );
        cmp_ok( $code, '==', $case->{expect}, "dspam_reject, $case->{name} ($code)");
    };
}
|
||||
|
||||
# Feed get_dspam_results() a series of sample X-DSPAM-Result header values
# and check that each one is parsed into a hashref whose class matches the
# first field of the header.
sub test_get_dspam_results {
    my $self = shift;

    my $transaction = $self->qp->transaction;
    my $header = Mail::Header->new(Modify => 0, MailFrom => "COERCE");
    $transaction->header( $header );

    my @dspam_sample_headers = (
        'Innocent, probability=0.0000, confidence=0.69',
        'Innocent, probability=0.0000, confidence=0.85',
        'Innocent, probability=0.0023, confidence=1.00',
        'Spam, probability=1.0000, confidence=0.87',
        'Spam, probability=1.0000, confidence=0.99',
        'Whitelisted',
    );

    foreach my $sample ( @dspam_sample_headers ) {
        # get_dspam_results returns the 'dspam' note when one is set, so
        # clear it or every sample after the first is never parsed.
        # NOTE(review): assumes notes() accepts undef as a value - confirm
        $transaction->notes('dspam', undef);

        $transaction->header->delete('X-DSPAM-Result');
        $transaction->header->add('X-DSPAM-Result', $sample);

        my ($expected_class) = split(/,/, $sample);
        my $r = $self->get_dspam_results($transaction);
        ok( ref($r) && $r->{class} eq $expected_class,
            "get_dspam_results ($sample)" );
    };
}
|
||||
|
||||
# Check the dspam command line that get_filter_cmd() builds for each sample
# user when no SpamAssassin-driven learning applies.
# NOTE(review): the expected string assumes the default command embeds the
# user that was passed in - confirm against get_filter_cmd.
sub test_get_filter_cmd {
    my $self = shift;

    my $transaction = $self->qp->transaction;
    my $dspam = "/usr/local/bin/dspam";
    $self->{_args}{dspam_bin} = $dspam;

    foreach my $user ( qw/ smtpd matt@example.com / ) {
        # build both the expectation and the call from $user, so the second
        # iteration really tests the second user
        my $answer = "$dspam --user $user --mode=tum --process --deliver=summary --stdout";
        my $r = $self->get_filter_cmd($transaction, $user);
        cmp_ok( $r, 'eq', $answer, "get_filter_cmd $user" );
    };
}
|
Loading…
Reference in New Issue
Block a user