added dspam plugin
This commit is contained in:
parent
318c9ed4f2
commit
102e068297
@ -57,6 +57,8 @@ spamassassin
|
||||
#
|
||||
# spamassassin reject_threshold 20 munge_subject_threshold 10
|
||||
|
||||
# dspam must run after spamassassin for the learn_from_sa feature to work
|
||||
dspam learn_from_sa 7 reject 1
|
||||
|
||||
# run the clamav virus checking plugin
|
||||
# virus/clamav
|
||||
|
341
plugins/dspam
Normal file
341
plugins/dspam
Normal file
@ -0,0 +1,341 @@
|
||||
#!perl -Tw
|
||||
|
||||
=head1 NAME
|
||||
|
||||
dspam - dspam integration for qpsmtpd
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
qpsmtpd plugin that uses dspam to classify messages. Can use SpamAssassin to
|
||||
train dspam.
|
||||
|
||||
Adds the X-DSPAM-Result and X-DSPAM-Signature headers to messages. The latter is essential for
|
||||
training dspam and the former is useful to MDAs, MUAs, and humans.
|
||||
|
||||
=head1 TRAINING DSPAM
|
||||
|
||||
To get dspam into a useful state, it must be trained. The best method way to
|
||||
train dspam is to feed it two large equal sized corpuses of spam and ham from
|
||||
your mail server. The dspam authors suggest avoiding public corpuses. I do
|
||||
this as follows:
|
||||
|
||||
=over 4
|
||||
|
||||
=item learn from SpamAssassin
|
||||
|
||||
See the docs on the learn_from_sa feature in the CONFIG section.
|
||||
|
||||
=item daily training
|
||||
|
||||
I have a script that crawls the contents of every users maildir each night.
|
||||
The script builds two lists of messages: ham and spam.
|
||||
|
||||
The spam message list consists of all read messages in folders named Spam
|
||||
that have changed since the last spam learning run (normally 1 day).
|
||||
|
||||
The ham message list consists of read messages in any folder not named like
|
||||
Spam, Junk, Trash, or Deleted. This catches messages that users have read
|
||||
and left in their inbox, filed away into subfolders, and
|
||||
|
||||
=item on-the-fly training
|
||||
|
||||
=back
|
||||
|
||||
|
||||
|
||||
=head1 CONFIG
|
||||
|
||||
=over 4
|
||||
|
||||
=item dspam_bin
|
||||
|
||||
The path to the dspam binary. If yours is installed somewhere other
|
||||
than /usr/local/bin/dspam, you'll need to set this.
|
||||
|
||||
=item learn_from_sa
|
||||
|
||||
Dspam can be trained by SpamAssassin. This relationship between them requires
|
||||
attention to several important details:
|
||||
|
||||
=over 4
|
||||
|
||||
=item 1
|
||||
|
||||
dspam must be listed B<after> spamassassin in the config/plugins file.
|
||||
Because SA runs first, I crank the SA reject_threshold up above 100 so that
|
||||
all spam messages will be used to train dspam.
|
||||
|
||||
Once dspam is trained and errors are rare, I plan to run dspam first and
|
||||
reduce the SA load.
|
||||
|
||||
=item 2
|
||||
|
||||
Autolearn must be enabled and configured in SpamAssassin. SA autolearn
|
||||
preferences will determine whether a message is learned as spam or innocent
|
||||
by dspam. The settings to pay careful attention to in your SA local.cf file
|
||||
are bayes_auto_learn_threshold_spam and bayes_auto_learn_threshold_nonspam.
|
||||
Make sure they are both set to conservative values that are certain to
|
||||
yield no false positives.
|
||||
|
||||
If you are using learn_from_sa and reject, then messages that exceed the SA
|
||||
threshholds will cause dspam to reject them. Again I say, make sure them SA
|
||||
autolearn threshholds are set high enough to avoid false positives.
|
||||
|
||||
=item 3
|
||||
|
||||
dspam must be configured and working properly. I have modified the following
|
||||
dspam values on my system:
|
||||
|
||||
=over 4
|
||||
|
||||
=item mysql storage
|
||||
|
||||
=item Trust smtpd
|
||||
|
||||
=item TrainingMode tum
|
||||
|
||||
=item Tokenizer osb
|
||||
|
||||
=item Preference "trainingMode=TOE"
|
||||
|
||||
=item Preference "spamAction=deliver"
|
||||
|
||||
=item Preference "signatureLocation=headers"
|
||||
|
||||
=item TrainPristine off
|
||||
|
||||
=item ParseToHeaders off
|
||||
|
||||
=back
|
||||
|
||||
Of those changes, the most important is the signature location. This plugin
|
||||
only supports storing the signature in the headers. If you want to train dspam
|
||||
after delivery (ie, users moving messages to/from spam folders), then the
|
||||
dspam signature must be in the headers.
|
||||
|
||||
=back
|
||||
|
||||
=item reject
|
||||
|
||||
Set to a floating point value between 0 and 1.00 where 0 is no confidence
|
||||
and 1.0 is 100% confidence.
|
||||
|
||||
If dspam's confidence is greater than or equal to this threshold, the
|
||||
message will be rejected.
|
||||
|
||||
=back
|
||||
|
||||
|
||||
=head1 MULTIPLE RECIPIENT BEHAVIOR
|
||||
|
||||
For messages with multiple recipients, the user that dspam is running as will
|
||||
be the dspam username.
|
||||
|
||||
When messages have a single recipient, the recipient address is used as the
|
||||
dspam username. For dspam to trust qpsmtpd with modifying the username, you
|
||||
B<must> add the username that qpsmtpd is running to to the dspamd.conf file.
|
||||
|
||||
ie, (Trust smtpd).
|
||||
|
||||
=head1 CHANGES
|
||||
|
||||
=cut
|
||||
|
||||
use strict;
|
||||
|
||||
use Qpsmtpd::Constants;
|
||||
use Qpsmtpd::DSN;
|
||||
use IO::Handle;
|
||||
use Socket qw(:DEFAULT :crlf);
|
||||
|
||||
sub register {
|
||||
my ($self, $qp, @args) = @_;
|
||||
|
||||
$self->log(LOGERROR, "Bad parameters for the dspam plugin") if @_ % 2;
|
||||
|
||||
%{$self->{_args}} = @args;
|
||||
|
||||
$self->register_hook('data_post', 'dspam_reject')
|
||||
if $self->{_args}->{reject};
|
||||
}
|
||||
|
||||
sub hook_data_post {
|
||||
my ($self, $transaction) = @_;
|
||||
|
||||
$self->log(LOGDEBUG, "check_dspam");
|
||||
return (DECLINED) if $transaction->data_size > 500_000;
|
||||
|
||||
my $username = $self->select_username( $transaction );
|
||||
my $message = $self->assemble_message($transaction);
|
||||
my $filtercmd = $self->get_filter_cmd( $transaction, $username );
|
||||
$self->log(LOGWARN, $filtercmd);
|
||||
|
||||
my $response = $self->dspam_process( $filtercmd, $message );
|
||||
if ( ! $response ) {
|
||||
$self->log(LOGWARN, "No response received from dspam. Check your logs for errors.");
|
||||
return (DECLINED);
|
||||
};
|
||||
$self->log(LOGWARN, $response);
|
||||
|
||||
# X-DSPAM-Result: user@example.com; result="Spam"; class="Spam"; probability=1.0000; confidence=1.00; signature=N/A
|
||||
# X-DSPAM-Result: smtpd; result="Innocent"; class="Innocent"; probability=0.0023; confidence=1.00; signature=4f8dae6a446008399211546
|
||||
my ($result,$prob,$conf,$sig) = $response =~ /result=\"(Spam|Innocent)\";.*?probability=([\d\.]+); confidence=([\d\.]+); signature=(.*)/;
|
||||
my $header_str = "$result, probability=$prob, confidence=$conf";
|
||||
$self->log(LOGWARN, $header_str);
|
||||
$transaction->header->add('X-DSPAM-Result', $header_str, 0);
|
||||
|
||||
# the signature header is required if you intend to train dspam later
|
||||
# you must set Preference "signatureLocation=headers" in dspam.conf
|
||||
$transaction->header->add('X-DSPAM-Signature', $sig, 0);
|
||||
|
||||
return (DECLINED);
|
||||
};
|
||||
|
||||
sub select_username {
|
||||
my ($self, $transaction) = @_;
|
||||
|
||||
my $recipient_count = scalar $transaction->recipients;
|
||||
$self->log(LOGDEBUG, "Message has $recipient_count recipients");
|
||||
|
||||
if ( $recipient_count > 1 ) {
|
||||
$self->log(LOGINFO, "skipping user prefs, $recipient_count recipients detected.");
|
||||
return getpwuid($>);
|
||||
};
|
||||
|
||||
# use the recipients email address as username. This enables user prefs
|
||||
my $username = ($transaction->recipients)[0]->address;
|
||||
return lc($username);
|
||||
};
|
||||
|
||||
sub assemble_message {
|
||||
my ($self, $transaction) = @_;
|
||||
|
||||
$transaction->body_resetpos;
|
||||
|
||||
my $message = "X-Envelope-From: "
|
||||
. $transaction->sender->format . "\n"
|
||||
. $transaction->header->as_string . "\n\n";
|
||||
|
||||
while (my $line = $transaction->body_getline) { $message .= $line; };
|
||||
|
||||
$message = join(CRLF, split/\n/, $message);
|
||||
return $message . CRLF;
|
||||
};
|
||||
|
||||
sub dspam_process {
|
||||
my ( $self, $filtercmd, $message ) = @_;
|
||||
|
||||
#return $self->dspam_process_open2( $filtercmd, $message );
|
||||
|
||||
my ($in_fh, $out_fh);
|
||||
if (! open($in_fh, "-|")) {
|
||||
open($out_fh, "|$filtercmd") or die "Can't run $filtercmd: $!\n";
|
||||
print $out_fh $message;
|
||||
close $out_fh;
|
||||
exit(0);
|
||||
};
|
||||
my $response = join('', <$in_fh>);
|
||||
close $in_fh;
|
||||
chomp $response;
|
||||
|
||||
return $response;
|
||||
};
|
||||
|
||||
sub dspam_process_open2 {
|
||||
my ( $self, $filtercmd, $message ) = @_;
|
||||
|
||||
# not sure why, but this is not as reliable as I'd like. What's a dspam
|
||||
# error -5 mean anyway?
|
||||
use FileHandle;
|
||||
use IPC::Open2;
|
||||
my ($dspam_in, $dspam_out);
|
||||
my $pid = open2($dspam_out, $dspam_in, $filtercmd);
|
||||
print $dspam_in $message;
|
||||
close $dspam_in;
|
||||
my $response = join('', <$dspam_out>);
|
||||
waitpid $pid, 0;
|
||||
chomp $response;
|
||||
return $response;
|
||||
};
|
||||
|
||||
sub dspam_reject {
|
||||
my ($self, $transaction) = @_;
|
||||
|
||||
return (DECLINED) if ! $self->{_args}->{reject};
|
||||
|
||||
my $status = $transaction->header->get('X-DSPAM-Result') or do {
|
||||
$self->log(LOGWARN, "dspam_reject: failed to find the dspam header");
|
||||
return (DECLINED);
|
||||
};
|
||||
my ($clas,$probability,$confidence) = $status =~ m/^(Spam|Innocent), probability=([\d\.]+), confidence=([\d\.]+)/i;
|
||||
|
||||
$self->log(LOGDEBUG, "dspam $clas, prob: $probability, conf: $confidence");
|
||||
|
||||
if ( $clas eq 'Spam' && $probability == 1 && $confidence == 1 ) {
|
||||
# default of media_unsupported is DENY, so just change the message
|
||||
if ( $self->qp->connection->relay_client ) {
|
||||
$self->log(LOGWARN, "allowing spam since user authenticated");
|
||||
return DECLINED;
|
||||
};
|
||||
return Qpsmtpd::DSN->media_unsupported('dspam says, no spam please');
|
||||
};
|
||||
|
||||
return DECLINED;
|
||||
}
|
||||
|
||||
sub get_filter_cmd {
|
||||
my ($self, $transaction, $user) = @_;
|
||||
|
||||
my $dspam_bin = $self->{_args}->{dspam_bin} || '/usr/local/bin/dspam';
|
||||
my $default = "$dspam_bin --user $user --mode=tum --process --deliver=summary --stdout";
|
||||
my $min_score = $self->{_args}->{learn_from_sa} or return $default;
|
||||
|
||||
#$self->log(LOGDEBUG, "attempting to learn from SA");
|
||||
my $sa_status = $transaction->header->get('X-Spam-Status');
|
||||
|
||||
if ( ! $sa_status ) {
|
||||
$self->log(LOGERROR, "dspam learn_from_sa was set but no X-Spam-Status header detected");
|
||||
return $default;
|
||||
};
|
||||
chomp $sa_status;
|
||||
|
||||
my ($is_spam,$score,$autolearn) = $sa_status =~ /^(yes|no), score=([\d\.\-]+)\s.*?autolearn=([\w]+)/i;
|
||||
$self->log(LOGINFO, "sa_status: $sa_status; $is_spam; $autolearn");
|
||||
|
||||
$is_spam = lc($is_spam);
|
||||
$autolearn = lc($autolearn);
|
||||
|
||||
if ( $is_spam eq 'yes' && $score < $min_score ) {
|
||||
$self->log(LOGWARN, "SA spam score of $score is less than $min_score, skipping autolearn");
|
||||
return $default;
|
||||
};
|
||||
|
||||
if ( $is_spam eq 'yes' && $autolearn eq 'spam' ) {
|
||||
return "$dspam_bin --user $user --mode=tum --source=corpus --class=spam --deliver=summary --stdout";
|
||||
}
|
||||
elsif ( $is_spam eq 'no' && $autolearn eq 'ham' ) {
|
||||
return "$dspam_bin --user $user --mode=tum --source=corpus --class=innocent --deliver=summary --stdout";
|
||||
};
|
||||
|
||||
return $default;
|
||||
};
|
||||
|
||||
sub _cleanup_spam_header {
|
||||
my ($self, $transaction, $header_name) = @_;
|
||||
|
||||
my $action = 'rename';
|
||||
if ( $self->{_args}->{leave_old_headers} ) {
|
||||
$action = lc($self->{_args}->{leave_old_headers});
|
||||
};
|
||||
|
||||
return unless $action eq 'drop' || $action eq 'rename';
|
||||
|
||||
my $old_header_name = $header_name;
|
||||
$old_header_name = ($old_header_name =~ s/^X-//) ? "X-Old-$old_header_name" : "Old-$old_header_name";
|
||||
|
||||
for my $header ( $transaction->header->get($header_name) ) {
|
||||
$transaction->header->add($old_header_name, $header) if $action eq 'rename';
|
||||
$transaction->header->delete($header_name);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user