342 lines
10 KiB
Plaintext
342 lines
10 KiB
Plaintext
|
#!perl -Tw
|
||
|
|
||
|
=head1 NAME
|
||
|
|
||
|
dspam - dspam integration for qpsmtpd
|
||
|
|
||
|
=head1 DESCRIPTION
|
||
|
|
||
|
qpsmtpd plugin that uses dspam to classify messages. Can use SpamAssassin to
|
||
|
train dspam.
|
||
|
|
||
|
Adds the X-DSPAM-Result and X-DSPAM-Signature headers to messages. The latter is essential for
|
||
|
training dspam and the former is useful to MDAs, MUAs, and humans.
|
||
|
|
||
|
=head1 TRAINING DSPAM
|
||
|
|
||
|
To get dspam into a useful state, it must be trained. The best way to
|
||
|
train dspam is to feed it two large equal sized corpuses of spam and ham from
|
||
|
your mail server. The dspam authors suggest avoiding public corpuses. I do
|
||
|
this as follows:
|
||
|
|
||
|
=over 4
|
||
|
|
||
|
=item learn from SpamAssassin
|
||
|
|
||
|
See the docs on the learn_from_sa feature in the CONFIG section.
|
||
|
|
||
|
=item daily training
|
||
|
|
||
|
I have a script that crawls the contents of every user's maildir each night.
|
||
|
The script builds two lists of messages: ham and spam.
|
||
|
|
||
|
The spam message list consists of all read messages in folders named Spam
|
||
|
that have changed since the last spam learning run (normally 1 day).
|
||
|
|
||
|
The ham message list consists of read messages in any folder not named like
|
||
|
Spam, Junk, Trash, or Deleted. This catches messages that users have read
|
||
|
and left in their inbox, filed away into subfolders, and
|
||
|
|
||
|
=item on-the-fly training
|
||
|
|
||
|
=back
|
||
|
|
||
|
|
||
|
|
||
|
=head1 CONFIG
|
||
|
|
||
|
=over 4
|
||
|
|
||
|
=item dspam_bin
|
||
|
|
||
|
The path to the dspam binary. If yours is installed somewhere other
|
||
|
than /usr/local/bin/dspam, you'll need to set this.
|
||
|
|
||
|
=item learn_from_sa
|
||
|
|
||
|
Dspam can be trained by SpamAssassin. This relationship between them requires
|
||
|
attention to several important details:
|
||
|
|
||
|
=over 4
|
||
|
|
||
|
=item 1
|
||
|
|
||
|
dspam must be listed B<after> spamassassin in the config/plugins file.
|
||
|
Because SA runs first, I crank the SA reject_threshold up above 100 so that
|
||
|
all spam messages will be used to train dspam.
|
||
|
|
||
|
Once dspam is trained and errors are rare, I plan to run dspam first and
|
||
|
reduce the SA load.
|
||
|
|
||
|
=item 2
|
||
|
|
||
|
Autolearn must be enabled and configured in SpamAssassin. SA autolearn
|
||
|
preferences will determine whether a message is learned as spam or innocent
|
||
|
by dspam. The settings to pay careful attention to in your SA local.cf file
|
||
|
are bayes_auto_learn_threshold_spam and bayes_auto_learn_threshold_nonspam.
|
||
|
Make sure they are both set to conservative values that are certain to
|
||
|
yield no false positives.
|
||
|
|
||
|
If you are using learn_from_sa and reject, then messages that exceed the SA
|
||
|
thresholds will cause dspam to reject them. Again I say, make sure the SA
|
||
|
autolearn thresholds are set high enough to avoid false positives.
|
||
|
|
||
|
=item 3
|
||
|
|
||
|
dspam must be configured and working properly. I have modified the following
|
||
|
dspam values on my system:
|
||
|
|
||
|
=over 4
|
||
|
|
||
|
=item mysql storage
|
||
|
|
||
|
=item Trust smtpd
|
||
|
|
||
|
=item TrainingMode tum
|
||
|
|
||
|
=item Tokenizer osb
|
||
|
|
||
|
=item Preference "trainingMode=TOE"
|
||
|
|
||
|
=item Preference "spamAction=deliver"
|
||
|
|
||
|
=item Preference "signatureLocation=headers"
|
||
|
|
||
|
=item TrainPristine off
|
||
|
|
||
|
=item ParseToHeaders off
|
||
|
|
||
|
=back
|
||
|
|
||
|
Of those changes, the most important is the signature location. This plugin
|
||
|
only supports storing the signature in the headers. If you want to train dspam
|
||
|
after delivery (ie, users moving messages to/from spam folders), then the
|
||
|
dspam signature must be in the headers.
|
||
|
|
||
|
=back
|
||
|
|
||
|
=item reject
|
||
|
|
||
|
Set to a floating point value between 0 and 1.00 where 0 is no confidence
|
||
|
and 1.0 is 100% confidence.
|
||
|
|
||
|
If dspam's confidence is greater than or equal to this threshold, the
|
||
|
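message will be rejected. Note: the current dspam_reject code only checks that
this option is set; it rejects messages that dspam classifies as Spam with both
probability and confidence equal to 1.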
|
||
|
|
||
|
=back
|
||
|
|
||
|
|
||
|
=head1 MULTIPLE RECIPIENT BEHAVIOR
|
||
|
|
||
|
For messages with multiple recipients, the user that dspam is running as will
|
||
|
be the dspam username.
|
||
|
|
||
|
When messages have a single recipient, the recipient address is used as the
|
||
|
dspam username. For dspam to trust qpsmtpd with modifying the username, you
|
||
|
B<must> add the username that qpsmtpd is running as to the dspamd.conf file.
|
||
|
|
||
|
ie, (Trust smtpd).
|
||
|
|
||
|
=head1 CHANGES
|
||
|
|
||
|
=cut
|
||
|
|
||
|
use strict;
|
||
|
|
||
|
use Qpsmtpd::Constants;
|
||
|
use Qpsmtpd::DSN;
|
||
|
use IO::Handle;
|
||
|
use Socket qw(:DEFAULT :crlf);
|
||
|
|
||
|
# Plugin registration.
#
# Stores the key/value plugin arguments in $self->{_args} and, when the
# 'reject' argument is true, hooks dspam_reject into the data_post phase.
sub register {
    my ($self, $qp, @args) = @_;

    # two leading parameters precede the key/value list, so an odd total
    # means the key/value list is unbalanced
    my $unbalanced = @_ % 2;
    if ($unbalanced) {
        $self->log(LOGERROR, "Bad parameters for the dspam plugin");
    }

    %{ $self->{_args} } = @args;

    if ( $self->{_args}->{reject} ) {
        $self->register_hook('data_post', 'dspam_reject');
    }
}
|
||
|
|
||
|
# data_post hook: run the message through dspam and record the verdict.
#
# Messages whose data_size exceeds 500_000 are skipped. Otherwise the
# assembled message is piped to the dspam command and, when the reply can be
# parsed, the X-DSPAM-Result and X-DSPAM-Signature headers are added (at
# index 0). Always returns DECLINED; rejecting is left to dspam_reject, which
# register() hooks in when the 'reject' argument is set.
sub hook_data_post {
    my ($self, $transaction) = @_;

    $self->log(LOGDEBUG, "check_dspam");
    return (DECLINED) if $transaction->data_size > 500_000;

    my $username  = $self->select_username( $transaction );
    my $message   = $self->assemble_message($transaction);
    my $filtercmd = $self->get_filter_cmd( $transaction, $username );
    $self->log(LOGWARN, $filtercmd);

    my $response = $self->dspam_process( $filtercmd, $message );
    if ( ! $response ) {
        $self->log(LOGWARN, "No response received from dspam. Check your logs for errors.");
        return (DECLINED);
    }
    $self->log(LOGWARN, $response);

    # X-DSPAM-Result: user@example.com; result="Spam"; class="Spam"; probability=1.0000; confidence=1.00; signature=N/A
    # X-DSPAM-Result: smtpd; result="Innocent"; class="Innocent"; probability=0.0023; confidence=1.00; signature=4f8dae6a446008399211546
    my ($result,$prob,$conf,$sig) = $response =~ /result=\"(Spam|Innocent)\";.*?probability=([\d\.]+); confidence=([\d\.]+); signature=(.*)/;

    # Anything that is not a recognisable summary line (an error message, an
    # unexpected result class, ...) must not be turned into headers built
    # from undefined values.
    if ( ! defined $result ) {
        $self->log(LOGWARN, "Unable to parse the dspam response, not adding dspam headers.");
        return (DECLINED);
    }

    my $header_str = "$result, probability=$prob, confidence=$conf";
    $self->log(LOGWARN, $header_str);
    $transaction->header->add('X-DSPAM-Result', $header_str, 0);

    # the signature header is required if you intend to train dspam later
    # you must set Preference "signatureLocation=headers" in dspam.conf
    $transaction->header->add('X-DSPAM-Signature', $sig, 0);

    return (DECLINED);
}
|
||
|
|
||
|
# Choose the dspam username for this message.
#
# With exactly one recipient, returns that recipient's address in lower
# case, which enables per-user dspam preferences. With any other number of
# recipients, returns the login name of the effective uid this process runs
# as.
sub select_username {
    my ($self, $transaction) = @_;

    # Take the recipients in list context and count the list, rather than
    # relying on what the accessor happens to return in scalar context.
    my @recipients      = $transaction->recipients;
    my $recipient_count = scalar @recipients;
    $self->log(LOGDEBUG, "Message has $recipient_count recipients");

    if ( $recipient_count != 1 ) {
        $self->log(LOGINFO, "skipping user prefs, $recipient_count recipients detected.");
        # scalar() so that a caller in list context gets just the login
        # name and not the whole passwd entry.
        return scalar getpwuid($>);
    }

    # use the recipient's email address as username. This enables user prefs
    return lc( $recipients[0]->address );
}
|
||
|
|
||
|
# Build the text that is handed to dspam.
#
# The result is an X-Envelope-From line carrying the formatted sender, the
# message header, and the message body, with every line terminated by CRLF.
sub assemble_message {
    my ($self, $transaction) = @_;

    # rewind so the body is read from its start
    $transaction->body_resetpos;

    my $envelope_from = $transaction->sender->format;
    my $header_text   = $transaction->header->as_string;
    my $raw           = "X-Envelope-From: $envelope_from\n$header_text\n\n";

    while ( my $body_line = $transaction->body_getline ) {
        $raw .= $body_line;
    }

    # re-terminate every line with CRLF; split drops trailing empty lines,
    # so the result always ends with exactly one CRLF
    my @lines = split /\n/, $raw;
    return join(CRLF, @lines) . CRLF;
}
|
||
|
|
||
|
# Pipe $message into the shell command $filtercmd and return everything the
# command wrote to STDOUT, with one trailing newline removed.
#
# Returns nothing (undef in scalar context) when the helper process cannot
# be forked.
sub dspam_process {
    my ( $self, $filtercmd, $message ) = @_;

    #return $self->dspam_process_open2( $filtercmd, $message );

    # The forking open returns the child's pid in the parent, 0 in the child
    # and undef when the fork fails. The failure case must not fall into the
    # child branch, or the calling process itself would exit.
    my $pid = open(my $in_fh, '-|');
    if ( ! defined $pid ) {
        warn "dspam: unable to fork: $!\n";
        return;
    }

    if ( $pid == 0 ) {
        # Child: STDOUT is the pipe back to the parent. Leave with exit
        # rather than die, so an eval further up the call stack cannot keep
        # this copy of the process running.
        my $out_fh;
        open($out_fh, '|-', $filtercmd) or do {
            warn "Can't run $filtercmd: $!\n";
            exit(1);
        };
        print $out_fh $message;
        close $out_fh;
        exit(0);
    }

    # Parent: collect the command's output; close reaps the child.
    my $response = join('', <$in_fh>);
    close $in_fh;
    chomp $response;

    return $response;
}
|
||
|
|
||
|
# Alternative to dspam_process built on IPC::Open2: feed $message to the
# command $filtercmd and return its output with one trailing newline
# removed.
sub dspam_process_open2 {
    my ( $self, $filtercmd, $message ) = @_;

    # not sure why, but this is not as reliable as I'd like. What's a dspam
    # error -5 mean anyway?
    use FileHandle;
    use IPC::Open2;

    my ($from_dspam, $to_dspam);
    my $child_pid = open2($from_dspam, $to_dspam, $filtercmd);

    # send the whole message, then close the write side so the command
    # sees end of input
    print {$to_dspam} $message;
    close $to_dspam;

    my @output = <$from_dspam>;
    waitpid $child_pid, 0;

    my $reply = join('', @output);
    chomp $reply;
    return $reply;
}
|
||
|
|
||
|
# data_post hook, registered by register() only when the 'reject' argument
# is set.
#
# Reads the X-DSPAM-Result header and denies the message when dspam
# classified it as Spam with probability and confidence both equal to 1,
# unless the connection is a relay client. Returns DECLINED in every other
# case, including a missing or unparseable header.
#
# NOTE(review): the value of 'reject' is only tested for truth here; it is
# not compared against dspam's confidence.
sub dspam_reject {
    my ($self, $transaction) = @_;

    return (DECLINED) if ! $self->{_args}->{reject};

    my $status = $transaction->header->get('X-DSPAM-Result') or do {
        $self->log(LOGWARN, "dspam_reject: failed to find the dspam header");
        return (DECLINED);
    };
    my ($clas,$probability,$confidence) = $status =~ m/^(Spam|Innocent), probability=([\d\.]+), confidence=([\d\.]+)/i;

    # A header that does not have the expected shape leaves all three
    # values undefined; do not compare or log undefined values.
    if ( ! defined $clas ) {
        $self->log(LOGWARN, "dspam_reject: failed to parse the dspam header");
        return (DECLINED);
    }

    $self->log(LOGDEBUG, "dspam $clas, prob: $probability, conf: $confidence");

    if ( $clas eq 'Spam' && $probability == 1 && $confidence == 1 ) {
        # default of media_unsupported is DENY, so just change the message
        if ( $self->qp->connection->relay_client ) {
            $self->log(LOGWARN, "allowing spam since user authenticated");
            return DECLINED;
        }
        return Qpsmtpd::DSN->media_unsupported('dspam says, no spam please');
    }

    return DECLINED;
}
|
||
|
|
||
|
# Build the dspam command line for this message.
#
# Without the learn_from_sa argument this is always the plain "--process"
# command. With learn_from_sa set, the X-Spam-Status header is consulted:
# when SpamAssassin auto-learned the message as spam (or as ham), a
# "--source=corpus --class=spam" (or "--class=innocent") training command is
# returned instead. A message SA flags as spam with a score below
# learn_from_sa, or a header that is missing or cannot be parsed, yields the
# plain command.
#
# $user is single-quoted for the shell: it may be a recipient address taken
# from the SMTP envelope and the command is later run through a shell.
sub get_filter_cmd {
    my ($self, $transaction, $user) = @_;

    my $dspam_bin = $self->{_args}->{dspam_bin} || '/usr/local/bin/dspam';

    # wrap in single quotes, writing embedded single quotes as '\''
    my $quoted_user = defined $user ? $user : '';
    $quoted_user =~ s/'/'\\''/g;
    $quoted_user = "'$quoted_user'";

    my $default = "$dspam_bin --user $quoted_user --mode=tum --process --deliver=summary --stdout";
    my $min_score = $self->{_args}->{learn_from_sa} or return $default;

    #$self->log(LOGDEBUG, "attempting to learn from SA");
    my $sa_status = $transaction->header->get('X-Spam-Status');

    if ( ! $sa_status ) {
        $self->log(LOGERROR, "dspam learn_from_sa was set but no X-Spam-Status header detected");
        return $default;
    }
    chomp $sa_status;

    # /s lets ".*?" cross line breaks in case the header value is folded
    # and autolearn= sits on a continuation line.
    my ($is_spam,$score,$autolearn) = $sa_status =~ /^(yes|no), score=([\d\.\-]+)\s.*?autolearn=([\w]+)/is;
    if ( ! defined $is_spam ) {
        $self->log(LOGWARN, "dspam learn_from_sa: unable to parse X-Spam-Status: $sa_status");
        return $default;
    }
    $self->log(LOGINFO, "sa_status: $sa_status; $is_spam; $autolearn");

    $is_spam   = lc($is_spam);
    $autolearn = lc($autolearn);

    if ( $is_spam eq 'yes' && $score < $min_score ) {
        $self->log(LOGWARN, "SA spam score of $score is less than $min_score, skipping autolearn");
        return $default;
    }

    if ( $is_spam eq 'yes' && $autolearn eq 'spam' ) {
        return "$dspam_bin --user $quoted_user --mode=tum --source=corpus --class=spam --deliver=summary --stdout";
    }
    elsif ( $is_spam eq 'no' && $autolearn eq 'ham' ) {
        return "$dspam_bin --user $quoted_user --mode=tum --source=corpus --class=innocent --deliver=summary --stdout";
    }

    return $default;
}
|
||
|
|
||
|
# Remove every existing $header_name header from the message.
#
# The leave_old_headers argument selects what happens to the old values:
# 'rename' (the default) re-adds each value under an "Old-" name, keeping a
# leading "X-" in front ("X-Foo" becomes "X-Old-Foo", "Foo" becomes
# "Old-Foo"); 'drop' discards them. Any other setting leaves the headers
# alone.
sub _cleanup_spam_header {
    my ($self, $transaction, $header_name) = @_;

    my $policy   = lc( $self->{_args}->{leave_old_headers} || 'rename' );
    my $renaming = $policy eq 'rename';
    return unless $renaming || $policy eq 'drop';

    # build the archive name, keeping any "X-" prefix at the front
    my $stripped     = $header_name;
    my $had_x_prefix = ( $stripped =~ s/^X-// );
    my $archive_name = ( $had_x_prefix ? 'X-Old-' : 'Old-' ) . $stripped;

    foreach my $value ( $transaction->header->get($header_name) ) {
        if ($renaming) {
            $transaction->header->add($archive_name, $value);
        }
        $transaction->header->delete($header_name);
    }
}
|
||
|
|