2012-04-29 10:35:59 +02:00
|
|
|
#!perl -w
|
2012-04-20 07:42:04 +02:00
|
|
|
|
|
|
|
=head1 NAME
|
|
|
|
|
|
|
|
dspam - dspam integration for qpsmtpd
|
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
Uses dspam to classify messages. Use B<spamassassin>, B<karma>, and B<naughty>
|
|
|
|
to train dspam.
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-05-06 04:58:49 +02:00
|
|
|
Adds the X-DSPAM-Result and X-DSPAM-Signature headers to messages. The latter is essential for
|
2012-04-20 07:42:04 +02:00
|
|
|
training dspam and the former is useful to MDAs, MUAs, and humans.
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
Adds a transaction note to the qpsmtpd transaction. The note is a hashref
|
2012-05-06 04:58:49 +02:00
|
|
|
with at least the 'class' field (Spam,Innocent,Whitelisted). It will normally
|
2012-06-22 11:38:01 +02:00
|
|
|
contain a probability and confidence rating.
|
2012-05-06 04:58:49 +02:00
|
|
|
|
2012-04-20 07:42:04 +02:00
|
|
|
=head1 TRAINING DSPAM
|
|
|
|
|
2012-06-23 05:44:55 +02:00
|
|
|
If you enable dspam rejection without training first, you will lose valid
|
|
|
|
mail. The dspam false positive rate is high when untrained. The good news is that
|
|
|
|
dspam learns very, very fast.
|
2012-05-06 04:58:49 +02:00
|
|
|
|
2012-06-23 05:44:55 +02:00
|
|
|
The best way to train dspam is to feed it two large equal sized
|
|
|
|
corpuses of spam and ham from your mail server. The dspam authors suggest
|
|
|
|
avoiding public corpuses. I train dspam as follows:
|
2012-04-20 07:42:04 +02:00
|
|
|
|
|
|
|
=over 4
|
|
|
|
|
|
|
|
=item learn from SpamAssassin
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
See the SPAMASSASSIN section.
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-05-06 04:58:49 +02:00
|
|
|
=item periodic training
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-05-06 04:58:49 +02:00
|
|
|
I have a script that searches the contents of every user's maildir. Any read
|
|
|
|
messages that have changed since the last processing run are learned as ham
|
|
|
|
or spam.
|
2012-04-20 07:42:04 +02:00
|
|
|
|
|
|
|
The ham message list consists of read messages in any folder not named like
|
2012-05-06 04:58:49 +02:00
|
|
|
Spam, Junk, Trash, or Deleted. This catches messages that users have read
|
|
|
|
and left in their inbox or filed away into subfolders.
|
2012-04-20 07:42:04 +02:00
|
|
|
|
|
|
|
=item on-the-fly training
|
|
|
|
|
2012-05-06 04:58:49 +02:00
|
|
|
The dovecot IMAP server has an antispam plugin that will train dspam when
|
|
|
|
messages are moved to/from the Spam folder.
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-05-06 04:58:49 +02:00
|
|
|
=back
|
2012-04-20 07:42:04 +02:00
|
|
|
|
|
|
|
=head1 CONFIG
|
|
|
|
|
2012-05-06 04:58:49 +02:00
|
|
|
=head2 dspam_bin
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-05-06 04:58:49 +02:00
|
|
|
The path to the dspam binary. If yours is installed somewhere other
|
2012-06-22 11:38:01 +02:00
|
|
|
than /usr/local/bin/dspam, set this.
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
=head2 autolearn [ naughty | karma | spamassassin | any ]
|
2012-04-20 07:42:04 +02:00
|
|
|
|
|
|
|
=over 4
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
=item naughty
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
learn naughty messages as spam (see plugins/naughty)
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
=item karma
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
learn messages with negative karma as spam (see plugins/karma)
|
|
|
|
|
|
|
|
=item spamassassin
|
|
|
|
|
2012-06-23 05:44:55 +02:00
|
|
|
learn from messages that SpamAssassin has marked autolearn=(ham|spam). See SPAMASSASSIN.
|
2012-06-22 11:38:01 +02:00
|
|
|
|
|
|
|
=item any
|
|
|
|
|
|
|
|
all of the above, and any future tests too!
|
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
=head2 reject
|
|
|
|
|
|
|
|
Set to a floating point value between 0 and 1.00 where 0 is no confidence
|
|
|
|
and 1.0 is 100% confidence.
|
|
|
|
|
|
|
|
If dspam's confidence is greater than or equal to this threshold, the
|
|
|
|
message will be rejected. The default is 1.00.
|
|
|
|
|
|
|
|
dspam reject .95
|
|
|
|
|
|
|
|
To only reject mail if dspam and spamassassin both think the message is spam,
|
|
|
|
set I<reject agree>.
|
|
|
|
|
|
|
|
=head2 reject_type
|
|
|
|
|
|
|
|
reject_type [ perm | temp | disconnect ]
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
By default, rejects are permanent (5xx). Set I<reject_type temp> to
|
|
|
|
defer mail instead of rejecting it.
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
Set I<reject_type disconnect> if you'd prefer to immediately disconnect
|
|
|
|
the connection when a spam is encountered. This prevents the remote server
|
|
|
|
from issuing a reset and attempting numerous times in a single connection.
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
=head1 dspam.conf
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
dspam must be configured and working properly. I had to modify the following
|
|
|
|
settings on my system:
|
2012-04-20 07:42:04 +02:00
|
|
|
|
|
|
|
=over 4
|
|
|
|
|
|
|
|
=item mysql storage
|
|
|
|
|
|
|
|
=item Trust smtpd
|
|
|
|
|
|
|
|
=item TrainingMode tum
|
|
|
|
|
|
|
|
=item Tokenizer osb
|
|
|
|
|
|
|
|
=item Preference "trainingMode=TOE"
|
|
|
|
|
|
|
|
=item Preference "spamAction=deliver"
|
|
|
|
|
|
|
|
=item Preference "signatureLocation=headers"
|
|
|
|
|
|
|
|
=item TrainPristine off
|
|
|
|
|
|
|
|
=item ParseToHeaders off
|
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
Of those changes, the most important is the signature location. This plugin
|
|
|
|
only supports storing the signature in the headers. If you want to train dspam
|
|
|
|
after delivery (ie, users moving messages to/from spam folders), then the
|
|
|
|
dspam signature must be in the headers.
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
When using the dspam MySQL backend, use InnoDB tables. DSPAM training
|
2012-06-23 05:44:55 +02:00
|
|
|
is dramatically slowed by MyISAM table locks and dspam requires a lot
|
2012-05-06 04:58:49 +02:00
|
|
|
of training. InnoDB has row level locking and updates are much faster.
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
=head1 DSPAM periodic maintenance
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
Install this cron job to clean up your DSPAM database.
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
http://dspam.git.sourceforge.net/git/gitweb.cgi?p=dspam/dspam;a=tree;f=contrib/dspam_maintenance;hb=HEAD
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
=head1 SPAMASSASSIN
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
DSPAM can be trained by SpamAssassin. This relationship between them requires
|
|
|
|
attention to several important details:
|
|
|
|
|
|
|
|
=over 4
|
|
|
|
|
|
|
|
=item 1
|
|
|
|
|
|
|
|
dspam must be listed B<after> spamassassin in the config/plugins file.
|
|
|
|
Because SA runs first, I set the SA reject_threshold up above 100 so that
|
|
|
|
all spam messages will be used to train dspam.
|
|
|
|
|
|
|
|
Once dspam is trained and errors are rare, I plan to run dspam first and
|
|
|
|
reduce the SA load.
|
|
|
|
|
|
|
|
=item 2
|
|
|
|
|
2012-06-23 05:44:55 +02:00
|
|
|
For I<autolearn spamassassin> to work, autolearn must be enabled and
|
|
|
|
configured in SpamAssassin. SA autolearn will
|
2012-06-22 11:38:01 +02:00
|
|
|
determine if a message is learned by dspam. The settings to pay careful
|
|
|
|
attention to in your SA local.cf file are I<bayes_auto_learn_threshold_spam>
|
|
|
|
and I<bayes_auto_learn_threshold_nonspam>. Make sure they are set to
|
|
|
|
conservative values that will yield no false positives.
|
|
|
|
|
2012-06-23 05:44:55 +02:00
|
|
|
If you are using I<autolearn spamassassin> and I<reject>, messages that exceed
|
2012-06-22 11:38:01 +02:00
|
|
|
the SA thresholds will cause dspam to reject them. Again I say, make sure
|
|
|
|
the SA autolearn thresholds are set high enough to avoid false positives.
|
|
|
|
|
|
|
|
=back
|
2012-04-20 07:42:04 +02:00
|
|
|
|
|
|
|
=head1 MULTIPLE RECIPIENT BEHAVIOR
|
|
|
|
|
|
|
|
For messages with multiple recipients, the user that dspam is running as will
|
|
|
|
be the dspam username.
|
|
|
|
|
|
|
|
When messages have a single recipient, the recipient address is used as the
|
|
|
|
dspam username. For dspam to trust qpsmtpd with modifying the username, you
|
|
|
|
B<must> add the username that qpsmtpd is running as to the dspam.conf file.
|
|
|
|
|
|
|
|
ie, (Trust smtpd).
|
|
|
|
|
|
|
|
=head1 CHANGES
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
2012-06 - Matt Simerson - added karma & naughty learning support
|
|
|
|
- worked around the DESTROY bug in dspam_process
|
|
|
|
|
2012-05-06 04:58:49 +02:00
|
|
|
=head1 AUTHOR
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
2012 - Matt Simerson
|
2012-05-06 04:58:49 +02:00
|
|
|
|
2012-04-20 07:42:04 +02:00
|
|
|
=cut
|
|
|
|
|
|
|
|
use strict;
|
2012-05-06 04:58:49 +02:00
|
|
|
use warnings;
|
2012-04-20 07:42:04 +02:00
|
|
|
|
2013-03-16 06:12:50 +01:00
|
|
|
use lib 'lib';
|
|
|
|
|
2012-04-20 07:42:04 +02:00
|
|
|
use Qpsmtpd::Constants;
|
|
|
|
use Qpsmtpd::DSN;
|
|
|
|
use IO::Handle;
|
|
|
|
use Socket qw(:DEFAULT :crlf);
|
|
|
|
|
|
|
|
# Plugin registration. The arguments after ($self, $qp) are treated as
# key/value pairs and stored in $self->{_args}, with defaults filled in for
# reject (1), reject_type ('perm') and dspam_bin ('/usr/local/bin/dspam').
# The data_post hook is only registered when get_dspam_bin() finds a usable
# binary; otherwise DECLINED is returned.
sub register {
    my $self   = shift;
    my $qp     = shift;
    my @params = @_;

    if ( @params % 2 ) {
        $self->log(LOGERROR, "Bad parameters for the dspam plugin");
    }

    my %args = @params;
    $args{reject} = 1 unless defined $args{reject};
    $args{reject_type} ||= 'perm';
    $args{dspam_bin}   ||= '/usr/local/bin/dspam';
    $self->{_args} = \%args;

    return DECLINED if ! $self->get_dspam_bin();

    $self->register_hook('data_post', 'data_post_handler');
}
|
|
|
|
|
2013-03-28 00:37:40 +01:00
|
|
|
# Returns the configured dspam binary path when it exists and is executable.
# Otherwise logs the reason at LOGERROR and returns nothing.
sub get_dspam_bin {
    my ($self) = @_;

    my $path = $self->{_args}{dspam_bin};

    # The existence test comes first so a missing file is not reported as a
    # permission problem.
    my $problem =
          ! -e $path ? "error, dspam CLI binary not found: install dspam and/or set dspam_bin"
        : ! -x $path ? "error, no permission to run $path"
        :              undef;

    if ( defined $problem ) {
        $self->log(LOGERROR, $problem);
        return;
    }

    return $path;
}
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
# data_post hook. Runs the message through dspam, stores the parsed result in
# the 'dspam' transaction note, attaches the X-DSPAM-* headers, runs the
# optional autolearn step and finally lets log_and_return() decide the hook
# return value.
#
# Returns DECLINED early when the connection is immune, when the message is
# larger than 500_000 bytes, or when dspam produced no usable result.
sub data_post_handler {
    my $self        = shift;
    my $transaction = shift || $self->qp->transaction;

    return (DECLINED) if $self->is_immune();

    if ( $transaction->data_size > 500_000 ) {
        $self->log(LOGINFO, "skip, too big (" . $transaction->data_size . ")" );
        return (DECLINED);
    }

    my $user = $self->select_username( $transaction );
    my $bin  = $self->{_args}{dspam_bin};

    # The command line is handed to a shell by dspam_process(). The username
    # is derived from the envelope recipient address, which may legally
    # contain shell metacharacters (backtick, $, |, &, ', ...), so it is
    # wrapped in single quotes with embedded single quotes escaped.
    my $quoted_user = defined $user ? $user : '';
    $quoted_user =~ s/'/'\\''/g;

    my $filtercmd = "$bin --user '$quoted_user' --mode=tum --process --deliver=summary --stdout";
    $self->log(LOGDEBUG, $filtercmd);

    my $response = $self->dspam_process( $filtercmd, $transaction );
    if ( ! $response || ! $response->{result} ) {
        $self->log(LOGWARN, "error, no dspam response. Check logs for errors.");
        return (DECLINED);
    }

    $transaction->notes('dspam', $response);

    $self->attach_headers( $response, $transaction );
    $self->autolearn( $response, $transaction );

    return $self->log_and_return( $transaction );
}
|
|
|
|
|
|
|
|
# Chooses the dspam username for a transaction.
#
# With exactly one recipient, the lower-cased recipient address is returned,
# which enables per-user dspam preferences. With several recipients (or none)
# the name of the effective user running this process is returned instead.
sub select_username {
    my ($self, $transaction) = @_;

    # Collect the list once so the count is always a defined number and the
    # first recipient can be inspected without a second method call.
    my @recipients      = $transaction->recipients;
    my $recipient_count = scalar @recipients;
    $self->log(LOGDEBUG, "Message has $recipient_count recipients");

    if ( $recipient_count > 1 ) {
        $self->log(LOGINFO, "multiple recipients ($recipient_count), ignoring user prefs");
        # scalar(): in list context getpwuid returns the whole passwd entry
        return scalar getpwuid($>);
    }

    if ( $recipient_count == 0 ) {
        # Previously this path died calling ->address on undef.
        $self->log(LOGINFO, "no recipients, ignoring user prefs");
        return scalar getpwuid($>);
    }

    # use the recipients email address as username. This enables user prefs
    return lc $recipients[0]->address;
}
|
|
|
|
|
|
|
|
# Builds the full message text handed to dspam: an X-Envelope-From line, the
# header block, then every body line, with line endings rewritten to CRLF.
# Returns the assembled string, always terminated by one CRLF.
sub assemble_message {
    my ($self, $transaction) = @_;

    my $envelope_from = $transaction->sender->format;
    my $headers       = $transaction->header->as_string;

    my $text = "X-Envelope-From: $envelope_from\n$headers\n\n";

    $transaction->body_resetpos;
    my @body;
    while ( my $chunk = $transaction->body_getline ) {
        push @body, $chunk;
    }
    $text .= join '', @body;

    # split drops trailing empty fields, so any run of trailing newlines
    # collapses into the single CRLF appended here.
    my @lines = split /\n/, $text;
    return join(CRLF, @lines) . CRLF;
}
|
|
|
|
|
2012-06-25 08:51:36 +02:00
|
|
|
# Parses one dspam summary line into a hashref with the keys class, result,
# probability, confidence and signature. Returns nothing (after a LOGDEBUG
# message) when no response was supplied.
#
# Fields are looked up by name rather than by position, so an unexpected
# line (for example an error message) yields a hashref with undefined values
# instead of a series of "uninitialized" / "substr outside of string"
# warnings. Values are split on the first '=' only, so a value that itself
# contains '=' is kept intact.
sub parse_response {
    my $self = shift;
    my $response = shift or do {
        $self->log( LOGDEBUG, "missing dspam response!" );
        return;
    };

    # example DSPAM results:
    # user@example.com; result="Spam"; class="Spam"; probability=1.0000; confidence=1.00; signature=N/A
    # smtpd; result="Innocent"; class="Innocent"; probability=0.0023; confidence=1.00; signature=4f8dae6a446008399211546

    #return $self->parse_response_regexp( $response ); # probably slower

    my %field;
    foreach my $pair ( split /;\s*/, $response ) {
        my ($key, $value) = split /=/, $pair, 2;
        next if ! defined $value;          # e.g. the leading username
        $value =~ s/\s+\z//;               # tolerate a trailing newline
        $value =~ s/\A"(.*)"\z/$1/s;       # strip off quotes
        $field{$key} = $value;
    }

    return {
        class       => $field{class},
        result      => $field{result},
        probability => $field{probability},
        confidence  => $field{confidence},
        signature   => $field{signature},
    };
}
|
|
|
|
|
|
|
|
# Regex-based alternative to parse_response(). Extracts result, class,
# probability, confidence and signature from a dspam summary line and returns
# them as a hashref. When the line does not match, every value is undef.
sub parse_response_regexp {
    my ($self, $response) = @_;

    my @names = qw( result class probability confidence signature );

    # A failed match yields an empty list, which leaves all five keys
    # present but undefined.
    my %parsed;
    @parsed{@names} = $response =~ /
        result=\"(Spam|Innocent)\";\s
        class=\"(Spam|Innocent)\";\s
        probability=([\d\.]+);\s
        confidence=([\d\.]+);\s
        signature=(.*)
    /x;

    return \%parsed;
}
|
|
|
|
|
2012-06-25 08:51:36 +02:00
|
|
|
# Runs the given dspam command and returns the parsed result (see
# parse_response). Only the backticks transport is active; the open2 and fork
# variants are kept below as commented-out alternatives.
# Note: $transaction is accepted but not forwarded to the backticks variant.
sub dspam_process {
    my $self = shift;
    my ( $filtercmd, $transaction ) = @_;

    my $raw_line = $self->dspam_process_backticks( $filtercmd );
    #my $raw_line = $self->dspam_process_open2( $filtercmd, $transaction );
    #my $raw_line = $self->dspam_process_fork( $filtercmd );

    return $self->parse_response( $raw_line );
}
|
|
|
|
|
2012-06-23 05:44:55 +02:00
|
|
|
# Fork-based transport: a child process pipes the assembled message into the
# dspam command while the parent reads the first line of its output.
# Returns that line (chomped), or nothing when the fork fails or dspam prints
# nothing.
#
# yucky. This method (which forks) exercises a bug in qpsmtpd. When the
# child exits, the Transaction::DESTROY method is called, which deletes
# the spooled file from disk. The contents of $self->qp->transaction
# needed to spool it again are also destroyed. Don't use this.
sub dspam_process_fork {
    my ( $self, $filtercmd, $transaction ) = @_;
    $transaction ||= $self->qp->transaction;

    my $message = $self->assemble_message( $transaction );

    # open(..., '-|') returns the child pid in the parent, 0 in the child and
    # undef when the fork fails. The failure case must not fall into the
    # child branch, which ends in exit(0).
    my $pid = open( my $in_fh, '-|' );
    if ( ! defined $pid ) {
        $self->log(LOGERROR, "error, unable to fork for dspam: $!");
        return;
    }

    if ( $pid == 0 ) {                 # child: feed the message to dspam
        open(my $out_fh, '|-', $filtercmd) or die "Can't run $filtercmd: $!\n";
        print $out_fh $message;
        close $out_fh;
        exit(0);
    }

    my $response = <$in_fh>;
    close $in_fh;
    return if ! defined $response;     # dspam printed nothing

    chomp $response;
    $self->log(LOGDEBUG, $response);
    return $response;
}
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
# Backticks transport: writes the message to a temp file, runs the dspam
# command with that file on stdin and returns the first line of its output.
# Returns nothing when the temp file cannot be written or dspam prints
# nothing.
#
# $transaction is optional; it defaults to the current transaction.
sub dspam_process_backticks {
    my ( $self, $filtercmd, $transaction ) = @_;
    $transaction ||= $self->qp->transaction;

    my $message = $self->temp_file();
    open my $fh, '>', $message or do {
        $self->log(LOGERROR, "error, unable to write $message: $!");
        return;
    };

    print $fh "X-Envelope-From: "
        . $transaction->sender->format . CRLF
        . $transaction->header->as_string . CRLF . CRLF;

    $transaction->body_resetpos;
    while (my $line = $transaction->body_getline) { print $fh $line; }

    # Buffered write errors surface at close time.
    close $fh or do {
        $self->log(LOGERROR, "error, unable to write $message: $!");
        return;
    };

    my $output = `$filtercmd < $message`;
    return if ! defined $output;

    # Split on CR or LF only; the previous class [\r|\n] also split on a
    # literal '|'.
    my ($line1) = split /[\r\n]/, $output;
    return if ! defined $line1;

    $self->log(LOGDEBUG, $line1);
    return $line1;
}
|
|
|
|
|
2012-04-20 07:42:04 +02:00
|
|
|
# open3 transport: pipes the assembled message to the dspam command's stdin
# and returns the first line it prints on stdout (chomped when non-empty).
# The first line of stderr, if any, is logged at LOGDEBUG.
#
# Original author's note: not sure why, but this is not as reliable as I'd
# like. What does a dspam error -5 mean anyway?
sub dspam_process_open2 {
    my $self = shift;
    my ( $filtercmd, $transaction ) = @_;

    use FileHandle;
    use IPC::Open3;
    use Symbol 'gensym';

    my $message = $self->assemble_message( $transaction );

    # open3 needs a pre-made glob for stderr; with an undefined handle it
    # would merge stderr into stdout.
    my ( $child_out, $child_in );
    my $child_err = gensym;
    my $pid = open3( $child_in, $child_out, $child_err, $filtercmd );

    print $child_in $message;
    close $child_in;

    my $response = <$child_out>;    # get first line only
    waitpid $pid, 0;                # exit status is left in $? (unused)

    if ( $response ) {
        chomp $response;
        $self->log(LOGDEBUG, $response);
    }

    my $err_msg = <$child_err>;
    $self->log(LOGDEBUG, $err_msg ) if $err_msg;

    return $response;
}
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
# Decides the hook result from the stored dspam results and logs why.
#
# Returns DECLINED when there are no results, no class, rejection is
# disabled, the class is Innocent, the client is a relay client, the
# probability does not exceed the reject threshold, or confidence is not 1.
# With "reject agree" the decision is delegated to reject_agree().
# Otherwise a DSN built with the configured reject type is returned.
#
# NOTE(review): the threshold test passes the message when probability is
# <= reject, while the POD says ">= threshold" rejects and the log text says
# "<". With the default reject of 1 this test never falls through. Confirm
# which is intended before changing the operator.
sub log_and_return {
    my $self        = shift;
    my $transaction = shift || $self->qp->transaction;

    my $d = $self->get_dspam_results( $transaction );
    return DECLINED if ! $d;

    my $class = $d->{class};
    if ( ! $class ) {
        $self->log(LOGWARN, "skip, no dspam class detected");
        return DECLINED;
    }

    my $confidence = $d->{confidence};
    my $status     = "$class, $confidence c.";

    my $reject = $self->{_args}{reject};
    if ( ! $reject ) {
        $self->log(LOGINFO, "skip, reject disabled ($status)");
        return DECLINED;
    }

    return $self->reject_agree( $transaction ) if $reject eq 'agree';

    my $probability = $d->{probability};

    # First matching reason wins; later tests are not evaluated.
    my $pass_reason =
          $class eq 'Innocent'
            ? "pass, $status"
        : $self->qp->connection->relay_client
            ? "skip, allowing spam, user authenticated ($status)"
        : $probability <= $reject
            ? "pass, $class probability is too low ($probability < $reject)"
        : $confidence != 1
            ? "pass, $class confidence is too low ($confidence)"
        :     undef;

    if ( defined $pass_reason ) {
        $self->log(LOGINFO, $pass_reason);
        return DECLINED;
    }

    # dspam is more than $reject percent sure this message is spam
    $self->log(LOGINFO, "fail, $class, ($confidence confident)");
    my $deny = $self->get_reject_type();
    return Qpsmtpd::DSN->media_unsupported($deny, 'dspam says, no spam please');
}
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
# "reject agree" policy: only reject when dspam and SpamAssassin both call
# the message spam.
#
# Reads the 'spamassassin' and 'dspam' transaction notes. Returns
# (reject type, message) when both agree on spam, DECLINED in every other
# case. Karma is lowered by 2 on an agreed spam and raised by 1 on an agreed
# ham with dspam confidence above .9.
sub reject_agree {
    my ($self, $transaction ) = @_;

    my $sa = $transaction->notes('spamassassin' );
    my $d  = $transaction->notes('dspam' );

    my $dspam_class = $d->{class};
    my $status      = "$dspam_class, $d->{confidence} c";

    my $sa_verdict = $sa->{is_spam};
    if ( ! $sa_verdict ) {
        $self->log(LOGINFO, "pass, cannot agree, SA results missing ($status)");
        return DECLINED;
    }

    if ( $dspam_class eq 'Spam' && $sa_verdict eq 'Yes' ) {
        $self->adjust_karma( -2 );
        $self->log(LOGINFO, "fail, agree, $status");
        my $reject = $self->get_reject_type();
        return ($reject, 'we agree, no spam please');
    }

    if ( $dspam_class eq 'Spam' ) {
        $self->log(LOGINFO, "fail, disagree, $status");
        return DECLINED;
    }

    if ( $dspam_class eq 'Innocent' && $sa_verdict eq 'No' ) {
        $self->adjust_karma( 1 ) if $d->{confidence} > .9;
        $self->log(LOGINFO, "pass, agree, $status");
        return DECLINED;
    }

    if ( $dspam_class eq 'Innocent' ) {
        $self->log(LOGINFO, "pass, disagree, $status");
        return DECLINED;
    }

    $self->log(LOGINFO, "pass, other $status");
    return DECLINED;
}
|
|
|
|
|
|
|
|
# Returns the dspam results hashref for a transaction.
#
# The 'dspam' transaction note is used when present. Otherwise the
# X-DSPAM-Result header ("Class, key=value, key=value") is parsed, the result
# is stored in the note and returned. Returns nothing, after a LOGWARN
# message, when the header is missing too.
sub get_dspam_results {
    my $self        = shift;
    my $transaction = shift || $self->qp->transaction;

    my $cached = $transaction->notes('dspam');
    return $cached if $cached;

    my $string = $transaction->header->get('X-DSPAM-Result');
    if ( ! $string ) {
        $self->log(LOGWARN, "get_dspam_results: failed to find the header");
        return;
    }

    my @bits = split /,\s+/, $string;
    chomp @bits;
    my ($class, @pairs) = @bits;

    my %d = map {
        my ($key, $val) = split /=/, $_;
        ( $key => $val );
    } @pairs;
    $d{class} = $class;    # set last so a stray "class=" pair cannot win

    my $message = $d{class};
    $message .= ", prob: $d{probability}, conf: $d{confidence}"
        if defined $d{probability} && defined $d{confidence};
    $self->log(LOGDEBUG, $message);

    $transaction->notes('dspam', \%d);
    return \%d;
}
|
|
|
|
|
2012-06-22 11:38:01 +02:00
|
|
|
# Adds the X-DSPAM-Result and X-DSPAM-Signature headers to the message.
#
# $r is the parsed dspam response (result, probability, confidence,
# signature). $transaction defaults to the current transaction. Any header of
# the same name already on the message is removed first, so only the values
# produced here remain.
sub attach_headers {
    my ($self, $r, $transaction) = @_;
    $transaction ||= $self->qp->transaction;

    my $header = $transaction->header;

    my $header_str = "$r->{result}, probability=$r->{probability}, confidence=$r->{confidence}";
    $self->log(LOGDEBUG, $header_str);

    my $name = 'X-DSPAM-Result';
    $header->delete($name) if $header->get($name);
    $header->add($name, $header_str, 0);

    # the signature header is required if you intend to train dspam later.
    # In dspam.conf, set: Preference "signatureLocation=headers"
    #
    # A signature header that arrived with the message is dropped as well, so
    # later training cannot pick up a value that did not come from this run.
    my $sig_name = 'X-DSPAM-Signature';
    $header->delete($sig_name) if $header->get($sig_name);
    if ( defined $r->{signature} && length $r->{signature} ) {
        $header->add($sig_name, $r->{signature}, 0);
    }

    return;
}
|
|
|
|
|
2012-06-23 05:44:55 +02:00
|
|
|
# Retrains dspam with this message as a misclassified innocent (ham) message.
# The 'dspam' transaction note is replaced by the training response, or by a
# fixed Innocent result when dspam returned nothing.
sub train_error_as_ham {
    my $self        = shift;
    my $transaction = shift;

    my $user      = $self->select_username( $transaction );
    my $dspam_bin = $self->{_args}{dspam_bin} || '/usr/local/bin/dspam';

    # The command goes through a shell. The username comes from the envelope
    # recipient address and may contain shell metacharacters, so it is
    # single-quoted with embedded single quotes escaped.
    my $quoted_user = defined $user ? $user : '';
    $quoted_user =~ s/'/'\\''/g;

    my $cmd = "$dspam_bin --user '$quoted_user' --mode=toe --source=error --class=innocent --deliver=summary --stdout";
    my $response = $self->dspam_process( $cmd, $transaction );
    if ( $response ) {
        $transaction->notes('dspam', $response);
    }
    else {
        $transaction->notes('dspam', { class => 'Innocent', result => 'Innocent', confidence=>1 } );
    }
}
|
|
|
|
|
2012-06-23 05:44:55 +02:00
|
|
|
# Retrains dspam with this message as a misclassified spam message.
# The 'dspam' transaction note is replaced by the training response, or by a
# fixed Spam result when dspam returned nothing.
sub train_error_as_spam {
    my $self        = shift;
    my $transaction = shift;

    my $user      = $self->select_username( $transaction );
    my $dspam_bin = $self->{_args}{dspam_bin} || '/usr/local/bin/dspam';

    # The command goes through a shell. The username comes from the envelope
    # recipient address and may contain shell metacharacters, so it is
    # single-quoted with embedded single quotes escaped.
    my $quoted_user = defined $user ? $user : '';
    $quoted_user =~ s/'/'\\''/g;

    my $cmd = "$dspam_bin --user '$quoted_user' --mode=toe --source=error --class=spam --deliver=summary --stdout";
    my $response = $self->dspam_process( $cmd, $transaction );
    if ( $response ) {
        $transaction->notes('dspam', $response);
    }
    else {
        $transaction->notes('dspam', { class => 'Spam', result => 'Spam', confidence=>1 } );
    }
}
|
|
|
|
|
|
|
|
# Dispatches to the autolearn_* trainers according to the 'autolearn'
# setting. Does nothing when the setting is absent, and logs an error when it
# is not one of any, karma, naughty or spamassassin.
sub autolearn {
    my ( $self, $response, $transaction ) = @_;

    my $mode = $self->{_args}{autolearn};
    return if ! defined $mode;

    my %is_valid = map { $_ => 1 } qw( any karma naughty spamassassin );
    if ( ! $is_valid{$mode} ) {
        $self->log(LOGERROR, "bad autolearn setting! Read 'perldoc plugins/dspam' again!");
        return;
    }

    # only train once: stop at the first trainer that reports it trained.
    foreach my $trainer ( qw( autolearn_naughty autolearn_karma autolearn_spamassassin ) ) {
        return if $self->$trainer( $response, $transaction );
    }
    return;
}
|
|
|
|
|
|
|
|
# Trains dspam from the 'naughty' connection note. Active when autolearn is
# 'naughty' or 'any'. A naughty connection whose message dspam classified as
# Innocent is retrained as spam.
# Returns 1 when training happened, nothing otherwise.
sub autolearn_naughty {
    my ( $self, $response, $transaction ) = @_;

    my $learn = $self->{_args}{autolearn};
    return if ! $learn;

    unless ( $learn eq 'naughty' || $learn eq 'any' ) {
        $self->log(LOGINFO, "skipping naughty autolearn");
        return;
    }

    my $is_false_negative = $self->connection->notes('naughty')
                         && $response->{result} eq 'Innocent';

    if ( ! $is_false_negative ) {
        $self->log(LOGDEBUG, "falling through naughty autolearn");
        return;
    }

    $self->log(LOGINFO, "training naughty FN message as spam");
    $self->train_error_as_spam( $transaction );
    return 1;
}
|
|
|
|
|
|
|
|
# Trains dspam from the 'karma' connection note. Active when autolearn is
# 'karma' or 'any'. Karma below -2 with an Innocent result is retrained as
# spam; karma above 2 with a Spam result is retrained as ham.
# Returns 1 when training happened, nothing otherwise.
sub autolearn_karma {
    my ( $self, $response, $transaction ) = @_;

    my $learn = $self->{_args}{autolearn};
    return if ! $learn;
    return unless $learn eq 'karma' || $learn eq 'any';

    my $karma = $self->connection->notes('karma');
    return unless defined $karma;

    my $bad_sender  = $karma < -2;
    my $good_sender = $karma > 2;

    if ( $bad_sender && $response->{result} eq 'Innocent' ) {
        $self->log(LOGINFO, "training bad karma ($karma) FN as spam");
        $self->train_error_as_spam( $transaction );
        return 1;
    }

    if ( $good_sender && $response->{result} eq 'Spam' ) {
        $self->log(LOGINFO, "training good karma ($karma) FP as ham");
        $self->train_error_as_ham( $transaction );
        return 1;
    }

    return;
}
|
|
|
|
|
|
|
|
# Trains dspam from the 'spamassassin' transaction note. Active when
# autolearn is 'spamassassin' or 'any'. Training only happens when the note's
# is_spam and autolearn values agree with each other and contradict dspam's
# result.
# Returns 1 when training happened, nothing otherwise.
sub autolearn_spamassassin {
    my ( $self, $response, $transaction ) = @_;

    my $learn = $self->{_args}{autolearn};
    return if ! $learn;
    return unless $learn eq 'spamassassin' || $learn eq 'any';

    my $sa = $transaction->notes('spamassassin' );
    unless ( $sa && $sa->{is_spam} ) {
        # SA skips naughty, so missing results are only an error otherwise
        $self->log(LOGERROR, "SA results missing")
            if ! $self->connection->notes('naughty');
        return;
    }

    unless ( $sa->{autolearn} ) {
        $self->log(LOGERROR, "SA autolearn unset");
        return;
    }

    my $sa_learned_spam = $sa->{is_spam} eq 'Yes' && $sa->{autolearn} eq 'spam';
    my $sa_learned_ham  = $sa->{is_spam} eq 'No'  && $sa->{autolearn} eq 'ham';

    if ( $sa_learned_spam && $response->{result} eq 'Innocent' ) {
        $self->log(LOGINFO, "training SA FN as spam");
        $self->train_error_as_spam( $transaction );
        return 1;
    }

    if ( $sa_learned_ham && $response->{result} eq 'Spam' ) {
        $self->log(LOGINFO, "training SA FP as ham");
        $self->train_error_as_ham( $transaction );
        return 1;
    }

    return;
}
|