From 102e0682978338aefeb02343f75ed83104f17d91 Mon Sep 17 00:00:00 2001 From: Matt Simerson Date: Fri, 20 Apr 2012 01:42:04 -0400 Subject: [PATCH] added dspam plugin --- config.sample/plugins | 2 + plugins/dspam | 341 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 343 insertions(+) create mode 100644 plugins/dspam diff --git a/config.sample/plugins b/config.sample/plugins index 3ac4af1..451d749 100644 --- a/config.sample/plugins +++ b/config.sample/plugins @@ -57,6 +57,8 @@ spamassassin # # spamassassin reject_threshold 20 munge_subject_threshold 10 +# dspam must run after spamassassin for the learn_from_sa feature to work +dspam learn_from_sa 7 reject 1 # run the clamav virus checking plugin # virus/clamav diff --git a/plugins/dspam b/plugins/dspam new file mode 100644 index 0000000..86f59f0 --- /dev/null +++ b/plugins/dspam @@ -0,0 +1,341 @@ +#!perl -Tw + +=head1 NAME + +dspam - dspam integration for qpsmtpd + +=head1 DESCRIPTION + +qpsmtpd plugin that uses dspam to classify messages. Can use SpamAssassin to +train dspam. + +Adds the X-DSPAM-Result and X-DSPAM-Signature headers to messages. The latter is essential for +training dspam and the former is useful to MDAs, MUAs, and humans. + +=head1 TRAINING DSPAM + +To get dspam into a useful state, it must be trained. The best way to +train dspam is to feed it two large equal sized corpuses of spam and ham from +your mail server. The dspam authors suggest avoiding public corpuses. I do +this as follows: + +=over 4 + +=item learn from SpamAssassin + +See the docs on the learn_from_sa feature in the CONFIG section. + +=item daily training + +I have a script that crawls the contents of every user's maildir each night. +The script builds two lists of messages: ham and spam. + +The spam message list consists of all read messages in folders named Spam +that have changed since the last spam learning run (normally 1 day).
+ +The ham message list consists of read messages in any folder not named like +Spam, Junk, Trash, or Deleted. This catches messages that users have read +and left in their inbox or filed away into subfolders. + +=item on-the-fly training + +=back + + + +=head1 CONFIG + +=over 4 + +=item dspam_bin + +The path to the dspam binary. If yours is installed somewhere other +than /usr/local/bin/dspam, you'll need to set this. + +=item learn_from_sa + +Dspam can be trained by SpamAssassin. This relationship between them requires +attention to several important details: + +=over 4 + +=item 1 + +dspam must be listed B<after> spamassassin in the config/plugins file. +Because SA runs first, I crank the SA reject_threshold up above 100 so that +all spam messages will be used to train dspam. + +Once dspam is trained and errors are rare, I plan to run dspam first and +reduce the SA load. + +=item 2 + +Autolearn must be enabled and configured in SpamAssassin. SA autolearn +preferences will determine whether a message is learned as spam or innocent +by dspam. The settings to pay careful attention to in your SA local.cf file +are bayes_auto_learn_threshold_spam and bayes_auto_learn_threshold_nonspam. +Make sure they are both set to conservative values that are certain to +yield no false positives. + +If you are using learn_from_sa and reject, then messages that exceed the SA +thresholds will cause dspam to reject them. Again I say, make sure the SA +autolearn thresholds are set high enough to avoid false positives. + +=item 3 + +dspam must be configured and working properly.
I have modified the following +dspam values on my system: + +=over 4 + +=item mysql storage + +=item Trust smtpd + +=item TrainingMode tum + +=item Tokenizer osb + +=item Preference "trainingMode=TOE" + +=item Preference "spamAction=deliver" + +=item Preference "signatureLocation=headers" + +=item TrainPristine off + +=item ParseToHeaders off + +=back + +Of those changes, the most important is the signature location. This plugin +only supports storing the signature in the headers. If you want to train dspam +after delivery (ie, users moving messages to/from spam folders), then the +dspam signature must be in the headers. + +=back + +=item reject + +Set to a floating point value between 0 and 1.00 where 0 is no confidence +and 1.0 is 100% confidence. + +If dspam's confidence is greater than or equal to this threshold, the +message will be rejected. + +=back + + +=head1 MULTIPLE RECIPIENT BEHAVIOR + +For messages with multiple recipients, the user that dspam is running as will +be the dspam username. + +When messages have a single recipient, the recipient address is used as the +dspam username. For dspam to trust qpsmtpd with modifying the username, you +B<must> add the username that qpsmtpd is running as to the dspamd.conf file. + +ie, (Trust smtpd).
=head1 CHANGES

=cut

use strict;
use warnings;

use Qpsmtpd::Constants;
use Qpsmtpd::DSN;
use IO::Handle;
use Socket qw(:DEFAULT :crlf);

# Plugin setup. Stores the key/value arguments given on the plugin's line in
# config/plugins in $self->{_args} and, when 'reject' is set, registers
# dspam_reject as an additional data_post hook.
sub register {
    my ($self, $qp, @args) = @_;

    # arguments are key/value pairs, so an odd count means a malformed line
    $self->log(LOGERROR, "Bad parameters for the dspam plugin") if @args % 2;

    %{$self->{_args}} = @args;

    $self->register_hook('data_post', 'dspam_reject')
        if $self->{_args}->{reject};
}

# data_post hook: pipes the message through dspam and records the verdict in
# the X-DSPAM-Result and X-DSPAM-Signature headers. Always returns DECLINED;
# rejection is left to dspam_reject.
sub hook_data_post {
    my ($self, $transaction) = @_;

    $self->log(LOGDEBUG, "check_dspam");
    return (DECLINED) if $transaction->data_size > 500_000;

    my $username  = $self->select_username( $transaction );
    my $message   = $self->assemble_message( $transaction );
    my $filtercmd = $self->get_filter_cmd( $transaction, $username );
    $self->log(LOGWARN, $filtercmd);

    my $response = $self->dspam_process( $filtercmd, $message );
    if ( ! $response ) {
        $self->log(LOGWARN, "No response received from dspam. Check your logs for errors.");
        return (DECLINED);
    }
    $self->log(LOGWARN, $response);

    # X-DSPAM-Result: user@example.com; result="Spam"; class="Spam"; probability=1.0000; confidence=1.00; signature=N/A
    # X-DSPAM-Result: smtpd; result="Innocent"; class="Innocent"; probability=0.0023; confidence=1.00; signature=4f8dae6a446008399211546
    my ($result, $prob, $conf, $sig) = $response =~ /result=\"(Spam|Innocent)\";.*?probability=([\d\.]+); confidence=([\d\.]+); signature=(.*)/;

    # anything that does not look like a summary line (e.g. an error message
    # from dspam) must not be turned into headers full of undefined values
    if ( ! defined $result ) {
        $self->log(LOGWARN, "Unable to parse the dspam response, not adding headers");
        return (DECLINED);
    }
    $sig =~ s/\s+\z//;    # no stray CR or whitespace inside the header value

    my $header_str = "$result, probability=$prob, confidence=$conf";
    $self->log(LOGWARN, $header_str);
    $transaction->header->add('X-DSPAM-Result', $header_str, 0);

    # the signature header is required if you intend to train dspam later
    # you must set Preference "signatureLocation=headers" in dspam.conf
    $transaction->header->add('X-DSPAM-Signature', $sig, 0);

    return (DECLINED);
}

# Chooses the dspam username: the lowercased recipient address when the
# message has exactly one recipient, otherwise the name of the user this
# process runs as (effective uid).
sub select_username {
    my ($self, $transaction) = @_;

    my @recipients      = $transaction->recipients;
    my $recipient_count = scalar @recipients;
    $self->log(LOGDEBUG, "Message has $recipient_count recipients");

    if ( $recipient_count > 1 ) {
        $self->log(LOGINFO, "skipping user prefs, $recipient_count recipients detected.");
    }

    # force scalar context so only the user name is returned, whatever
    # context the caller uses; also covers the no-recipient case
    return scalar getpwuid($>) if $recipient_count != 1;

    # use the recipient's email address as username. This enables user prefs
    return lc( $recipients[0]->address );
}

# Builds the text handed to dspam: an X-Envelope-From line, the headers and
# the body, with line endings converted to CRLF.
sub assemble_message {
    my ($self, $transaction) = @_;

    $transaction->body_resetpos;

    my $message = "X-Envelope-From: "
        . $transaction->sender->format . "\n"
        . $transaction->header->as_string . "\n\n";

    # test for definedness so a line that evaluates to false cannot end the
    # loop early
    while ( defined( my $line = $transaction->body_getline ) ) {
        $message .= $line;
    }

    $message = join(CRLF, split /\n/, $message);
    return $message . CRLF;
}

# Runs $filtercmd with $message on its standard input and returns whatever
# the command printed on standard output (chomped). Returns nothing when the
# process cannot be forked.
sub dspam_process {
    my ( $self, $filtercmd, $message ) = @_;

    #return $self->dspam_process_open2( $filtercmd, $message );

    # fork-open: the parent reads from $in_fh what the child (and the dspam
    # process it starts) writes to STDOUT
    my $pid = open(my $in_fh, '-|');
    if ( ! defined $pid ) {
        # fork failed. We are still the parent, so do not fall into the
        # child branch below (it ends with exit).
        $self->log(LOGERROR, "dspam: unable to fork: $!");
        return;
    }

    if ( $pid == 0 ) {
        # child: feed the message to dspam, then leave. Never return or die
        # from here, or a second copy of the caller would keep running.
        my $out_fh;
        if ( ! open($out_fh, '|-', $filtercmd) ) {
            warn "Can't run $filtercmd: $!\n";
            exit(1);
        }
        print {$out_fh} $message;
        close $out_fh;
        exit(0);
    }

    my $response = join('', <$in_fh>);
    close $in_fh;
    chomp $response;

    return $response;
}

# Alternative to dspam_process built on IPC::Open2. Currently unused.
sub dspam_process_open2 {
    my ( $self, $filtercmd, $message ) = @_;

# not sure why, but this is not as reliable as I'd like. What's a dspam
# error -5 mean anyway?
    use FileHandle;
    use IPC::Open2;
    my ($dspam_in, $dspam_out);
    my $pid = open2($dspam_out, $dspam_in, $filtercmd);
    print {$dspam_in} $message;
    close $dspam_in;
    my $response = join('', <$dspam_out>);
    waitpid $pid, 0;
    chomp $response;
    return $response;
}

# data_post hook (registered only when 'reject' is set): rejects messages the
# X-DSPAM-Result header marks as certain spam, unless the client is a relay
# client.
#
# NOTE: the 'reject' argument only switches this check on. The test below
# requires a probability and a confidence of exactly 1; the configured value
# is not compared against dspam's confidence.
sub dspam_reject {
    my ($self, $transaction) = @_;

    return (DECLINED) if ! $self->{_args}->{reject};

    my $status = $transaction->header->get('X-DSPAM-Result');
    if ( ! $status ) {
        $self->log(LOGWARN, "dspam_reject: failed to find the dspam header");
        return (DECLINED);
    }

    my ($clas, $probability, $confidence) = $status =~ m/^(Spam|Innocent), probability=([\d\.]+), confidence=([\d\.]+)/i;
    if ( ! defined $clas ) {
        $self->log(LOGWARN, "dspam_reject: unable to parse the dspam header");
        return (DECLINED);
    }

    $self->log(LOGDEBUG, "dspam $clas, prob: $probability, conf: $confidence");

    return DECLINED
        unless $clas eq 'Spam' && $probability == 1 && $confidence == 1;

    if ( $self->qp->connection->relay_client ) {
        $self->log(LOGWARN, "allowing spam since user authenticated");
        return DECLINED;
    }

    # default of media_unsupported is DENY, so just change the message
    return Qpsmtpd::DSN->media_unsupported('dspam says, no spam please');
}

# Returns the dspam command line for $user. By default the message is only
# classified (--process). With learn_from_sa set and an X-Spam-Status header
# whose verdict and autolearn value agree, dspam is told to learn the message
# as spam or innocent instead.
sub get_filter_cmd {
    my ($self, $transaction, $user) = @_;

    my $dspam_bin = $self->{_args}->{dspam_bin} || '/usr/local/bin/dspam';

    # $user normally is the recipient address, i.e. data chosen by the remote
    # sender, and the command is run through the shell: quote it.
    my $base    = "$dspam_bin --user " . _shell_quote($user) . " --mode=tum";
    my $default = "$base --process --deliver=summary --stdout";
    my $min_score = $self->{_args}->{learn_from_sa} or return $default;

    #$self->log(LOGDEBUG, "attempting to learn from SA");
    my $sa_status = $transaction->header->get('X-Spam-Status');

    if ( ! $sa_status ) {
        $self->log(LOGERROR, "dspam learn_from_sa was set but no X-Spam-Status header detected");
        return $default;
    }
    chomp $sa_status;

    # /s lets .*? run across the line breaks of a folded header
    my ($is_spam, $score, $autolearn) = $sa_status =~ /^(yes|no), score=([\d\.\-]+)\s.*?autolearn=([\w]+)/is;
    if ( ! defined $is_spam ) {
        $self->log(LOGWARN, "dspam learn_from_sa: unable to parse X-Spam-Status: $sa_status");
        return $default;
    }
    $self->log(LOGINFO, "sa_status: $sa_status; $is_spam; $autolearn");

    $is_spam   = lc($is_spam);
    $autolearn = lc($autolearn);

    if ( $is_spam eq 'yes' && $score < $min_score ) {
        $self->log(LOGWARN, "SA spam score of $score is less than $min_score, skipping autolearn");
        return $default;
    }

    if ( $is_spam eq 'yes' && $autolearn eq 'spam' ) {
        return "$base --source=corpus --class=spam --deliver=summary --stdout";
    }
    if ( $is_spam eq 'no' && $autolearn eq 'ham' ) {
        return "$base --source=corpus --class=innocent --deliver=summary --stdout";
    }

    return $default;
}

# Wraps a value in single quotes for use as one word of a shell command. An
# embedded single quote is written as '\'' (close, escaped quote, reopen).
sub _shell_quote {
    my ($value) = @_;

    $value = '' if ! defined $value;
    $value =~ s/'/'\\''/g;
    return "'$value'";
}

# Removes existing $header_name headers from the message. With the default
# action 'rename' each value is first copied to an "X-Old-..." (or "Old-...")
# header; with leave_old_headers set to 'drop' the values are discarded; any
# other setting leaves the headers alone.
sub _cleanup_spam_header {
    my ($self, $transaction, $header_name) = @_;

    my $action = 'rename';
    if ( $self->{_args}->{leave_old_headers} ) {
        $action = lc($self->{_args}->{leave_old_headers});
    }

    return unless $action eq 'drop' || $action eq 'rename';

    my $old_header_name = $header_name;
    $old_header_name = ($old_header_name =~ s/^X-//) ? "X-Old-$old_header_name" : "Old-$old_header_name";

    for my $header ( $transaction->header->get($header_name) ) {
        $transaction->header->add($old_header_name, $header) if $action eq 'rename';
        $transaction->header->delete($header_name);
    }
}