2012-04-29 10:35:59 +02:00
|
|
|
#!perl -w
|
2007-10-23 10:47:38 +02:00
|
|
|
|
|
|
|
=head1 NAME
|
|
|
|
|
|
|
|
uribl - URIBL blocking plugin for qpsmtpd
|
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
|
|
|
|
This plugin implements DNSBL lookups for URIs found in spam, such as that
|
|
|
|
implemented by SURBL (see E<lt>http://surbl.org/E<gt>). Incoming messages are
|
|
|
|
scanned for URIs, which are then checked against one or more URIBLs in a
|
|
|
|
fashion similar to DNSBL systems.
|
|
|
|
|
|
|
|
=head1 CONFIGURATION
|
|
|
|
|
|
|
|
To enable the plugin, add it to I<~qpsmtpd/config/plugins>. The list of
|
|
|
|
URIBLs to check should be placed in I<uribl_zones> in the config directory
|
|
|
|
(typically I<~qpsmtpd/config>).
|
|
|
|
|
|
|
|
The format of the I<uribl_zones> file is a list of URIBL DNS zones, one per
|
|
|
|
line, consisting of one or more columns separated by whitespace. The first
|
|
|
|
column (the only mandatoy one) should consist of the URIBL zone.
|
|
|
|
|
|
|
|
The second column may contain a comma-delimited list of integers selecting
|
|
|
|
mask values to be applied to the A record(s) returned from a URIBL. This
|
|
|
|
enables the use of composite DNSBLs, such as multi.surbl.org, where several
|
|
|
|
lists are combined so they may be accessed with a single query; any returns
|
|
|
|
are checked against the mask of lists you're interested in. If unspecified,
|
|
|
|
or if a negative number is given, all lists in a composite URIBL will be
|
|
|
|
checked. URIBL operators prefer that you use the composite lists to reduce
|
|
|
|
their own query load, and it's more efficient for qpsmtpd as well.
|
|
|
|
|
|
|
|
The third column specifies an action, which overrides the default action
|
|
|
|
configured with the I<action> setting discussed below.
|
|
|
|
|
|
|
|
For example:
|
|
|
|
|
|
|
|
=over 4
|
|
|
|
|
|
|
|
multi.surbl.org 2,8 deny
|
|
|
|
ob.surbl.org 1 add-header
|
|
|
|
|
|
|
|
=back
|
|
|
|
|
|
|
|
You may specify the following config option(s) in the I<qpsmtpd/config> file:
|
|
|
|
|
|
|
|
=over 4
|
|
|
|
|
|
|
|
=item action
|
|
|
|
|
|
|
|
Specifies what to do when a URI is matched in a URIBL. Available options are
|
|
|
|
I<add-header> (the default) I<deny> and I<denysoft>. If set to add-header, an
|
|
|
|
X-URIBL-Match: header will be added explaining the URIBL entry found. If set
|
|
|
|
to 'deny,' the delivery will be declined with a hard failure. If set to
|
|
|
|
denysoft, the delivery will be soft failed (this is probably not a good idea.)
|
|
|
|
|
|
|
|
=item timeout
|
|
|
|
|
|
|
|
Timeout for DNS requests, in seconds. The default is 30 seconds. DNS
|
|
|
|
requests are issued asynchronously and in parallel for all hosts found
|
|
|
|
in URIs in the mail; the same timeout will apply to each; see the
|
|
|
|
Net::DNS documentation for details.
|
|
|
|
|
|
|
|
=item scan-headers
|
|
|
|
|
|
|
|
If set true, any headers found in the URIs will be checked as well. Disabled
|
|
|
|
by default.
|
|
|
|
|
2020-06-09 00:48:39 +02:00
|
|
|
=item checkip [yes|no]
|
|
|
|
|
|
|
|
Specifies whether ip-addresses should be searched for or not. Some URIBLs
|
|
|
|
only allows domain names and triggers positive on any ip-address.
|
|
|
|
Default value: yes.
|
|
|
|
|
2007-10-23 10:47:38 +02:00
|
|
|
=back
|
|
|
|
|
|
|
|
=head1 CAUTIONS
|
|
|
|
|
|
|
|
When used in I<deny> or I<denysoft> mode, a URIBL check can block not
|
|
|
|
only the original spam containing a listed URI, but mail unintentionally
|
|
|
|
carrying that URI, such as forwarded complaints. The uribl checks should
|
|
|
|
only be used in these modes if you know what you're doing.
|
|
|
|
|
|
|
|
The URI scanner used by the uribl plugin is quite aggressive, and attempts to
|
|
|
|
detect all forms of URIs supported by typical MUAs (even those that lack a
|
|
|
|
protocol specification, for example.) However, it does not attempt to detect
|
|
|
|
URIs that have been mangled beyond programmatic reconstruction. Even so, it
|
|
|
|
may issue spurious lookups on unintended URIs, especially those in non-text
|
|
|
|
sections of the mail.
|
|
|
|
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
|
|
|
|
uribl is copyright 2004-2007 by Devin Carraway E<lt>qpsmtpd@devin.comE<gt>. It
|
|
|
|
may be used and redistributed under the same terms as qpsmtpd itself.
|
|
|
|
|
|
|
|
=cut
|
|
|
|
|
2012-06-27 09:23:27 +02:00
|
|
|
use strict;
|
|
|
|
use warnings;
|
2007-10-23 10:47:38 +02:00
|
|
|
|
2008-05-19 09:22:51 +02:00
|
|
|
use Qpsmtpd::Constants;
|
|
|
|
|
2012-06-27 09:23:27 +02:00
|
|
|
use Time::HiRes qw(time);
|
|
|
|
use IO::Select;
|
2007-10-23 10:47:38 +02:00
|
|
|
|
|
|
|
# ccTLDs that allocate domain names within a strict two-level hierarchy,
|
|
|
|
# as in *.co.uk
|
|
|
|
my %strict_twolevel_cctlds = (
|
2013-04-21 06:50:39 +02:00
|
|
|
'ac' => 1,
|
|
|
|
'ae' => 1,
|
|
|
|
'uk' => 1,
|
|
|
|
'ai' => 1,
|
|
|
|
'ar' => 1,
|
|
|
|
'at' => 1,
|
|
|
|
'au' => 1,
|
|
|
|
'az' => 1,
|
|
|
|
'bb' => 1,
|
|
|
|
'bh' => 1,
|
|
|
|
'bm' => 1,
|
|
|
|
'br' => 1,
|
|
|
|
'bs' => 1,
|
|
|
|
'ca' => 1,
|
|
|
|
'ck' => 1,
|
|
|
|
'cn' => 1,
|
|
|
|
'co' => 1,
|
|
|
|
'cr' => 1,
|
|
|
|
'cu' => 1,
|
|
|
|
'cy' => 1,
|
|
|
|
'do' => 1,
|
|
|
|
'et' => 1,
|
|
|
|
'ge' => 1,
|
|
|
|
'hk' => 1,
|
|
|
|
'id' => 1,
|
|
|
|
'il' => 1,
|
|
|
|
'jp' => 1,
|
|
|
|
'kr' => 1,
|
|
|
|
'kw' => 1,
|
|
|
|
'lv' => 1,
|
|
|
|
'sg' => 1,
|
|
|
|
'za' => 1,
|
|
|
|
);
|
2007-10-23 10:47:38 +02:00
|
|
|
|
2008-06-02 17:51:04 +02:00
|
|
|
sub init {
|
2007-10-23 10:47:38 +02:00
|
|
|
my ($self, $qp, %args) = @_;
|
|
|
|
|
2013-04-21 06:50:39 +02:00
|
|
|
$self->{action} = $args{action} || 'add-header';
|
2007-10-23 10:47:38 +02:00
|
|
|
$self->{timeout} = $args{timeout} || 30;
|
2020-06-09 00:48:39 +02:00
|
|
|
$self->{checkip} = $args{checkip} || 'yes';
|
2013-04-21 06:50:39 +02:00
|
|
|
|
2010-02-19 09:04:53 +01:00
|
|
|
# scan-headers was the originally documented name for this option, while
|
|
|
|
# check-headers actually implements it, so tolerate both.
|
|
|
|
$self->{check_headers} = $args{'check-headers'} || $args{'scan-headers'};
|
2007-10-23 10:47:38 +02:00
|
|
|
|
|
|
|
$args{mask} ||= 0x00ffffff;
|
|
|
|
$self->{mask} = 0;
|
|
|
|
|
|
|
|
my @zones = $self->qp->config('uribl_zones');
|
|
|
|
for (@zones) {
|
|
|
|
chomp;
|
|
|
|
next if !$_ or /^\s*#/;
|
2013-04-21 06:50:39 +02:00
|
|
|
my @z = split(/\s+/, $_);
|
2007-10-23 10:47:38 +02:00
|
|
|
next unless $z[0];
|
|
|
|
|
|
|
|
my $mask = 0;
|
|
|
|
$z[1] ||= 0x00ffffff;
|
|
|
|
for (split /,/, $z[1]) {
|
|
|
|
unless (/^(-?\d+)$/) {
|
|
|
|
$self->log(LOGERROR, "Malformed mask $_ for $z[0]");
|
2014-09-17 20:38:40 +02:00
|
|
|
return;
|
2007-10-23 10:47:38 +02:00
|
|
|
}
|
|
|
|
$mask |= $1 >= 0 ? $1 : 0x00ffffff;
|
|
|
|
}
|
|
|
|
my $action = $z[2] || $self->{action};
|
|
|
|
unless ($action =~ /^(add-header|deny|denysoft|log)$/) {
|
|
|
|
$self->log(LOGERROR, "Unknown action $action for $z[0]");
|
2014-09-17 20:38:40 +02:00
|
|
|
return;
|
2007-10-23 10:47:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
$self->{uribl_zones}->{$z[0]} = {
|
2013-04-21 06:50:39 +02:00
|
|
|
mask => $mask,
|
|
|
|
action => $action,
|
|
|
|
};
|
2007-10-23 10:47:38 +02:00
|
|
|
}
|
|
|
|
keys %{$self->{uribl_zones}} or return 0;
|
|
|
|
|
|
|
|
my @whitelist = $self->qp->config('uribl_whitelist_domains');
|
2013-04-21 06:50:39 +02:00
|
|
|
$self->{whitelist_zones} = {(map { ($_ => 1) } @whitelist)};
|
2007-10-23 10:47:38 +02:00
|
|
|
|
2014-11-06 03:02:58 +01:00
|
|
|
$self->{resolver} = $self->get_resolver(timeout => $self->{timeout});
|
2008-06-02 17:51:04 +02:00
|
|
|
}
|
|
|
|
|
2007-10-23 10:47:38 +02:00
|
|
|
sub send_query {
|
2013-04-21 06:50:39 +02:00
|
|
|
my $self = shift;
|
2014-09-17 20:38:40 +02:00
|
|
|
my $name = shift || return;
|
2007-10-23 10:47:38 +02:00
|
|
|
my $count = 0;
|
|
|
|
|
2014-09-17 20:38:40 +02:00
|
|
|
$self->{socket_select} ||= new IO::Select or return;
|
2007-10-23 10:47:38 +02:00
|
|
|
for my $z (keys %{$self->{uribl_zones}}) {
|
|
|
|
my ($s, $s1);
|
|
|
|
my $index = {
|
2013-04-21 06:50:39 +02:00
|
|
|
zone => $z,
|
|
|
|
name => $name,
|
|
|
|
};
|
2007-10-23 10:47:38 +02:00
|
|
|
|
|
|
|
next unless $z;
|
|
|
|
next if exists $self->{sockets}->{$z}->{$name};
|
|
|
|
$s = $self->{resolver}->bgsend("$name.$z", 'A');
|
|
|
|
if (defined $s) {
|
|
|
|
$self->{sockets}->{$z}->{$name}->{'a'} = $s;
|
|
|
|
$self->{socket_select}->add($s);
|
|
|
|
$self->{socket_idx}->{"$s"} = $index;
|
|
|
|
$count++;
|
2013-04-21 06:50:39 +02:00
|
|
|
}
|
|
|
|
else {
|
2007-10-23 10:47:38 +02:00
|
|
|
$self->log(LOGERROR,
|
2013-04-21 06:50:39 +02:00
|
|
|
"Couldn't open socket for A record '$name.$z': "
|
|
|
|
. ($self->{resolver}->errorstring || 'unknown error')
|
|
|
|
);
|
2007-10-23 10:47:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
$s1 = $self->{resolver}->bgsend("$name.$z", 'TXT');
|
|
|
|
if (defined $s1) {
|
|
|
|
$self->{sockets}->{$z}->{$name}->{'txt'} = $s1;
|
|
|
|
$self->{socket_select}->add($s1);
|
|
|
|
$self->{socket_idx}->{"$s1"} = $index;
|
|
|
|
$count++;
|
2013-04-21 06:50:39 +02:00
|
|
|
}
|
|
|
|
else {
|
2007-10-23 10:47:38 +02:00
|
|
|
$self->log(LOGERROR,
|
2013-04-21 06:50:39 +02:00
|
|
|
"Couldn't open socket for TXT record '$name.$z': "
|
|
|
|
. ($self->{resolver}->errorstring || 'unknown error')
|
|
|
|
);
|
2007-10-23 10:47:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
$self->{sockets}->{$z}->{$name} = {};
|
|
|
|
}
|
|
|
|
$count;
|
|
|
|
}
|
|
|
|
|
|
|
|
sub lookup_finish {
|
|
|
|
my $self = shift;
|
|
|
|
$self->{socket_idx} = {};
|
2013-04-21 06:50:39 +02:00
|
|
|
$self->{sockets} = {};
|
2007-10-23 10:47:38 +02:00
|
|
|
undef $self->{socket_select};
|
|
|
|
}
|
|
|
|
|
|
|
|
sub evaluate {
|
|
|
|
my $self = shift;
|
2014-09-17 20:38:40 +02:00
|
|
|
my $zone = shift || return;
|
|
|
|
my $a = shift || return;
|
2007-10-23 10:47:38 +02:00
|
|
|
|
|
|
|
my $mask = $self->{uribl_zones}->{$zone}->{mask} || $self->{mask};
|
2014-09-17 20:38:40 +02:00
|
|
|
$a =~ /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/ or return;
|
2013-04-21 06:50:39 +02:00
|
|
|
my $v =
|
|
|
|
(($1 & 0xff) << 24) | (($2 & 0xff) << 16) | (($3 & 0xff) << 8) |
|
|
|
|
($4 & 0xff);
|
2014-09-18 03:28:51 +02:00
|
|
|
return $v & $mask;
|
2007-10-23 10:47:38 +02:00
|
|
|
}
|
|
|
|
|
2008-06-02 17:51:04 +02:00
|
|
|
sub lookup_start {
|
|
|
|
my ($self, $transaction, $start_query) = @_;
|
|
|
|
|
2007-10-23 10:47:38 +02:00
|
|
|
my $l;
|
|
|
|
my $queries = 0;
|
|
|
|
my %pending;
|
|
|
|
my @qp_continuations;
|
2020-06-09 00:48:39 +02:00
|
|
|
|
|
|
|
my $tlds4regex = $self->get_tlds4regex;
|
2007-10-23 10:47:38 +02:00
|
|
|
|
2008-05-05 19:05:38 +02:00
|
|
|
$transaction->body_resetpos;
|
2013-04-21 06:50:39 +02:00
|
|
|
|
|
|
|
# if we're not looking for URIs in the headers, read past that point
|
|
|
|
# before starting to actually look for any
|
2010-02-19 09:04:53 +01:00
|
|
|
while (!$self->{check_headers} and $l = $transaction->body_getline) {
|
2007-10-23 10:47:38 +02:00
|
|
|
chomp $l;
|
|
|
|
last if !$l;
|
|
|
|
}
|
2020-06-09 00:48:39 +02:00
|
|
|
while ($l = $transaction->body_getline || @qp_continuations) {
|
2007-10-23 10:47:38 +02:00
|
|
|
chomp $l;
|
|
|
|
|
2010-02-19 09:04:53 +01:00
|
|
|
if ($l =~ /(.*)=$/) {
|
|
|
|
push @qp_continuations, $1;
|
2020-06-09 00:48:39 +02:00
|
|
|
next;
|
2013-04-21 06:50:39 +02:00
|
|
|
}
|
|
|
|
elsif (@qp_continuations) {
|
2020-06-09 00:48:39 +02:00
|
|
|
$l = '' if $l eq '1';
|
2010-02-19 09:04:53 +01:00
|
|
|
$l = join('', @qp_continuations, $l);
|
|
|
|
@qp_continuations = ();
|
|
|
|
}
|
2007-10-23 10:47:38 +02:00
|
|
|
|
|
|
|
# Undo URI escape munging
|
|
|
|
$l =~ s/[=%]([0-9A-Fa-f]{2,2})/chr(hex($1))/ge;
|
2013-04-21 06:50:39 +02:00
|
|
|
|
2007-10-23 10:47:38 +02:00
|
|
|
# Undo HTML entity munging (e.g. in parameterized redirects)
|
|
|
|
$l =~ s/&#(\d{2,3});?/chr($1)/ge;
|
2013-04-21 06:50:39 +02:00
|
|
|
|
2007-10-23 10:47:38 +02:00
|
|
|
# Dodge inserted-semicolon munging
|
|
|
|
$l =~ tr/;//d;
|
|
|
|
|
2020-06-09 00:48:39 +02:00
|
|
|
# Check for IP-addresses?
|
|
|
|
if ($self->{checkip} eq 'yes') {
|
2013-04-21 06:50:39 +02:00
|
|
|
while (
|
|
|
|
$l =~ m{
|
2007-10-23 10:47:38 +02:00
|
|
|
\w{3,16}:/+ # protocol
|
|
|
|
(?:\S+@)? # user/pass
|
|
|
|
(\d{7,}) # raw-numeric IP
|
|
|
|
(?::\d*)?([/?\s]|$) # port, slash
|
|
|
|
# or EOL
|
2013-04-21 06:50:39 +02:00
|
|
|
}gx
|
|
|
|
)
|
|
|
|
{
|
2007-10-23 10:47:38 +02:00
|
|
|
my @octets = (
|
2013-04-21 06:50:39 +02:00
|
|
|
(($1 >> 24) & 0xff),
|
|
|
|
(($1 >> 16) & 0xff),
|
|
|
|
(($1 >> 8) & 0xff),
|
|
|
|
($1 & 0xff)
|
|
|
|
);
|
2007-10-23 10:47:38 +02:00
|
|
|
my $fwd = join('.', @octets);
|
|
|
|
my $rev = join('.', reverse @octets);
|
2013-04-21 06:50:39 +02:00
|
|
|
$self->log(LOGDEBUG,
|
|
|
|
"uribl: matched pure-integer ipaddr $1 ($fwd)");
|
2007-10-23 10:47:38 +02:00
|
|
|
unless (exists $pending{$rev}) {
|
2008-06-02 17:51:04 +02:00
|
|
|
$queries += $start_query->($self, $rev);
|
2007-10-23 10:47:38 +02:00
|
|
|
$pending{$rev} = 1;
|
|
|
|
}
|
|
|
|
}
|
2013-04-21 06:50:39 +02:00
|
|
|
while (
|
|
|
|
$l =~ m{
|
2007-10-23 10:47:38 +02:00
|
|
|
\w{3,16}:/+ # protocol
|
|
|
|
(?:\S+@)? # user/pass
|
|
|
|
(\d+|0[xX][0-9A-Fa-f]+)\. # IP address
|
|
|
|
(\d+|0[xX][0-9A-Fa-f]+)\.
|
|
|
|
(\d+|0[xX][0-9A-Fa-f]+)\.
|
|
|
|
(\d+|0[xX][0-9A-Fa-f]+)
|
2013-04-21 06:50:39 +02:00
|
|
|
}gx
|
|
|
|
)
|
|
|
|
{
|
|
|
|
my @octets = ($1, $2, $3, $4);
|
|
|
|
|
2007-10-23 10:47:38 +02:00
|
|
|
# return any octal/hex octets in the IP addr back
|
|
|
|
# to decimal form (e.g. http://0x7f.0.0.00001)
|
2013-04-21 06:50:39 +02:00
|
|
|
for (0 .. $#octets) {
|
2007-10-23 10:47:38 +02:00
|
|
|
$octets[$_] =~ s/^0([0-7]+)$/oct($1)/e;
|
|
|
|
$octets[$_] =~ s/^0x([0-9a-fA-F]+)$/hex($1)/e;
|
|
|
|
}
|
|
|
|
my $fwd = join('.', @octets);
|
|
|
|
my $rev = join('.', reverse @octets);
|
|
|
|
$self->log(LOGDEBUG, "uribl: matched URI ipaddr $fwd");
|
|
|
|
unless (exists $pending{$rev}) {
|
2008-06-02 17:51:04 +02:00
|
|
|
$queries += $start_query->($self, $rev);
|
2007-10-23 10:47:38 +02:00
|
|
|
$pending{$rev} = 1;
|
|
|
|
}
|
|
|
|
}
|
2020-06-09 00:48:39 +02:00
|
|
|
}
|
2013-04-21 06:50:39 +02:00
|
|
|
while (
|
|
|
|
$l =~ m{
|
2008-05-19 09:22:51 +02:00
|
|
|
((?:www\.)? # www?
|
2020-06-09 00:48:39 +02:00
|
|
|
[a-zA-Z0-9](?:[a-zA-Z0-9\-]+\.)+ # hostname
|
|
|
|
(?:$tlds4regex|[a-zA-Z]{2}) # tld
|
2008-05-19 09:22:51 +02:00
|
|
|
)(?!\w)
|
2013-04-21 06:50:39 +02:00
|
|
|
}gix
|
|
|
|
)
|
|
|
|
{
|
2008-05-17 05:55:18 +02:00
|
|
|
my $host = lc $1;
|
2007-10-23 10:47:38 +02:00
|
|
|
my @host_domains = split /\./, $host;
|
|
|
|
$self->log(LOGDEBUG, "uribl: matched 'www.' hostname $host");
|
|
|
|
|
2013-04-21 06:50:39 +02:00
|
|
|
my $cutoff =
|
|
|
|
exists $strict_twolevel_cctlds{$host_domains[$#host_domains]}
|
|
|
|
? 3
|
|
|
|
: 2;
|
|
|
|
if (
|
|
|
|
exists $self->{whitelist_zones}->{
|
|
|
|
join('.',
|
|
|
|
@host_domains[($#host_domains - $cutoff + 1)
|
|
|
|
.. $#host_domains])
|
|
|
|
}
|
|
|
|
)
|
|
|
|
{
|
2007-10-23 10:47:38 +02:00
|
|
|
$self->log(LOGINFO, "Skipping whitelist URI domain '$host'");
|
2013-04-21 06:50:39 +02:00
|
|
|
}
|
|
|
|
else {
|
2007-10-23 10:47:38 +02:00
|
|
|
while (@host_domains >= $cutoff) {
|
|
|
|
my $subhost = join('.', @host_domains);
|
|
|
|
unless (exists $pending{$subhost}) {
|
2013-04-21 06:50:39 +02:00
|
|
|
$self->log(LOGINFO,
|
|
|
|
"URIBL: checking sub-host $subhost");
|
2008-06-02 17:51:04 +02:00
|
|
|
$queries += $start_query->($self, $subhost);
|
2007-10-23 10:47:38 +02:00
|
|
|
$pending{$subhost} = 1;
|
|
|
|
}
|
|
|
|
shift @host_domains;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-04-21 06:50:39 +02:00
|
|
|
while (
|
|
|
|
$l =~ m{
|
2007-10-23 10:47:38 +02:00
|
|
|
\w{3,16}:/+ # protocol
|
|
|
|
(?:\S+@)? # user/pass
|
2008-07-04 04:02:26 +02:00
|
|
|
(
|
2020-06-09 00:48:39 +02:00
|
|
|
[a-zA-Z0-9](?:[a-zA-Z0-9\-]+\.)+ # hostname
|
|
|
|
(?:$tlds4regex|[a-zA-Z]{2}) # tld
|
|
|
|
)(?!\w)
|
2013-04-21 06:50:39 +02:00
|
|
|
}gix
|
|
|
|
)
|
|
|
|
{
|
2008-05-17 05:55:18 +02:00
|
|
|
my $host = lc $1;
|
2007-10-23 10:47:38 +02:00
|
|
|
my @host_domains = split /\./, $host;
|
|
|
|
$self->log(LOGDEBUG, "uribl: matched full URI hostname $host");
|
|
|
|
|
2013-04-21 06:50:39 +02:00
|
|
|
my $cutoff =
|
|
|
|
exists $strict_twolevel_cctlds{$host_domains[$#host_domains]}
|
|
|
|
? 3
|
|
|
|
: 2;
|
|
|
|
if (
|
|
|
|
exists $self->{whitelist_zones}
|
|
|
|
->{join('.', @host_domains[($cutoff - 1) .. $#host_domains])})
|
|
|
|
{
|
2007-10-23 10:47:38 +02:00
|
|
|
|
|
|
|
$self->log(LOGINFO, "Skipping whitelist URI domain '$host'");
|
2013-04-21 06:50:39 +02:00
|
|
|
}
|
|
|
|
else {
|
2007-10-23 10:47:38 +02:00
|
|
|
while (@host_domains >= $cutoff) {
|
|
|
|
my $subhost = join('.', @host_domains);
|
|
|
|
unless (exists $pending{$subhost}) {
|
2013-04-21 06:50:39 +02:00
|
|
|
$self->log(LOGINFO,
|
|
|
|
"URIBL: checking sub-host $subhost");
|
2008-06-02 17:51:04 +02:00
|
|
|
$queries += $start_query->($self, $subhost);
|
2007-10-23 10:47:38 +02:00
|
|
|
$pending{$subhost} = 1;
|
|
|
|
}
|
|
|
|
shift @host_domains;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2008-05-05 19:05:38 +02:00
|
|
|
$transaction->body_resetpos;
|
2007-10-23 10:47:38 +02:00
|
|
|
|
2008-06-02 17:51:04 +02:00
|
|
|
return $queries;
|
|
|
|
}
|
|
|
|
|
|
|
|
sub collect_results {
|
|
|
|
my ($self, $transaction) = @_;
|
2007-10-23 10:47:38 +02:00
|
|
|
|
2013-04-21 06:50:39 +02:00
|
|
|
my $matches = 0;
|
|
|
|
my $complete = 0;
|
2007-10-23 10:47:38 +02:00
|
|
|
my $start_time = time;
|
|
|
|
while ($self->{socket_select}->handles) {
|
|
|
|
my $timeout = ($start_time + $self->{timeout}) - time;
|
|
|
|
last if $timeout <= 0;
|
|
|
|
|
|
|
|
my @ready = $self->{socket_select}->can_read($timeout);
|
|
|
|
|
2013-04-21 06:50:39 +02:00
|
|
|
SOCK: for my $s (@ready) {
|
2007-10-23 10:47:38 +02:00
|
|
|
$self->{socket_select}->remove($s);
|
|
|
|
my $r = $self->{socket_idx}->{"$s"} or next SOCK;
|
2013-04-21 06:50:39 +02:00
|
|
|
$self->log(LOGDEBUG,
|
|
|
|
"from $r: socket $s: "
|
|
|
|
. join(', ', map { "$_=$r->{$_}" } keys %{$r})
|
|
|
|
);
|
|
|
|
my $zone = $r->{zone};
|
|
|
|
my $name = $r->{name};
|
|
|
|
my $h = $self->{sockets}->{$zone}->{$name};
|
2007-10-23 10:47:38 +02:00
|
|
|
my $packet = $self->{resolver}->bgread($s)
|
2013-04-21 06:50:39 +02:00
|
|
|
or next SOCK;
|
2007-10-23 10:47:38 +02:00
|
|
|
|
|
|
|
for my $a ($packet->answer) {
|
|
|
|
if ($a->type eq 'TXT') {
|
|
|
|
$h->{txt} = $a->txtdata;
|
|
|
|
}
|
|
|
|
elsif ($a->type eq 'A') {
|
|
|
|
$h->{a} = $a->address;
|
|
|
|
if ($self->evaluate($zone, $h->{a})) {
|
2013-04-21 06:50:39 +02:00
|
|
|
$self->log(LOGDEBUG, "match in $zone");
|
2007-10-23 10:47:38 +02:00
|
|
|
$h->{match} = 1;
|
|
|
|
$matches++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
$complete++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
my $elapsed = time - $start_time;
|
|
|
|
$self->log(LOGINFO,
|
2013-04-21 06:50:39 +02:00
|
|
|
sprintf(
|
|
|
|
"$complete lookup%s finished in %.2f sec (%d match%s)",
|
|
|
|
$complete == 1 ? '' : 's', $elapsed,
|
|
|
|
$matches, $matches == 1 ? '' : 'es'
|
|
|
|
)
|
|
|
|
);
|
2007-10-23 10:47:38 +02:00
|
|
|
|
|
|
|
my @matches = ();
|
|
|
|
for my $z (keys %{$self->{sockets}}) {
|
|
|
|
for my $n (keys %{$self->{sockets}->{$z}}) {
|
|
|
|
my $h = $self->{sockets}->{$z}->{$n};
|
|
|
|
next unless $h->{match};
|
2013-04-21 06:50:39 +02:00
|
|
|
push @matches,
|
|
|
|
{
|
|
|
|
action => $self->{uribl_zones}->{$z}->{action},
|
|
|
|
desc => "$n in $z: " . ($h->{txt} || $h->{a}),
|
|
|
|
};
|
2007-10-23 10:47:38 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
$self->lookup_finish;
|
|
|
|
|
2008-06-02 17:51:04 +02:00
|
|
|
return \@matches;
|
|
|
|
}
|
|
|
|
|
2016-04-19 18:22:43 +02:00
|
|
|
sub hook_data_post {
|
2008-06-02 17:51:04 +02:00
|
|
|
my ($self, $transaction) = @_;
|
|
|
|
|
2014-09-18 03:28:51 +02:00
|
|
|
return DECLINED if $self->is_immune();
|
2012-06-03 03:44:46 +02:00
|
|
|
|
2013-04-21 06:50:39 +02:00
|
|
|
my $queries = $self->lookup_start(
|
|
|
|
$transaction,
|
|
|
|
sub {
|
|
|
|
my ($self, $name) = @_;
|
|
|
|
return $self->send_query($name);
|
|
|
|
}
|
|
|
|
);
|
2008-06-02 17:51:04 +02:00
|
|
|
|
2014-09-17 20:38:40 +02:00
|
|
|
if (!$queries) {
|
2012-12-12 20:07:19 +01:00
|
|
|
$self->log(LOGINFO, "pass, No URIs found in mail");
|
2008-06-02 17:51:04 +02:00
|
|
|
return DECLINED;
|
|
|
|
}
|
|
|
|
|
|
|
|
my $matches = $self->collect_results($transaction);
|
|
|
|
for (@$matches) {
|
2007-10-23 10:47:38 +02:00
|
|
|
$self->log(LOGWARN, $_->{desc});
|
|
|
|
if ($_->{action} eq 'add-header') {
|
2012-06-23 05:52:32 +02:00
|
|
|
$transaction->header->add('X-URIBL-Match', $_->{desc}, 0);
|
2013-04-21 06:50:39 +02:00
|
|
|
}
|
|
|
|
elsif ($_->{action} eq 'deny') {
|
2014-09-18 03:28:51 +02:00
|
|
|
return DENY, $_->{desc};
|
2013-04-21 06:50:39 +02:00
|
|
|
}
|
|
|
|
elsif ($_->{action} eq 'denysoft') {
|
2014-09-18 03:28:51 +02:00
|
|
|
return DENYSOFT, $_->{desc};
|
2007-10-23 10:47:38 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return DECLINED;
|
|
|
|
}
|
|
|
|
|
2020-06-09 00:48:39 +02:00
|
|
|
sub get_tlds4regex {
|
|
|
|
|
|
|
|
# TLD list from http://data.iana.org/TLD/tlds-alpha-by-domain.txt as of 2017-03-03
|
|
|
|
|
|
|
|
return 'aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|ac|academy|
|
|
|
|
accenture|accountant|accountants|aco|active|actor|ad|adac|ads|adult|ae|aeg|aero|
|
|
|
|
aetna|af|afamilycompany|afl|ag|agakhan|agency|ai|aig|aigo|airbus|airforce|
|
|
|
|
airtel|akdn|al|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|
|
|
|
|
am|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|
|
|
|
|
anquan|anz|ao|aol|apartments|app|apple|aq|aquarelle|ar|aramco|archi|army|arpa|
|
|
|
|
art|arte|as|asda|asia|associates|at|athleta|attorney|au|auction|audi|audible|
|
|
|
|
audio|auspost|author|auto|autos|avianca|aw|aws|ax|axa|az|azure|ba|baby|baidu|
|
|
|
|
banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|
|
|
|
|
bargains|baseball|basketball|bauhaus|bayern|bb|bbc|bbt|bbva|bcg|bcn|bd|be|beats|
|
|
|
|
beauty|beer|bentley|berlin|best|bestbuy|bet|bf|bg|bh|bharti|bi|bible|bid|bike|
|
|
|
|
bing|bingo|bio|biz|bj|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|
|
|
|
|
bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bofa|bom|bond|boo|book|booking|
|
|
|
|
boots|bosch|bostik|bot|boutique|br|bradesco|bridgestone|broadway|broker|brother|
|
|
|
|
brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|
|
|
|
|
bzh|ca|cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|
|
|
|
|
capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|
|
|
|
|
casa|case|caseih|cash|casino|cat|catering|cba|cbn|cbre|cbs|cc|cd|ceb|center|ceo|
|
|
|
|
cern|cf|cfa|cfd|cg|ch|chanel|channel|chase|chat|cheap|chintai|chloe|christmas|
|
|
|
|
chrome|chrysler|church|ci|cipriani|circle|cisco|citadel|citi|citic|city|
|
|
|
|
cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|
|
|
|
|
clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|comcast|commbank|
|
|
|
|
community|company|compare|computer|comsec|condos|construction|consulting|
|
|
|
|
contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|
|
|
|
|
coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruises|csc|
|
|
|
|
cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|date|dating|datsun|day|
|
|
|
|
dclk|dds|de|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|
|
|
|
|
dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|
|
|
|
|
discount|discover|dish|diy|dj|dk|dm|dnp|do|docs|doctor|dodge|dog|doha|domains|
|
|
|
|
dot|download|drive|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dvr|dz|earth|
|
|
|
|
eat|ec|eco|edeka|edu|education|ee|eg|email|emerck|energy|engineer|engineering|
|
|
|
|
enterprises|epost|epson|equipment|er|ericsson|erni|es|esq|estate|esurance|et|eu|
|
|
|
|
eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|
|
|
|
|
fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|
|
|
|
|
ferrari|ferrero|fi|fiat|fidelity|fido|film|final|finance|financial|fire|
|
|
|
|
firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|flir|florist|
|
|
|
|
flowers|fly|fm|fo|foo|foodnetwork|football|ford|forex|forsale|forum|foundation|
|
|
|
|
fox|fr|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fund|
|
|
|
|
furniture|futbol|fyi|ga|gal|gallery|gallo|gallup|game|games|gap|garden|gb|gbiz|
|
|
|
|
gd|gdn|ge|gea|gent|genting|george|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|
|
|
|
|
glade|glass|gle|global|globo|gm|gmail|gmbh|gmo|gmx|gn|godaddy|gold|goldpoint|
|
|
|
|
golf|goo|goodhands|goodyear|goog|google|gop|got|gov|gp|gq|gr|grainger|graphics|
|
|
|
|
gratis|green|gripe|group|gs|gt|gu|guardian|gucci|guge|guide|guitars|guru|gw|gy|
|
|
|
|
hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|
|
|
|
|
hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hk|hkt|hm|hn|hockey|holdings|holiday|
|
|
|
|
homedepot|homegoods|homes|homesense|honda|honeywell|horse|host|hosting|hot|
|
|
|
|
hoteles|hotmail|house|how|hr|hsbc|ht|htc|hu|hughes|hyatt|hyundai|ibm|icbc|ice|
|
|
|
|
icu|id|ie|ieee|ifm|iinet|ikano|il|im|imamat|imdb|immo|immobilien|in|industries|
|
|
|
|
infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|
|
|
|
|
investments|io|ipiranga|iq|ir|irish|is|iselect|ismaili|ist|istanbul|it|itau|itv|
|
|
|
|
iveco|iwc|jaguar|java|jcb|jcp|je|jeep|jetzt|jewelry|jlc|jll|jm|jmp|jnj|jo|jobs|
|
|
|
|
joburg|jot|joy|jp|jpmorgan|jprs|juegos|juniper|kaufen|kddi|ke|kerryhotels|
|
|
|
|
kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kindle|kitchen|kiwi|
|
|
|
|
km|kn|koeln|komatsu|kosher|kp|kpmg|kpn|kr|krd|kred|kuokgroup|kw|ky|kyoto|kz|la|
|
|
|
|
lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|
|
|
|
|
lanxess|lasalle|lat|latino|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|lefrak|
|
|
|
|
legal|lego|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|
|
|
|
|
like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|lk|loan|
|
|
|
|
loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|lr|ls|lt|
|
|
|
|
ltd|ltda|lu|lundbeck|lupin|luxe|luxury|lv|ly|ma|macys|madrid|maif|maison|makeup|
|
|
|
|
man|management|mango|market|marketing|markets|marriott|marshalls|maserati|
|
|
|
|
mattel|mba|mc|mcd|mcdonalds|mckinsey|md|me|med|media|meet|melbourne|meme|
|
|
|
|
memorial|men|menu|meo|metlife|mg|mh|miami|microsoft|mil|mini|mint|mit|
|
|
|
|
mitsubishi|mk|ml|mlb|mls|mm|mma|mn|mo|mobi|mobily|moda|moe|moi|mom|monash|money|
|
|
|
|
monster|montblanc|mopar|mormon|mortgage|moscow|motorcycles|mov|movie|movistar|
|
|
|
|
mp|mq|mr|ms|msd|mt|mtn|mtpc|mtr|mu|museum|mutual|mutuelle|mv|mw|mx|my|mz|na|nab|
|
|
|
|
nadex|nagoya|name|nationwide|natura|navy|nba|nc|ne|nec|net|netbank|netflix|
|
|
|
|
network|neustar|new|newholland|news|next|nextdirect|nexus|nf|nfl|ng|ngo|nhk|ni|
|
|
|
|
nico|nike|nikon|ninja|nissan|nissay|nl|no|nokia|northwesternmutual|norton|now|
|
|
|
|
nowruz|nowtv|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|observer|off|office|okinawa|olayan|
|
|
|
|
olayangroup|oldnavy|ollo|om|omega|one|ong|onl|online|onyourside|ooo|open|oracle|
|
|
|
|
orange|org|organic|orientexpress|origins|osaka|otsuka|ott|ovh|pa|page|
|
|
|
|
pamperedchef|panasonic|panerai|paris|pars|partners|parts|party|passagens|pay|
|
|
|
|
pccw|pe|pet|pf|pfizer|pg|ph|pharmacy|philips|photo|photography|photos|physio|
|
|
|
|
piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|pk|pl|place|play|
|
|
|
|
playstation|plumbing|plus|pm|pn|pnc|pohl|poker|politie|porn|post|pr|pramerica|
|
|
|
|
praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|
|
|
|
|
property|protection|pru|prudential|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|qvc|
|
|
|
|
racing|radio|raid|re|read|realestate|realtor|realty|recipes|red|redstone|
|
|
|
|
redumbrella|rehab|reise|reisen|reit|ren|rent|rentals|repair|report|republican|
|
|
|
|
rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|rio|rip|
|
|
|
|
ro|rocher|rocks|rodeo|rogers|room|rs|rsvp|ru|ruhr|run|rw|rwe|ryukyu|sa|saarland|
|
|
|
|
safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|
|
|
|
|
sap|sapo|sarl|sas|save|saxo|sb|sbi|sbs|sc|sca|scb|schaeffler|schmidt|
|
|
|
|
scholarships|school|schule|schwarz|science|scjohnson|scor|scot|sd|se|seat|
|
|
|
|
secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|sg|sh|
|
|
|
|
shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|
|
|
|
|
showtime|shriram|si|silk|sina|singles|site|sj|sk|ski|skin|sky|skype|sl|sling|sm|
|
|
|
|
smart|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|
|
|
|
|
song|sony|soy|space|spiegel|spot|spreadbetting|sr|srl|srt|st|stada|staples|star|
|
|
|
|
starhub|statebank|statefarm|statoil|stc|stcgroup|stockholm|storage|store|stream|
|
|
|
|
studio|study|style|su|sucks|supplies|supply|support|surf|surgery|suzuki|sv|
|
|
|
|
swatch|swiftcover|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|talk|taobao|
|
|
|
|
target|tatamotors|tatar|tattoo|tax|taxi|tc|tci|td|tdk|team|tech|technology|tel|
|
|
|
|
telecity|telefonica|temasek|tennis|teva|tf|tg|th|thd|theater|theatre|tiaa|
|
|
|
|
tickets|tienda|tiffany|tips|tires|tirol|tj|tjmaxx|tjx|tk|tkmaxx|tl|tm|tmall|tn|
|
|
|
|
to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tr|trade|
|
|
|
|
trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tt|
|
|
|
|
tube|tui|tunes|tushu|tv|tvs|tw|tz|ua|ubank|ubs|uconnect|ug|uk|unicom|university|
|
|
|
|
uno|uol|ups|us|uy|uz|va|vacations|vana|vanguard|vc|ve|vegas|ventures|verisign|
|
|
|
|
versicherung|vet|vg|vi|viajes|video|vig|viking|villas|vin|vip|virgin|visa|
|
|
|
|
vision|vista|vistaprint|viva|vivo|vlaanderen|vn|vodka|volkswagen|volvo|vote|
|
|
|
|
voting|voto|voyage|vu|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|
|
|
|
|
watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|wf|
|
|
|
|
whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|
|
|
|
|
woodside|work|works|world|wow|ws|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|xperia|
|
|
|
|
xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|you|youtube|yt|
|
|
|
|
yun|za|zappos|zara|zero|zip|zippo|zm|zone|zuerich|zw';
|
|
|
|
}
|
|
|
|
|
|
|
|
|