From 356ec2f08d5168ce97849c0466cb7e757ea0c9ba Mon Sep 17 00:00:00 2001
From: Robert Spier
Date: Mon, 19 May 2008 07:22:51 +0000
Subject: [PATCH] standardize hostname regex. use latest list of tlds. import constants so we can syntax check

git-svn-id: https://svn.perl.org/qpsmtpd/trunk@914 958fd67b-6ff1-0310-b445-bb7760255be9
---
Review notes:
 - Both TLD alternations: added the missing "|" after "travel". Under /x the
   line break is insignificant, so "travel" and the following "com" fused
   into the single alternative "travelcom". The repeated
   com|net|org|biz|info entries were dropped; they are already in the list.
 - Full-URI regex: restored the capture group around the hostname, because
   the unchanged code right after the match reads $1.

 plugins/uribl | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/plugins/uribl b/plugins/uribl
index a9454ed..4a686bc 100644
--- a/plugins/uribl
+++ b/plugins/uribl
@@ -95,6 +95,8 @@ use Net::DNS::Resolver;
 use Time::HiRes qw(time);
 use IO::Select;
 
+use Qpsmtpd::Constants;
+
 use strict;
 use warnings;
 
@@ -323,9 +325,13 @@ sub data_handler {
                 }
             }
             while ($l =~ m{
-                ([Ww]{3,3}\.[\w\-.]+\.[a-zA-Z]{2,32}|        # www.hostname
-                 [a-zA-Z0-9][a-zA-Z0-9\-.]+\.                # hostname. ...
-                 (?:com|net|org|biz|info|[a-zA-Z]{2,2}))(?!\w) # (cc)TLD
+                ((?:www\.)?                                  # www?
+                 [a-zA-Z0-9][a-zA-Z0-9\-.]+\.                # hostname
+                 (?:aero|arpa|asia|biz|cat|com|coop|         # tld
+                    edu|gov|info|int|jobs|mil|mobi|
+                    museum|name|net|org|pro|tel|travel|
+                    [a-zA-Z]{2})                             # any two-letter ccTLD
+                )(?!\w)
             }gix) {
                 my $host = lc $1;
                 my @host_domains = split /\./, $host;
@@ -352,8 +358,12 @@ sub data_handler {
             while ($l =~ m{
                 \w{3,16}:/+                                  # protocol
                 (?:\S+@)?                                    # user/pass
-                ([\w\-.]+\.[a-zA-Z]{2,32})                   # hostname
-            }gx) {
+                ([a-zA-Z0-9][a-zA-Z0-9\-.]+\.                # hostname (capture group 1)
+                 (?:aero|arpa|asia|biz|cat|com|coop|         # tld
+                    edu|gov|info|int|jobs|mil|mobi|
+                    museum|name|net|org|pro|tel|travel|
+                    [a-zA-Z]{2}))                            # any two-letter ccTLD
+            }gix) {
                 my $host = lc $1;
                 my @host_domains = split /\./, $host;
                 $self->log(LOGDEBUG, "uribl: matched full URI hostname $host");