qpsmtpd/lib/Qpsmtpd/Address.pm
Matt Simerson d80b117bff replace all instances of split '' with split //
newer versions of perl don't accept split '' syntax any longer
2013-08-05 15:05:10 -07:00

363 lines
10 KiB
Perl

#!/usr/bin/perl -w
package Qpsmtpd::Address;
use strict;
=head1 NAME
Qpsmtpd::Address - Lightweight E-Mail address objects
=head1 DESCRIPTION
Based originally on cut and paste from Mail::Address and including
every jot and tittle from RFC-2821/2822 on what is a legal e-mail
address for use during the SMTP transaction.
=head1 USAGE
my $rcpt = Qpsmtpd::Address->new('<email.address@example.com>');
The objects created can be used as is, since they automatically
stringify to a standard form, and they have an overloaded comparison
for easy testing of values.
=head1 METHODS
=cut
use overload (
'""' => \&format,
'cmp' => \&_addr_cmp,
);
=head2 new()
Can be called two ways:
=over 4
=item * Qpsmtpd::Address->new('<full_address@example.com>')
The normal mode of operation is to pass the entire contents of the
RCPT TO: command from the SMTP transaction. The value will be fully
parsed via the L<canonify> method, using the full RFC 2821 rules.
=item * Qpsmtpd::Address->new("user", "host")
If the caller has already split the address from the domain/host,
this mode will not L<canonify> the input values. This is not
recommended in cases of user-generated input for that reason. This
can be used to generate Qpsmtpd::Address objects for accounts like
"<postmaster>" or indeed for the bounce address "<>".
=back
The resulting objects can be stored in arrays or used in plugins to
test for equality (like in badmailfrom).
=cut
sub new {
my ($class, $user, $host) = @_;
my $self = {};
if ($user =~ /^<(.*)>$/ ) {
($user, $host) = $class->canonify($user);
return undef unless defined $user;
}
elsif ( not defined $host ) {
my $address = $user;
($user, $host) = $address =~ m/(.*)(?:\@(.*))/;
}
$self->{_user} = $user;
$self->{_host} = $host;
return bless $self, $class;
}
# Definition of an address ("path") from RFC 2821:
#
# Path = "<" [ A-d-l ":" ] Mailbox ">"
#
# A-d-l = At-domain *( "," A-d-l )
# ; Note that this form, the so-called "source route",
# ; MUST BE accepted, SHOULD NOT be generated, and SHOULD be
# ; ignored.
#
# At-domain = "@" domain
#
# Mailbox = Local-part "@" Domain
#
# Local-part = Dot-string / Quoted-string
# ; MAY be case-sensitive
#
# Dot-string = Atom *("." Atom)
#
# Atom = 1*atext
#
# Quoted-string = DQUOTE *qcontent DQUOTE
#
# Domain = (sub-domain 1*("." sub-domain)) / address-literal
# sub-domain = Let-dig [Ldh-str]
#
# address-literal = "[" IPv4-address-literal /
# IPv6-address-literal /
# General-address-literal "]"
#
# IPv4-address-literal = Snum 3("." Snum)
# IPv6-address-literal = "IPv6:" IPv6-addr
# General-address-literal = Standardized-tag ":" 1*dcontent
# Standardized-tag = Ldh-str
# ; MUST be specified in a standards-track RFC
# ; and registered with IANA
#
# Snum = 1*3DIGIT ; representing a decimal integer
# ; value in the range 0 through 255
# Let-dig = ALPHA / DIGIT
# Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
#
# IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
# IPv6-hex = 1*4HEXDIG
# IPv6-full = IPv6-hex 7(":" IPv6-hex)
# IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::" [IPv6-hex *5(":"
# IPv6-hex)]
# ; The "::" represents at least 2 16-bit groups of zeros
# ; No more than 6 groups in addition to the "::" may be
# ; present
# IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
# IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
# [IPv6-hex *3(":" IPv6-hex) ":"] IPv4-address-literal
# ; The "::" represents at least 2 16-bit groups of zeros
# ; No more than 4 groups in addition to the "::" and
# ; IPv4-address-literal may be present
#
#
#
# atext and qcontent are not defined in RFC 2821.
# From RFC 2822:
#
# atext = ALPHA / DIGIT / ; Any character except controls,
# "!" / "#" / ; SP, and specials.
# "$" / "%" / ; Used for atoms
# "&" / "'" /
# "*" / "+" /
# "-" / "/" /
# "=" / "?" /
# "^" / "_" /
# "`" / "{" /
# "|" / "}" /
# "~"
# qtext = NO-WS-CTL / ; Non white space controls
#
# %d33 / ; The rest of the US-ASCII
# %d35-91 / ; characters not including "\"
# %d93-126 ; or the quote character
#
# qcontent = qtext / quoted-pair
#
# NO-WS-CTL = %d1-8 / ; US-ASCII control characters
# %d11 / ; that do not include the
# %d12 / ; carriage return, line feed,
# %d14-31 / ; and white space characters
# %d127
#
# quoted-pair = ("\" text) / obs-qp
#
# text = %d1-9 / ; Characters excluding CR and LF
# %d11 /
# %d12 /
# %d14-127 /
# obs-text
#
#
# (We ignore all obs forms)
=head2 canonify()
Primarily an internal method, it is used only on the path portion of
an e-mail message, as defined in RFC-2821 (this is the part inside the
angle brackets and does not include the "human readable" portion of an
address). It returns a list of (local-part, domain).
=cut
# address components are defined as package variables so that they can
# be overriden (in hook_pre_connection, for example) if people have
# different needs.
our $atom_expr = '[a-zA-Z0-9!#%&*+=?^_`{|}~\$\x27\x2D\/]+';
our $address_literal_expr =
'(?:\[(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|IPv6:[0-9A-Fa-f:.]+)\])';
our $subdomain_expr = '(?:[a-zA-Z0-9](?:[-a-zA-Z0-9]*[a-zA-Z0-9])?)';
our $domain_expr;
our $qtext_expr = '[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]';
our $text_expr = '[\x01-\x09\x0B\x0C\x0E-\x7F]';
sub canonify {
my ($dummy, $path) = @_;
# strip delimiters
return undef unless ($path =~ /^<(.*)>$/);
$path = $1;
my $domain = $domain_expr ? $domain_expr
: "$subdomain_expr(?:\.$subdomain_expr)*";
# it is possible for $address_literal_expr to be empty, if a site
# doesn't want to allow them
$domain = "(?:$address_literal_expr|$domain)"
if !$domain_expr and $address_literal_expr;
# strip source route
$path =~ s/^\@$domain(?:,\@$domain)*://;
# empty path is ok
return "" if $path eq "";
# bare postmaster is permissible, perl RFC-2821 (4.5.1)
return ("postmaster", undef) if $path =~ m/^postmaster$/i;
my ($localpart, $domainpart) = ($path =~ /^(.*)\@($domain)$/);
return (undef) unless defined $localpart;
if ($localpart =~ /^$atom_expr(\.$atom_expr)*/) {
# simple case, we are done
return ($localpart, $domainpart);
}
if ($localpart =~ /^"(($qtext_expr|\\$text_expr)*)"$/) {
$localpart = $1;
$localpart =~ s/\\($text_expr)/$1/g;
return ($localpart, $domainpart);
}
return (undef);
}
=head2 parse()
Retained as a compatibility method, it is completely equivalent
to new() called with a single parameter.
=cut
sub parse { # retain for compatibility only
return shift->new(shift);
}
=head2 address()
Can be used to reset the value of an existing Q::A object, in which
case it takes a parameter with or without the angle brackets.
Returns the stringified representation of the address. NOTE: does
not escape any of the characters that need escaping, nor does it
include the surrounding angle brackets. For that purpose, see
L<format>.
=cut
sub address {
my ($self, $val) = @_;
if ( defined($val) ) {
$val = "<$val>" unless $val =~ /^<.+>$/;
my ($user, $host) = $self->canonify($val);
$self->{_user} = $user;
$self->{_host} = $host;
}
return ( defined $self->{_user} ? $self->{_user} : '' )
. ( defined $self->{_host} ? '@'.$self->{_host} : '' );
}
=head2 format()
Returns the canonical stringified representation of the address. It
does escape any characters requiring it (per RFC-2821/2822) and it
does include the surrounding angle brackets. It is also the default
stringification operator, so the following are equivalent:
print $rcpt->format();
print $rcpt;
=cut
sub format {
my ($self) = @_;
my $qchar = '[^a-zA-Z0-9!#\$\%\&\x27\*\+\x2D\/=\?\^_`{\|}~.]';
return '<>' unless defined $self->{_user};
if ( ( my $user = $self->{_user}) =~ s/($qchar)/\\$1/g) {
return qq(<"$user")
. ( defined $self->{_host} ? '@'.$self->{_host} : '' ). ">";
}
return "<".$self->address().">";
}
=head2 user([$user])
Returns the "localpart" of the address, per RFC-2821, or the portion
before the '@' sign.
If called with one parameter, the localpart is set and the new value is
returned.
=cut
sub user {
my ($self, $user) = @_;
$self->{_user} = $user if defined $user;
return $self->{_user};
}
=head2 host([$host])
Returns the "domain" part of the address, per RFC-2821, or the portion
after the '@' sign.
If called with one parameter, the domain is set and the new value is
returned.
=cut
sub host {
my ($self, $host) = @_;
$self->{_host} = $host if defined $host;
return $self->{_host};
}
=head2 notes($key[,$value])
Get or set a note on the address. This is a piece of data that you wish
to attach to the address and read somewhere else. For example you can
use this to pass data between plugins.
=cut
sub notes {
my ($self,$key) = (shift,shift);
# Check for any additional arguments passed by the caller -- including undef
return $self->{_notes}->{$key} unless @_;
return $self->{_notes}->{$key} = shift;
}
sub _addr_cmp {
require UNIVERSAL;
my ($left, $right, $swap) = @_;
my $class = ref($left);
unless ( UNIVERSAL::isa($right, $class) ) {
$right = $class->new($right);
}
#invert the address so we can sort by domain then user
($left = join( '=', reverse( split(/@/, $left->format))) ) =~ tr/[<>]//d;
($right = join( '=', reverse( split(/@/,$right->format))) ) =~ tr/[<>]//d;
if ( $swap ) {
($right, $left) = ($left, $right);
}
return ($left cmp $right);
}
=head1 COPYRIGHT
Copyright 2004-2005 Peter J. Holzer. See the LICENSE file for more
information.
=cut
1;