qpsmtpd/lib/Qpsmtpd/Address.pm
Jared Johnson d9cf61175a Add Qpsmtpd::Address::notes() method
Qpsmtpd Connection and Transaction objects support notes() methods which
are conventionally used to pass data between plugins.  This adds the
same functionality to Address objects.  This should make it easier for
plugins to begin supporting message handling based on per-user
configuration directives.

Signed-off-by: Ask Bjørn Hansen <ask@develooper.com>
2009-02-20 07:45:03 -06:00

363 lines
10 KiB
Perl

#!/usr/bin/perl -w
package Qpsmtpd::Address;
use strict;
=head1 NAME
Qpsmtpd::Address - Lightweight E-Mail address objects
=head1 DESCRIPTION
Based originally on cut and paste from Mail::Address and including
every jot and tittle from RFC-2821/2822 on what is a legal e-mail
address for use during the SMTP transaction.
=head1 USAGE
my $rcpt = Qpsmtpd::Address->new('<email.address@example.com>');
The objects created can be used as is, since they automatically
stringify to a standard form, and they have an overloaded comparison
for easy testing of values.
=head1 METHODS
=cut
use overload (
'""' => \&format,
'cmp' => \&_addr_cmp,
);
=head2 new()
Can be called two ways:
=over 4
=item * Qpsmtpd::Address->new('<full_address@example.com>')
The normal mode of operation is to pass the entire contents of the
RCPT TO: command from the SMTP transaction. The value will be fully
parsed via the L<canonify> method, using the full RFC 2821 rules.
=item * Qpsmtpd::Address->new("user", "host")
If the caller has already split the address from the domain/host,
this mode will not L<canonify> the input values. This is not
recommended in cases of user-generated input for that reason. This
can be used to generate Qpsmtpd::Address objects for accounts like
"<postmaster>" or indeed for the bounce address "<>".
=back
The resulting objects can be stored in arrays or used in plugins to
test for equality (like in badmailfrom).
=cut
sub new {
my ($class, $user, $host) = @_;
my $self = {};
if ($user =~ /^<(.*)>$/ ) {
($user, $host) = $class->canonify($user);
return undef unless defined $user;
}
elsif ( not defined $host ) {
my $address = $user;
($user, $host) = $address =~ m/(.*)(?:\@(.*))/;
}
$self->{_user} = $user;
$self->{_host} = $host;
return bless $self, $class;
}
# Definition of an address ("path") from RFC 2821:
#
# Path = "<" [ A-d-l ":" ] Mailbox ">"
#
# A-d-l = At-domain *( "," A-d-l )
# ; Note that this form, the so-called "source route",
# ; MUST BE accepted, SHOULD NOT be generated, and SHOULD be
# ; ignored.
#
# At-domain = "@" domain
#
# Mailbox = Local-part "@" Domain
#
# Local-part = Dot-string / Quoted-string
# ; MAY be case-sensitive
#
# Dot-string = Atom *("." Atom)
#
# Atom = 1*atext
#
# Quoted-string = DQUOTE *qcontent DQUOTE
#
# Domain = (sub-domain 1*("." sub-domain)) / address-literal
# sub-domain = Let-dig [Ldh-str]
#
# address-literal = "[" IPv4-address-literal /
# IPv6-address-literal /
# General-address-literal "]"
#
# IPv4-address-literal = Snum 3("." Snum)
# IPv6-address-literal = "IPv6:" IPv6-addr
# General-address-literal = Standardized-tag ":" 1*dcontent
# Standardized-tag = Ldh-str
# ; MUST be specified in a standards-track RFC
# ; and registered with IANA
#
# Snum = 1*3DIGIT ; representing a decimal integer
# ; value in the range 0 through 255
# Let-dig = ALPHA / DIGIT
# Ldh-str = *( ALPHA / DIGIT / "-" ) Let-dig
#
# IPv6-addr = IPv6-full / IPv6-comp / IPv6v4-full / IPv6v4-comp
# IPv6-hex = 1*4HEXDIG
# IPv6-full = IPv6-hex 7(":" IPv6-hex)
# IPv6-comp = [IPv6-hex *5(":" IPv6-hex)] "::" [IPv6-hex *5(":"
# IPv6-hex)]
# ; The "::" represents at least 2 16-bit groups of zeros
# ; No more than 6 groups in addition to the "::" may be
# ; present
# IPv6v4-full = IPv6-hex 5(":" IPv6-hex) ":" IPv4-address-literal
# IPv6v4-comp = [IPv6-hex *3(":" IPv6-hex)] "::"
# [IPv6-hex *3(":" IPv6-hex) ":"] IPv4-address-literal
# ; The "::" represents at least 2 16-bit groups of zeros
# ; No more than 4 groups in addition to the "::" and
# ; IPv4-address-literal may be present
#
#
#
# atext and qcontent are not defined in RFC 2821.
# From RFC 2822:
#
# atext = ALPHA / DIGIT / ; Any character except controls,
# "!" / "#" / ; SP, and specials.
# "$" / "%" / ; Used for atoms
# "&" / "'" /
# "*" / "+" /
# "-" / "/" /
# "=" / "?" /
# "^" / "_" /
# "`" / "{" /
# "|" / "}" /
# "~"
# qtext = NO-WS-CTL / ; Non white space controls
#
# %d33 / ; The rest of the US-ASCII
# %d35-91 / ; characters not including "\"
# %d93-126 ; or the quote character
#
# qcontent = qtext / quoted-pair
#
# NO-WS-CTL = %d1-8 / ; US-ASCII control characters
# %d11 / ; that do not include the
# %d12 / ; carriage return, line feed,
# %d14-31 / ; and white space characters
# %d127
#
# quoted-pair = ("\" text) / obs-qp
#
# text = %d1-9 / ; Characters excluding CR and LF
# %d11 /
# %d12 /
# %d14-127 /
# obs-text
#
#
# (We ignore all obs forms)
=head2 canonify()
Primarily an internal method, it is used only on the path portion of
an e-mail message, as defined in RFC-2821 (this is the part inside the
angle brackets and does not include the "human readable" portion of an
address). It returns a list of (local-part, domain).
=cut
# address components are defined as package variables so that they can
# be overriden (in hook_pre_connection, for example) if people have
# different needs.
our $atom_expr = '[a-zA-Z0-9!#%&*+=?^_`{|}~\$\x27\x2D\/]+';
our $address_literal_expr =
'(?:\[(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|IPv6:[0-9A-Fa-f:.]+)\])';
our $subdomain_expr = '(?:[a-zA-Z0-9](?:[-a-zA-Z0-9]*[a-zA-Z0-9])?)';
our $domain_expr;
our $qtext_expr = '[\x01-\x08\x0B\x0C\x0E-\x1F\x21\x23-\x5B\x5D-\x7F]';
our $text_expr = '[\x01-\x09\x0B\x0C\x0E-\x7F]';
sub canonify {
my ($dummy, $path) = @_;
# strip delimiters
return undef unless ($path =~ /^<(.*)>$/);
$path = $1;
my $domain = $domain_expr ? $domain_expr
: "$subdomain_expr(?:\.$subdomain_expr)*";
# it is possible for $address_literal_expr to be empty, if a site
# doesn't want to allow them
$domain = "(?:$address_literal_expr|$domain)"
if !$domain_expr and $address_literal_expr;
# strip source route
$path =~ s/^\@$domain(?:,\@$domain)*://;
# empty path is ok
return "" if $path eq "";
# bare postmaster is permissible, perl RFC-2821 (4.5.1)
return ("postmaster", undef) if $path eq "postmaster";
my ($localpart, $domainpart) = ($path =~ /^(.*)\@($domain)$/);
return (undef) unless defined $localpart;
if ($localpart =~ /^$atom_expr(\.$atom_expr)*/) {
# simple case, we are done
return ($localpart, $domainpart);
}
if ($localpart =~ /^"(($qtext_expr|\\$text_expr)*)"$/) {
$localpart = $1;
$localpart =~ s/\\($text_expr)/$1/g;
return ($localpart, $domainpart);
}
return (undef);
}
=head2 parse()
Retained as a compatibility method, it is completely equivalent
to new() called with a single parameter.
=cut
sub parse { # retain for compatibility only
return shift->new(shift);
}
=head2 address()
Can be used to reset the value of an existing Q::A object, in which
case it takes a parameter with or without the angle brackets.
Returns the stringified representation of the address. NOTE: does
not escape any of the characters that need escaping, nor does it
include the surrounding angle brackets. For that purpose, see
L<format>.
=cut
sub address {
my ($self, $val) = @_;
if ( defined($val) ) {
$val = "<$val>" unless $val =~ /^<.+>$/;
my ($user, $host) = $self->canonify($val);
$self->{_user} = $user;
$self->{_host} = $host;
}
return ( defined $self->{_user} ? $self->{_user} : '' )
. ( defined $self->{_host} ? '@'.$self->{_host} : '' );
}
=head2 format()
Returns the canonical stringified representation of the address. It
does escape any characters requiring it (per RFC-2821/2822) and it
does include the surrounding angle brackets. It is also the default
stringification operator, so the following are equivalent:
print $rcpt->format();
print $rcpt;
=cut
sub format {
my ($self) = @_;
my $qchar = '[^a-zA-Z0-9!#\$\%\&\x27\*\+\x2D\/=\?\^_`{\|}~.]';
return '<>' unless defined $self->{_user};
if ( ( my $user = $self->{_user}) =~ s/($qchar)/\\$1/g) {
return qq(<"$user")
. ( defined $self->{_host} ? '@'.$self->{_host} : '' ). ">";
}
return "<".$self->address().">";
}
=head2 user([$user])
Returns the "localpart" of the address, per RFC-2821, or the portion
before the '@' sign.
If called with one parameter, the localpart is set and the new value is
returned.
=cut
sub user {
my ($self, $user) = @_;
$self->{_user} = $user if defined $user;
return $self->{_user};
}
=head2 host([$host])
Returns the "domain" part of the address, per RFC-2821, or the portion
after the '@' sign.
If called with one parameter, the domain is set and the new value is
returned.
=cut
sub host {
my ($self, $host) = @_;
$self->{_host} = $host if defined $host;
return $self->{_host};
}
=head2 notes($key[,$value])
Get or set a note on the address. This is a piece of data that you wish
to attach to the address and read somewhere else. For example you can
use this to pass data between plugins.
=cut
sub notes {
my ($self,$key) = (shift,shift);
# Check for any additional arguments passed by the caller -- including undef
return $self->{_notes}->{$key} unless @_;
return $self->{_notes}->{$key} = shift;
}
sub _addr_cmp {
require UNIVERSAL;
my ($left, $right, $swap) = @_;
my $class = ref($left);
unless ( UNIVERSAL::isa($right, $class) ) {
$right = $class->new($right);
}
#invert the address so we can sort by domain then user
($left = join( '=', reverse( split('@', $left->format))) ) =~ tr/[<>]//d;
($right = join( '=', reverse( split('@',$right->format))) ) =~ tr/[<>]//d;
if ( $swap ) {
($right, $left) = ($left, $right);
}
return ($left cmp $right);
}
=head1 COPYRIGHT
Copyright 2004-2005 Peter J. Holzer. See the LICENSE file for more
information.
=cut
1;