[Python-checkins] python/dist/src/Lib rfc822.py,1.54.4.1,1.54.4.2
bwarsaw@sourceforge.net
bwarsaw@sourceforge.net
Thu, 02 May 2002 21:23:04 -0700
Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv12422/Lib
Modified Files:
Tag: release21-maint
rfc822.py
Log Message:
Backport patch for revision 1.58 to the Py2.1.x maintenance branch.
This fixes parseaddr() for the following RFC 2822 valid field:
To: User J. Person <person@dom.ain>
Index: rfc822.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/rfc822.py,v
retrieving revision 1.54.4.1
retrieving revision 1.54.4.2
diff -C2 -d -r1.54.4.1 -r1.54.4.2
*** rfc822.py 23 Dec 2001 07:02:01 -0000 1.54.4.1
--- rfc822.py 3 May 2002 04:23:02 -0000 1.54.4.2
***************
*** 1,51 ****
! """RFC-822 message manipulation class.
! XXX This is only a very rough sketch of a full RFC-822 parser;
! in particular the tokenizing of addresses does not adhere to all the
! quoting rules.
Directions for use:
To create a Message object: first open a file, e.g.:
fp = open(file, 'r')
You can use any other legal way of getting an open file object, e.g. use
! sys.stdin or call os.popen().
! Then pass the open file object to the Message() constructor:
m = Message(fp)
! This class can work with any input object that supports a readline
! method. If the input object has seek and tell capability, the
! rewindbody method will work; also illegal lines will be pushed back
! onto the input stream. If the input object lacks seek but has an
! `unread' method that can push back a line of input, Message will use
! that to push back illegal lines. Thus this class can be used to parse
! messages coming from a buffered stream.
! The optional `seekable' argument is provided as a workaround for
! certain stdio libraries in which tell() discards buffered data before
! discovering that the lseek() system call doesn't work. For maximum
! portability, you should set the seekable argument to zero to prevent
! that initial \code{tell} when passing in an unseekable object such as
! a a file object created from a socket object. If it is 1 on entry --
! which it is by default -- the tell() method of the open file object is
! called once; if this raises an exception, seekable is reset to 0. For
! other nonzero values of seekable, this test is not made.
To get the text of a particular header there are several methods:
str = m.getheader(name)
str = m.getrawheader(name)
! where name is the name of the header, e.g. 'Subject'.
! The difference is that getheader() strips the leading and trailing
! whitespace, while getrawheader() doesn't. Both functions retain
! embedded whitespace (including newlines) exactly as they are
! specified in the header, and leave the case of the text unchanged.
For addresses and address lists there are functions
! realname, mailaddress = m.getaddr(name) and
list = m.getaddrlist(name)
where the latter returns a list of (realname, mailaddr) tuples.
There is also a method
time = m.getdate(name)
which parses a Date-like field and returns a time-compatible tuple,
i.e. a tuple such as returned by time.localtime() or accepted by
--- 1,65 ----
! """RFC 2822 message manipulation.
! Note: This is only a very rough sketch of a full RFC-822 parser; in particular
! the tokenizing of addresses does not adhere to all the quoting rules.
!
! Note: RFC 2822 is a long awaited update to RFC 822. This module should
! conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
! effort at RFC 2822 updates has been made, but a thorough audit has not been
! performed. Consider any RFC 2822 non-conformance to be a bug.
!
! RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
! RFC 822: http://www.faqs.org/rfcs/rfc822.html (obsolete)
Directions for use:
To create a Message object: first open a file, e.g.:
+
fp = open(file, 'r')
+
You can use any other legal way of getting an open file object, e.g. use
! sys.stdin or call os.popen(). Then pass the open file object to the Message()
! constructor:
!
m = Message(fp)
! This class can work with any input object that supports a readline method. If
! the input object has seek and tell capability, the rewindbody method will
! work; also illegal lines will be pushed back onto the input stream. If the
! input object lacks seek but has an `unread' method that can push back a line
! of input, Message will use that to push back illegal lines. Thus this class
! can be used to parse messages coming from a buffered stream.
! The optional `seekable' argument is provided as a workaround for certain stdio
! libraries in which tell() discards buffered data before discovering that the
! lseek() system call doesn't work. For maximum portability, you should set the
! seekable argument to zero to prevent that initial tell() call when passing in
! an unseekable object such as a file object created from a socket object. If
! it is 1 on entry -- which it is by default -- the tell() method of the open
! file object is called once; if this raises an exception, seekable is reset to
! 0. For other nonzero values of seekable, this test is not made.
To get the text of a particular header there are several methods:
+
str = m.getheader(name)
str = m.getrawheader(name)
!
! where name is the name of the header, e.g. 'Subject'. The difference is that
! getheader() strips the leading and trailing whitespace, while getrawheader()
! doesn't. Both functions retain embedded whitespace (including newlines)
! exactly as they are specified in the header, and leave the case of the text
! unchanged.
For addresses and address lists there are functions
!
! realname, mailaddress = m.getaddr(name)
list = m.getaddrlist(name)
+
where the latter returns a list of (realname, mailaddr) tuples.
There is also a method
+
time = m.getdate(name)
+
which parses a Date-like field and returns a time-compatible tuple,
i.e. a tuple such as returned by time.localtime() or accepted by
***************
*** 66,70 ****
class Message:
! """Represents a single RFC-822-compliant message."""
def __init__(self, fp, seekable = 1):
--- 80,84 ----
class Message:
! """Represents a single RFC 2822-compliant message."""
def __init__(self, fp, seekable = 1):
***************
*** 107,122 ****
"""Read header lines.
! Read header lines up to the entirely blank line that
! terminates them. The (normally blank) line that ends the
! headers is skipped, but not included in the returned list.
! If a non-header line ends the headers, (which is an error),
! an attempt is made to backspace over it; it is never
! included in the returned list.
! The variable self.status is set to the empty string if all
! went well, otherwise it is an error message.
! The variable self.headers is a completely uninterpreted list
! of lines contained in the header (so printing them will
! reproduce the header exactly as it appears in the file).
"""
self.dict = {}
--- 121,135 ----
"""Read header lines.
! Read header lines up to the entirely blank line that terminates them.
! The (normally blank) line that ends the headers is skipped, but not
! included in the returned list. If a non-header line ends the headers,
! (which is an error), an attempt is made to backspace over it; it is
! never included in the returned list.
! The variable self.status is set to the empty string if all went well,
! otherwise it is an error message. The variable self.headers is a
! completely uninterpreted list of lines contained in the header (so
! printing them will reproduce the header exactly as it appears in the
! file).
"""
self.dict = {}
***************
*** 184,189 ****
This method should return the header name, suitably canonicalized.
! You may override this method in order to use Message parsing
! on tagged data in RFC822-like formats with special header formats.
"""
i = line.find(':')
--- 197,202 ----
This method should return the header name, suitably canonicalized.
! You may override this method in order to use Message parsing on tagged
! data in RFC 2822-like formats with special header formats.
"""
i = line.find(':')
***************
*** 194,204 ****
def islast(self, line):
! """Determine whether a line is a legal end of RFC-822 headers.
! You may override this method if your application wants
! to bend the rules, e.g. to strip trailing whitespace,
! or to recognize MH template separators ('--------').
! For convenience (e.g. for code reading from sockets) a
! line consisting of \r\n also matches.
"""
return line in _blanklines
--- 207,216 ----
def islast(self, line):
! """Determine whether a line is a legal end of RFC 2822 headers.
! You may override this method if your application wants to bend the
! rules, e.g. to strip trailing whitespace, or to recognize MH template
! separators ('--------'). For convenience (e.g. for code reading from
! sockets) a line consisting of \r\n also matches.
"""
return line in _blanklines
***************
*** 207,213 ****
"""Determine whether a line should be skipped entirely.
! You may override this method in order to use Message parsing
! on tagged data in RFC822-like formats that support embedded
! comments or free-text data.
"""
return None
--- 219,225 ----
"""Determine whether a line should be skipped entirely.
! You may override this method in order to use Message parsing on tagged
! data in RFC 2822-like formats that support embedded comments or
! free-text data.
"""
return None
***************
*** 216,226 ****
"""Find all header lines matching a given header name.
! Look through the list of headers and find all lines
! matching a given header name (and their continuation
! lines). A list of the lines is returned, without
! interpretation. If the header does not occur, an
! empty list is returned. If the header occurs multiple
! times, all occurrences are returned. Case is not
! important in the header name.
"""
name = name.lower() + ':'
--- 228,236 ----
"""Find all header lines matching a given header name.
! Look through the list of headers and find all lines matching a given
! header name (and their continuation lines). A list of the lines is
! returned, without interpretation. If the header does not occur, an
! empty list is returned. If the header occurs multiple times, all
! occurrences are returned. Case is not important in the header name.
"""
name = name.lower() + ':'
***************
*** 240,246 ****
"""Get the first header line matching name.
! This is similar to getallmatchingheaders, but it returns
! only the first matching header (and its continuation
! lines).
"""
name = name.lower() + ':'
--- 250,255 ----
"""Get the first header line matching name.
! This is similar to getallmatchingheaders, but it returns only the
! first matching header (and its continuation lines).
"""
name = name.lower() + ':'
***************
*** 261,269 ****
"""A higher-level interface to getfirstmatchingheader().
! Return a string containing the literal text of the
! header but with the keyword stripped. All leading,
! trailing and embedded whitespace is kept in the
! string, however.
! Return None if the header does not occur.
"""
--- 270,277 ----
"""A higher-level interface to getfirstmatchingheader().
! Return a string containing the literal text of the header but with the
! keyword stripped. All leading, trailing and embedded whitespace is
! kept in the string, however. Return None if the header does not
! occur.
"""
***************
*** 277,284 ****
"""Get the header value for a name.
! This is the normal interface: it returns a stripped
! version of the header value for a given header name,
! or None if it doesn't exist. This uses the dictionary
! version which finds the *last* such header.
"""
try:
--- 285,291 ----
"""Get the header value for a name.
! This is the normal interface: it returns a stripped version of the
! header value for a given header name, or None if it doesn't exist.
! This uses the dictionary version which finds the *last* such header.
"""
try:
***************
*** 291,298 ****
"""Get all values for a header.
! This returns a list of values for headers given more than once;
! each value in the result list is stripped in the same way as the
! result of getheader(). If the header is not given, return an
! empty list.
"""
result = []
--- 298,304 ----
"""Get all values for a header.
! This returns a list of values for headers given more than once; each
! value in the result list is stripped in the same way as the result of
! getheader(). If the header is not given, return an empty list.
"""
result = []
***************
*** 333,337 ****
tuple as returned by getaddr(). Scans all named headers, so it works
properly with multiple To: or Cc: headers for example.
-
"""
raw = []
--- 339,342 ----
***************
*** 353,358 ****
"""Retrieve a date field from a header.
! Retrieves a date field from the named header, returning
! a tuple compatible with time.mktime().
"""
try:
--- 358,363 ----
"""Retrieve a date field from a header.
! Retrieves a date field from the named header, returning a tuple
! compatible with time.mktime().
"""
try:
***************
*** 365,371 ****
"""Retrieve a date field from a header as a 10-tuple.
! The first 9 elements make up a tuple compatible with
! time.mktime(), and the 10th is the offset of the poster's
! time zone from GMT/UTC.
"""
try:
--- 370,375 ----
"""Retrieve a date field from a header as a 10-tuple.
! The first 9 elements make up a tuple compatible with time.mktime(),
! and the 10th is the offset of the poster's time zone from GMT/UTC.
"""
try:
***************
*** 389,395 ****
"""Set the value of a header.
! Note: This is not a perfect inversion of __getitem__, because
! any changed headers get stuck at the end of the raw-headers list
! rather than where the altered header was.
"""
del self[name] # Won't fail if it doesn't exist
--- 393,399 ----
"""Set the value of a header.
! Note: This is not a perfect inversion of __getitem__, because any
! changed headers get stuck at the end of the raw-headers list rather
! than where the altered header was.
"""
del self[name] # Won't fail if it doesn't exist
***************
*** 484,488 ****
To understand what this class does, it helps to have a copy of
! RFC-822 in front of you.
Note: this class interface is deprecated and may be removed in the future.
--- 488,494 ----
To understand what this class does, it helps to have a copy of
! RFC 2822 in front of you.
!
! http://www.faqs.org/rfcs/rfc2822.html
Note: this class interface is deprecated and may be removed in the future.
***************
*** 493,498 ****
"""Initialize a new instance.
! `field' is an unparsed address header field, containing
! one or more addresses.
"""
self.specials = '()<>@,:;.\"[]'
--- 499,504 ----
"""Initialize a new instance.
! `field' is an unparsed address header field, containing one or more
! addresses.
"""
self.specials = '()<>@,:;.\"[]'
***************
*** 501,504 ****
--- 507,514 ----
self.CR = '\r\n'
self.atomends = self.specials + self.LWS + self.CR
+ # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
+ # is obsolete syntax. RFC 2822 requires that we recognize obsolete
+ # syntax, so allow dots in phrases.
+ self.phraseends = self.atomends.replace('.', '')
self.field = field
self.commentlist = []
***************
*** 615,619 ****
def getaddrspec(self):
! """Parse an RFC-822 addr-spec."""
aslist = []
--- 625,629 ----
def getaddrspec(self):
! """Parse an RFC 2822 addr-spec."""
aslist = []
***************
*** 659,671 ****
"""Parse a header fragment delimited by special characters.
! `beginchar' is the start character for the fragment.
! If self is not looking at an instance of `beginchar' then
! getdelimited returns the empty string.
`endchars' is a sequence of allowable end-delimiting characters.
Parsing stops when one of these is encountered.
! If `allowcomments' is non-zero, embedded RFC-822 comments
! are allowed within the parsed fragment.
"""
if self.field[self.pos] != beginchar:
--- 669,681 ----
"""Parse a header fragment delimited by special characters.
! `beginchar' is the start character for the fragment. If self is not
! looking at an instance of `beginchar' then getdelimited returns the
! empty string.
`endchars' is a sequence of allowable end-delimiting characters.
Parsing stops when one of these is encountered.
! If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
! within the parsed fragment.
"""
if self.field[self.pos] != beginchar:
***************
*** 701,713 ****
def getdomainliteral(self):
! """Parse an RFC-822 domain-literal."""
return '[%s]' % self.getdelimited('[', ']\r', 0)
! def getatom(self):
! """Parse an RFC-822 atom."""
atomlist = ['']
while self.pos < len(self.field):
! if self.field[self.pos] in self.atomends:
break
else: atomlist.append(self.field[self.pos])
--- 711,730 ----
def getdomainliteral(self):
! """Parse an RFC 2822 domain-literal."""
return '[%s]' % self.getdelimited('[', ']\r', 0)
! def getatom(self, atomends=None):
! """Parse an RFC 2822 atom.
!
! Optional atomends specifies a different set of end token delimiters
! (the default is to use self.atomends). This is used e.g. in
! getphraselist() since phrase endings must not include the `.' (which
! is legal in phrases)."""
atomlist = ['']
+ if atomends is None:
+ atomends = self.atomends
while self.pos < len(self.field):
! if self.field[self.pos] in atomends:
break
else: atomlist.append(self.field[self.pos])
***************
*** 717,725 ****
def getphraselist(self):
! """Parse a sequence of RFC-822 phrases.
! A phrase is a sequence of words, which are in turn either
! RFC-822 atoms or quoted-strings. Phrases are canonicalized
! by squeezing all runs of continuous whitespace into one space.
"""
plist = []
--- 734,742 ----
def getphraselist(self):
! """Parse a sequence of RFC 2822 phrases.
! A phrase is a sequence of words, which are in turn either RFC 2822
! atoms or quoted-strings. Phrases are canonicalized by squeezing all
! runs of continuous whitespace into one space.
"""
plist = []
***************
*** 732,743 ****
elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment())
! elif self.field[self.pos] in self.atomends:
break
! else: plist.append(self.getatom())
return plist
class AddressList(AddrlistClass):
! """An AddressList encapsulates a list of parsed RFC822 addresses."""
def __init__(self, field):
AddrlistClass.__init__(self, field)
--- 749,761 ----
elif self.field[self.pos] == '(':
self.commentlist.append(self.getcomment())
! elif self.field[self.pos] in self.phraseends:
break
! else:
! plist.append(self.getatom(self.phraseends))
return plist
class AddressList(AddrlistClass):
! """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
def __init__(self, field):
AddrlistClass.__init__(self, field)