[Python-checkins] python/dist/src/Lib/email Message.py,1.23,1.24
barry@users.sourceforge.net
barry@users.sourceforge.net
Thu, 26 Sep 2002 10:19:37 -0700
Update of /cvsroot/python/python/dist/src/Lib/email
In directory usw-pr-cvs1:/tmp/cvs-serv27423
Modified Files:
Message.py
Log Message:
Fixing some RFC 2231-related issues reported in the Spambayes
project, with assistance from Oleg Broytmann. Specifically:
get_param(), get_params(): Document that these methods may return
parameter values that are either strings, or 3-tuples in the case of
RFC 2231 encoded parameters. The application should be prepared to
deal with such return values.
get_boundary(): Be prepared to deal with RFC 2231 encoded boundary
parameters. It makes little sense to have boundaries that are
anything but ascii, so if we get back a 3-tuple from get_param() we
will decode it into ascii and let any failures percolate up.
get_content_charset(): New method which treats the charset parameter
just like the boundary parameter in get_boundary(). Note that the
name "get_charset()" was already taken by the method that returns the
default Charset object.
get_charsets(): Rewrite to use get_content_charset().
Index: Message.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Message.py,v
retrieving revision 1.23
retrieving revision 1.24
diff -C2 -d -r1.23 -r1.24
*** Message.py 11 Sep 2002 14:11:35 -0000 1.23
--- Message.py 26 Sep 2002 17:19:34 -0000 1.24
***************
*** 54,58 ****
def _unquotevalue(value):
if isinstance(value, TupleType):
! return (value[0], value[1], Utils.unquote(value[2]))
else:
return Utils.unquote(value)
--- 54,58 ----
def _unquotevalue(value):
if isinstance(value, TupleType):
! return value[0], value[1], Utils.unquote(value[2])
else:
return Utils.unquote(value)
***************
*** 510,515 ****
split on the `=' sign. The left hand side of the `=' is the key,
while the right hand side is the value. If there is no `=' sign in
! the parameter the value is the empty string. The value is always
! unquoted, unless unquote is set to a false value.
Optional failobj is the object to return if there is no Content-Type:
--- 510,515 ----
split on the `=' sign. The left hand side of the `=' is the key,
while the right hand side is the value. If there is no `=' sign in
! the parameter the value is the empty string. The value is as
! described in the get_param() method.
Optional failobj is the object to return if there is no Content-Type:
***************
*** 530,538 ****
Optional failobj is the object to return if there is no Content-Type:
! header. Optional header is the header to search instead of
! Content-Type:
! Parameter keys are always compared case insensitively. Values are
! always unquoted, unless unquote is set to a false value.
"""
if not self.has_key(header):
--- 530,550 ----
Optional failobj is the object to return if there is no Content-Type:
! header, or the Content-Type header has no such parameter. Optional
! header is the header to search instead of Content-Type:
! Parameter keys are always compared case insensitively. The return
! value can either be a string, or a 3-tuple if the parameter was RFC
! 2231 encoded. When it's a 3-tuple, the elements of the value are of
! the form (CHARSET, LANGUAGE, VALUE), where LANGUAGE may be the empty
! string. Your application should be prepared to deal with these, and
! can convert the parameter to a Unicode string like so:
!
! param = msg.get_param('foo')
! if isinstance(param, tuple):
! param = unicode(param[2], param[0])
!
! In any case, the parameter value (either the returned string, or the
! VALUE item in the 3-tuple) is always unquoted, unless unquote is set
! to a false value.
"""
if not self.has_key(header):
***************
*** 675,678 ****
--- 687,693 ----
if boundary is missing:
return failobj
+ if isinstance(boundary, TupleType):
+ # RFC 2231 encoded, so decode. It better end up as ascii
+ return unicode(boundary[2], boundary[0]).encode('us-ascii')
return _unquotevalue(boundary.strip())
***************
*** 728,731 ****
--- 743,761 ----
from email._compat21 import walk
+ def get_content_charset(self, failobj=None):
+ """Return the charset parameter of the Content-Type header.
+
+ If there is no Content-Type header, or if that header has no charset
+ parameter, failobj is returned.
+ """
+ missing = []
+ charset = self.get_param('charset', missing)
+ if charset is missing:
+ return failobj
+ if isinstance(charset, TupleType):
+ # RFC 2231 encoded, so decode it, and it better end up as ascii.
+ return unicode(charset[2], charset[0]).encode('us-ascii')
+ return charset
+
def get_charsets(self, failobj=None):
"""Return a list containing the charset(s) used in this message.
***************
*** 744,746 ****
message will still return a list of length 1.
"""
! return [part.get_param('charset', failobj) for part in self.walk()]
--- 774,776 ----
message will still return a list of length 1.
"""
! return [part.get_content_charset(failobj) for part in self.walk()]