[Python-checkins] r57694 - in sandbox/trunk/emailpkg/5_0-exp/email: base64mime.py charset.py header.py test/test_email.py

barry.warsaw python-checkins at python.org
Thu Aug 30 04:22:55 CEST 2007


Author: barry.warsaw
Date: Thu Aug 30 04:22:54 2007
New Revision: 57694

Modified:
   sandbox/trunk/emailpkg/5_0-exp/email/base64mime.py
   sandbox/trunk/emailpkg/5_0-exp/email/charset.py
   sandbox/trunk/emailpkg/5_0-exp/email/header.py
   sandbox/trunk/emailpkg/5_0-exp/email/test/test_email.py
Log:
Last commit here before merging back to the py3k branch head.  This changes
the default Header maximum line length to 78 for consistency with RFC 2822 and
other interfaces in this package.

Normalize the chunks before generating the output of Header.__str__().

Headers must compare for equality against their unencoded unicode string
value, not their RFC 2047 encoded value.  This is because the unicode
representation is the canonical one, whereas the encoded representation is
influenced by external factors such as maximum line lengths.

Work in a 'transitional space' for dealing with the space that RFC 2047
requires between encoded words on the same line.  This isn't quite right
because it leaves a bogus trailing space on the last line of a header if it
ends with an encoded word.  I'll fix this after the merge back to the py3k
branch.

base64mime.body_encode(): Remove the 'binary' argument so I can also kill
fix_eols().


Modified: sandbox/trunk/emailpkg/5_0-exp/email/base64mime.py
==============================================================================
--- sandbox/trunk/emailpkg/5_0-exp/email/base64mime.py	(original)
+++ sandbox/trunk/emailpkg/5_0-exp/email/base64mime.py	Thu Aug 30 04:22:54 2007
@@ -75,16 +75,12 @@
 
 
 
-def body_encode(s, binary=True, maxlinelen=76, eol=NL):
+def body_encode(s, maxlinelen=76, eol=NL):
     """Encode a string with base64.
 
     Each line will be wrapped at, at most, maxlinelen characters (defaults to
     76 characters).
 
-    If binary is False, end-of-line characters will be converted to the
-    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
-    verbatim (this is the default).
-
     Each line of encoded text will end with eol, which defaults to "\\n".  Set
     this to "\r\n" if you will be using the result of this function directly
     in an email.
@@ -92,9 +88,6 @@
     if not s:
         return s
 
-    if not binary:
-        s = fix_eols(s)
-
     encvec = []
     max_unencoded = maxlinelen * 3 // 4
     for i in range(0, len(s), max_unencoded):

Modified: sandbox/trunk/emailpkg/5_0-exp/email/charset.py
==============================================================================
--- sandbox/trunk/emailpkg/5_0-exp/email/charset.py	(original)
+++ sandbox/trunk/emailpkg/5_0-exp/email/charset.py	Thu Aug 30 04:22:54 2007
@@ -343,6 +343,7 @@
                 if not lines and not current_line:
                     lines.append(None)
                 else:
+                    separator = (' ' if lines else '')
                     joined_line = EMPTYSTRING.join(current_line)
                     header_bytes = joined_line.encode(codec)
                     lines.append(encoder(header_bytes))

Modified: sandbox/trunk/emailpkg/5_0-exp/email/header.py
==============================================================================
--- sandbox/trunk/emailpkg/5_0-exp/email/header.py	(original)
+++ sandbox/trunk/emailpkg/5_0-exp/email/header.py	Thu Aug 30 04:22:54 2007
@@ -25,10 +25,11 @@
 SPACE8 = ' ' * 8
 EMPTYSTRING = ''
 
-MAXLINELEN = 76
+MAXLINELEN = 78
 
 USASCII = Charset('us-ascii')
 UTF8 = Charset('utf-8')
+TRANSITIONAL_SPACE = object()
 
 # Match encoded-word strings in the form =?charset?q?Hello_World?=
 ecre = re.compile(r'''
@@ -170,7 +171,8 @@
         The maximum line length can be specified explicit via maxlinelen.  For
         splitting the first line to a shorter value (to account for the field
         header which isn't included in s, e.g. `Subject') pass in the name of
-        the field in header_name.  The default maxlinelen is 76.
+        the field in header_name.  The default maxlinelen is 78 as recommended
+        by RFC 2822.
 
         continuation_ws must be RFC 2822 compliant folding whitespace (usually
         either a space or a hard tab) which will be prepended to continuation
@@ -198,9 +200,10 @@
 
     def __str__(self):
         """Return the string value of the header."""
+        self._normalize()
         uchunks = []
         lastcs = None
-        for s, charset in self._chunks:
+        for string, charset in self._chunks:
             # We must preserve spaces between encoded and non-encoded word
             # boundaries, which means for us we need to add a space when we go
             # from a charset to None/us-ascii, or from None/us-ascii to a
@@ -214,15 +217,16 @@
                 elif nextcs not in (None, 'us-ascii'):
                     uchunks.append(SPACE)
             lastcs = nextcs
-            uchunks.append(s)
+            uchunks.append(string)
         return EMPTYSTRING.join(uchunks)
 
     # Rich comparison operators for equality only.  BAW: does it make sense to
     # have or explicitly disable <, <=, >, >= operators?
     def __eq__(self, other):
         # other may be a Header or a string.  Both are fine so coerce
-        # ourselves to a string, swap the args and do another comparison.
-        return other == self.encode()
+        # ourselves to a unicode (of the unencoded header value), swap the
+        # args and do another comparison.
+        return other == str(self)
 
     def __ne__(self, other):
         return not self == other
@@ -308,9 +312,8 @@
         return str(formatter)
 
     def _normalize(self):
-        # Normalize the chunks so that all runs of identical charsets get
-        # collapsed into a single unicode string.  You need a space between
-        # encoded words, or between encoded and unencoded words.
+        # Step 1: Normalize the chunks so that all runs of identical charsets
+        # get collapsed into a single unicode string.
         chunks = []
         last_charset = None
         last_chunk = []
@@ -320,8 +323,6 @@
             else:
                 if last_charset is not None:
                     chunks.append((SPACE.join(last_chunk), last_charset))
-                    if last_charset != USASCII or charset != USASCII:
-                        chunks.append((' ', USASCII))
                 last_chunk = [string]
                 last_charset = charset
         if last_chunk:
@@ -340,6 +341,10 @@
         self._current_line = _Accumulator(headerlen)
 
     def __str__(self):
+        # Remove the trailing TRANSITIONAL_SPACE
+        last_line = self._current_line.pop()
+        if last_line is not TRANSITIONAL_SPACE:
+            self._current_line.push(last_line)
         self.newline()
         return NL.join(self._lines)
 
@@ -402,6 +407,7 @@
             # There was only one line.
             return
         self._current_line.push(last_line)
+        self._current_line.push(TRANSITIONAL_SPACE)
         # Everything else are full lines in themselves.
         for line in encoded_lines:
             self._lines.append(self._continuation_ws + line)
@@ -551,11 +557,14 @@
 
     def __len__(self):
         return sum((len(string)
-                    for string in self._current),
+                    for string in self._current
+                    if string is not TRANSITIONAL_SPACE),
                    self._initial_size)
 
     def __str__(self):
-        return EMPTYSTRING.join(self._current)
+        return EMPTYSTRING.join(
+            (' ' if string is TRANSITIONAL_SPACE else string)
+            for string in self._current)
 
     def reset(self, string=None):
         self._current = []

Modified: sandbox/trunk/emailpkg/5_0-exp/email/test/test_email.py
==============================================================================
--- sandbox/trunk/emailpkg/5_0-exp/email/test/test_email.py	(original)
+++ sandbox/trunk/emailpkg/5_0-exp/email/test/test_email.py	Thu Aug 30 04:22:54 2007
@@ -580,31 +580,31 @@
         g = Generator(sfp)
         g.flatten(msg)
         eq(sfp.getvalue(), """\
-Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
- =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?=
- =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?=
- =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?=
- =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?=
- =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?=
- =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?=
- =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?=
- =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?=
- =?utf-8?b?44Gm44GE44G+44GZ44CC?=
-
-""")
-        eq(h.encode(), """\
-=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerd?=
- =?iso-8859-1?q?erband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndi?=
- =?iso-8859-1?q?schen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Kling?=
- =?iso-8859-1?q?en_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_met?=
- =?iso-8859-2?q?ropole_se_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
- =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE?=
- =?utf-8?b?44G+44Gb44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB?=
- =?utf-8?b?44GC44Go44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CM?=
- =?utf-8?q?Wenn_ist_das_Nunstuck_git_und_Slotermeyer=3F_Ja!_Beiherhund_das?=
- =?utf-8?b?IE9kZXIgZGllIEZsaXBwZXJ3YWxkdCBnZXJzcHV0LuOAjeOBqOiogOOBow==?=
- =?utf-8?b?44Gm44GE44G+44GZ44CC?=""")
+Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
+ =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
+ =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
+ =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
+ =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
+ =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
+ =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
+ =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
+ =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
+ =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
+ =?utf-8?b?44CC?=
+
+""")
+        eq(h.encode(maxlinelen=76), """\
+=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
+ =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
+ =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
+ =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
+ =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= 
+ =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
+ =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
+ =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
+ =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
+ =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
+ =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
 
     def test_long_header_encode(self):
         eq = self.ndiffAssertEqual
@@ -727,7 +727,7 @@
         h = Header('Britische Regierung gibt', 'iso-8859-1',
                     header_name='Subject')
         h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
-        eq(h.encode(), """\
+        eq(h.encode(maxlinelen=76), """\
 =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
  =?iso-8859-1?q?hore-Windkraftprojekte?=""")
         msg['Subject'] = h
@@ -772,7 +772,7 @@
         s = 'This is an example of string which has almost the limit of header length.'
         h = Header(s)
         h.append('Add another line.')
-        eq(h.encode(), """\
+        eq(h.encode(maxlinelen=76), """\
 This is an example of string which has almost the limit of header length.
  Add another line.""")
 
@@ -793,7 +793,7 @@
               'bef\xf6rdert. ')
         h = Header(gs, 'iso-8859-1', header_name=fn)
         # BAW: this seems broken because the first line is too long
-        eq(h.encode(), """\
+        eq(h.encode(maxlinelen=76), """\
 =?iso-8859-1?q?Die_Mieter_treten_hier_e?=
  =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
  =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
@@ -2802,7 +2802,7 @@
         h = Header(g_head, g)
         h.append(cz_head, cz)
         h.append(utf8_head, utf8)
-        enc = h.encode()
+        enc = h.encode(maxlinelen=76)
         eq(enc, """\
 =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
  =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
@@ -2819,28 +2819,29 @@
         eq(len(decoded), 3)
         eq(decoded[0], (g_head, 'iso-8859-1'))
         eq(decoded[1], (cz_head, 'iso-8859-2'))
-        eq(decoded[3], (utf8_head, 'utf-8'))
+        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
         ustr = str(h)
-        eq(ustr.encode('utf-8'),
-           'Die Mieter treten hier ein werden mit einem Foerderband '
-           'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
-           'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
-           'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
-           'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
-           '\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
-           '\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
-           '\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
-           '\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
-           '\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
-           '\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
-           '\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
-           '\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
-           'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
-           'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
-           '\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82')
+        eq(ustr,
+           (b'Die Mieter treten hier ein werden mit einem Foerderband '
+            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
+            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
+            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
+            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
+            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
+            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
+            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
+            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
+            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
+            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
+            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
+            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
+            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
+            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
+            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
+            ).decode('utf-8'))
         # Test make_header()
         newh = make_header(decode_header(enc))
-        eq(newh, enc)
+        eq(newh, h)
 
     def test_empty_header_encode(self):
         h = Header()
@@ -2851,7 +2852,7 @@
         h = Header()
         eq(h, '')
         h.append('foo', Charset('iso-8859-1'))
-        eq(h, '=?iso-8859-1?q?foo?=')
+        eq(h, 'foo')
 
     def test_explicit_maxlinelen(self):
         eq = self.ndiffAssertEqual
@@ -3007,7 +3008,7 @@
         eq = self.assertEqual
         h = Header()
         h.append('hello', 'iso-8859-1')
-        eq(h, '=?iso-8859-1?q?hello?=')
+        eq(h, 'hello')
 
 ##    def test_unicode_error(self):
 ##        raises = self.assertRaises


More information about the Python-checkins mailing list