[Python-checkins] cpython: #11684: Complete parser bytes interface by adding BytesHeaderParser

Wed Apr 13 22:46:37 CEST 2011

http://hg.python.org/cpython/rev/a95d936ce8eb
changeset:   69334:a95d936ce8eb
user:        R David Murray <rdmurray at bitdance.com>
date:        Wed Apr 13 16:46:05 2011 -0400
summary:
  #11684: Complete parser bytes interface by adding BytesHeaderParser

Patch by Steffen Daode Nurpmeso.

files:
  Doc/library/email.parser.rst      |  14 ++++++----
  Lib/email/generator.py            |   4 ++-
  Lib/email/parser.py               |  10 +++++++-
  Lib/test/test_email/test_email.py |  24 +++++++++++++++++++
  Misc/NEWS                         |   2 +
  5 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/Doc/library/email.parser.rst b/Doc/library/email.parser.rst
--- a/Doc/library/email.parser.rst
+++ b/Doc/library/email.parser.rst
@@ -94,12 +94,14 @@
 The :class:`Parser` class, imported from the :mod:`email.parser` module,
 provides an API that can be used to parse a message when the complete contents
 of the message are available in a string or file.  The :mod:`email.parser`
-module also provides a second class, called :class:`HeaderParser` which can be
-used if you're only interested in the headers of the message.
-:class:`HeaderParser` can be much faster in these situations, since it does not
-attempt to parse the message body, instead setting the payload to the raw body
-as a string. :class:`HeaderParser` has the same API as the :class:`Parser`
-class.
+module also provides header-only parsers, called :class:`HeaderParser` and
+:class:`BytesHeaderParser`, which can be used if you're only interested in the
+headers of the message.  :class:`HeaderParser` and :class:`BytesHeaderParser`
+can be much faster in these situations, since they do not attempt to parse the
+message body, instead setting the payload to the raw body as a string.  They
+have the same API as the :class:`Parser` and :class:`BytesParser` classes.
+
+.. versionadded:: 3.3 BytesHeaderParser
 
 
 .. class:: Parser(_class=email.message.Message)
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -297,10 +297,12 @@
         # message/rfc822.  Such messages are generated by, for example,
         # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
         # in that case we just emit the string body.
-        payload = msg.get_payload()
+        payload = msg._payload
         if isinstance(payload, list):
             g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
             payload = s.getvalue()
+        else:
+            payload = self._encode(payload)
         self._fp.write(payload)
 
     # This used to be a module level function; we use a classmethod for this
diff --git a/Lib/email/parser.py b/Lib/email/parser.py
--- a/Lib/email/parser.py
+++ b/Lib/email/parser.py
@@ -4,7 +4,7 @@
 
 """A parser of RFC 2822 and MIME email messages."""
 
-__all__ = ['Parser', 'HeaderParser']
+__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
 
 import warnings
 from io import StringIO, TextIOWrapper
@@ -114,3 +114,11 @@
         """
         text = text.decode('ASCII', errors='surrogateescape')
         return self.parser.parsestr(text, headersonly)
+
+
+class BytesHeaderParser(BytesParser):
+    def parse(self, fp, headersonly=True):
+        return BytesParser.parse(self, fp, headersonly=True)
+
+    def parsebytes(self, text, headersonly=True):
+        return BytesParser.parsebytes(self, text, headersonly=True)
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -177,6 +177,17 @@
         gen.flatten(msg, False)
         self.assertEqual(out.getvalue(), msgdata)
 
+    def test_byte_message_rfc822_only(self):
+        # Make sure new bytes header parser also passes this.
+        with openfile('msg_46.txt', 'rb') as fp:
+            msgdata = fp.read()
+        parser = email.parser.BytesHeaderParser()
+        msg = parser.parsebytes(msgdata)
+        out = BytesIO()
+        gen = email.generator.BytesGenerator(out)
+        gen.flatten(msg)
+        self.assertEqual(out.getvalue(), msgdata)
+
     def test_get_decoded_payload(self):
         eq = self.assertEqual
         msg = self._msgobj('msg_10.txt')
@@ -2749,6 +2760,7 @@
 
 
 class TestParsers(TestEmailBase):
+
     def test_header_parser(self):
         eq = self.assertEqual
         # Parse only the headers of a complex multipart MIME document
@@ -2760,6 +2772,18 @@
         self.assertFalse(msg.is_multipart())
         self.assertTrue(isinstance(msg.get_payload(), str))
 
+    def test_bytes_header_parser(self):
+        eq = self.assertEqual
+        # Parse only the headers of a complex multipart MIME document
+        with openfile('msg_02.txt', 'rb') as fp:
+            msg = email.parser.BytesHeaderParser().parse(fp)
+        eq(msg['from'], 'ppp-request@zzz.org')
+        eq(msg['to'], 'ppp@zzz.org')
+        eq(msg.get_content_type(), 'multipart/mixed')
+        self.assertFalse(msg.is_multipart())
+        self.assertTrue(isinstance(msg.get_payload(), str))
+        self.assertTrue(isinstance(msg.get_payload(decode=True), bytes))
+
     def test_whitespace_continuation(self):
         eq = self.assertEqual
         # This message contains a line after the Subject: header that has only
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -103,6 +103,8 @@
 Library
 -------
 
+- Issue #11684: complete email.parser bytes API by adding BytesHeaderParser.
+
 - The bz2 module now handles 4GiB+ input buffers correctly.
 
 - Issue #9233: Fix json.loads('{}') to return a dict (instead of a list), when

-- 
Repository URL: http://hg.python.org/cpython