[Python-checkins] cpython (3.4): handle headers with no key (closes #19996)

benjamin.peterson python-checkins at python.org
Mon Jan 26 05:35:21 CET 2015


https://hg.python.org/cpython/rev/25ecf3d0ea03
changeset:   94282:25ecf3d0ea03
branch:      3.4
parent:      94277:2de90090e486
user:        Benjamin Peterson <benjamin at python.org>
date:        Sun Jan 25 23:30:30 2015 -0500
summary:
  handle headers with no key (closes #19996)

Patch by Cory Benfield.

files:
  Lib/email/feedparser.py           |  11 ++++++++++-
  Lib/test/test_email/test_email.py |   6 ++++++
  Lib/test/test_httplib.py          |  10 ++++++++++
  Misc/NEWS                         |   3 +++
  4 files changed, 29 insertions(+), 1 deletions(-)


diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py
--- a/Lib/email/feedparser.py
+++ b/Lib/email/feedparser.py
@@ -33,7 +33,7 @@
 NLCRE_crack = re.compile('(\r\n|\r|\n)')
 # RFC 2822 $3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
 # except controls, SP, and ":".
-headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
+headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
 EMPTYSTRING = ''
 NL = '\n'
 
@@ -511,6 +511,15 @@
             # There will always be a colon, because if there wasn't the part of
             # the parser that calls us would have started parsing the body.
             i = line.find(':')
+
+            # If the colon is at the start of the line the header is clearly
+            # malformed, but we might be able to salvage the rest of the
+            # message. Track the error but keep going.
+            if i == 0:
+                defect = errors.InvalidHeaderDefect("Missing header name.")
+                self._cur.defects.append(defect)
+                continue
+
             assert i>0, "_parse_headers fed line with no : and no leading WS"
             lastheader = line[:i]
             lastvalue = [line]
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3389,6 +3389,12 @@
             feedparser.feed(chunk)
         return feedparser.close()
 
+    def test_empty_header_name_handled(self):
+        # Issue 19996
+        msg = self.parse("First: val\n: bad\nSecond: val")
+        self.assertEqual(msg['First'], 'val')
+        self.assertEqual(msg['Second'], 'val')
+
     def test_newlines(self):
         m = self.parse(['a:\nb:\rc:\r\nd:\n'])
         self.assertEqual(m.keys(), ['a', 'b', 'c', 'd'])
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -167,6 +167,16 @@
         conn.request('GET', '/foo')
         self.assertTrue(sock.data.startswith(expected))
 
+    def test_malformed_headers_coped_with(self):
+        # Issue 19996
+        body = "HTTP/1.1 200 OK\r\nFirst: val\r\n: nval\r\nSecond: val\r\n\r\n"
+        sock = FakeSocket(body)
+        resp = client.HTTPResponse(sock)
+        resp.begin()
+
+        self.assertEqual(resp.getheader('First'), 'val')
+        self.assertEqual(resp.getheader('Second'), 'val')
+
 
 class BasicTest(TestCase):
     def test_status_lines(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -47,6 +47,9 @@
 Library
 -------
 
+- Issue #19996: :class:`email.feedparser.FeedParser` now handles (malformed)
+  headers with no key rather than assuming the body has started.
+
 - Issue #23248: Update ssl error codes from latest OpenSSL git master.
 
 - Issue #23098: 64-bit dev_t is now supported in the os module.

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list