[Python-checkins] bpo-38698: Add a new InvalidMessageID token to email header parser. (GH-17503)

Miss Islington (bot) webhook-mailer at python.org
Sun Dec 8 21:11:38 EST 2019


https://github.com/python/cpython/commit/f66f4a09d0b6817fe6a86a567fd506aa223f1563
commit: f66f4a09d0b6817fe6a86a567fd506aa223f1563
branch: 3.8
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2019-12-08T18:11:31-08:00
summary:

bpo-38698: Add a new InvalidMessageID token to email header parser. (GH-17503)


This adds a new InvalidMessageID token to the email header parser which can be
used to represent invalid message-id headers in the parse tree.
(cherry picked from commit 68157da8b42b26408af5d157d2dba4fcf29c6320)

Co-authored-by: Abhilash Raj <maxking at users.noreply.github.com>

files:
A Misc/NEWS.d/next/Library/2019-12-07-21-49-50.bpo-38698.HxoSym.rst
M Lib/email/_header_value_parser.py
M Lib/test/test_email/test__header_value_parser.py

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index abdef8189ca6f..cb013225ec60c 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -850,10 +850,15 @@ def fold(self, policy):
         # message-id tokens may not be folded.
         return str(self) + policy.linesep
 
+
 class MessageID(MsgID):
     token_type = 'message-id'
 
 
+class InvalidMessageID(MessageID):
+    token_type = 'invalid-message-id'
+
+
 class Header(TokenList):
     token_type = 'header'
 
@@ -2110,11 +2115,18 @@ def parse_message_id(value):
     message_id = MessageID()
     try:
         token, value = get_msg_id(value)
-    except errors.HeaderParseError:
-        message_id.defects.append(errors.InvalidHeaderDefect(
-            "Expected msg-id but found {!r}".format(value)))
-    else:
         message_id.append(token)
+    except errors.HeaderParseError as ex:
+        token = get_unstructured(value)
+        message_id = InvalidMessageID(token)
+        message_id.defects.append(
+            errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex)))
+    else:
+        # Nothing should be left over after parsing a valid msg-id.
+        if value:
+            message_id.defects.append(errors.InvalidHeaderDefect(
+                "Unexpected {!r}".format(value)))
+
     return message_id
 
 #
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 2f63a3b3e0524..7c9f9877d7be2 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -2639,10 +2639,44 @@ def test_get_msg_id_no_id_right_part(self):
         self.assertEqual(msg_id.token_type, 'msg-id')
 
     def test_get_msg_id_invalid_expected_msg_id_not_found(self):
-        text = "Message-Id: 935-XPB-567:0:86089:180874:0:45327:9:90305:17843586-40 at example.com"
+        text = "935-XPB-567:0:45327:9:90305:17843586-40 at example.com"
         msg_id = parser.parse_message_id(text)
-        self.assertDefectsEqual(msg_id.all_defects,
-                                [errors.InvalidHeaderDefect])
+        self.assertDefectsEqual(
+            msg_id.all_defects,
+            [errors.InvalidHeaderDefect])
+
+    def test_parse_invalid_message_id(self):
+        message_id = self._test_parse_x(
+            parser.parse_message_id,
+            "935-XPB-567:0:45327:9:90305:17843586-40 at example.com",
+            "935-XPB-567:0:45327:9:90305:17843586-40 at example.com",
+            "935-XPB-567:0:45327:9:90305:17843586-40 at example.com",
+            [errors.InvalidHeaderDefect],
+            )
+        self.assertEqual(message_id.token_type, 'invalid-message-id')
+
+    def test_parse_valid_message_id(self):
+        message_id = self._test_parse_x(
+            parser.parse_message_id,
+            "<aperson at somedomain>",
+            "<aperson at somedomain>",
+            "<aperson at somedomain>",
+            [],
+            )
+        self.assertEqual(message_id.token_type, 'message-id')
+
+    def test_parse_message_id_with_remaining(self):
+        message_id = self._test_parse_x(
+            parser.parse_message_id,
+            "<validmessageid at example>thensomething",
+            "<validmessageid at example>",
+            "<validmessageid at example>",
+            [errors.InvalidHeaderDefect],
+            [],
+            )
+        self.assertEqual(message_id.token_type, 'message-id')
+        self.assertEqual(str(message_id.all_defects[0]),
+                         "Unexpected 'thensomething'")
 
     def test_get_msg_id_no_angle_start(self):
         with self.assertRaises(errors.HeaderParseError):
diff --git a/Misc/NEWS.d/next/Library/2019-12-07-21-49-50.bpo-38698.HxoSym.rst b/Misc/NEWS.d/next/Library/2019-12-07-21-49-50.bpo-38698.HxoSym.rst
new file mode 100644
index 0000000000000..b930dea0fa7bc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-12-07-21-49-50.bpo-38698.HxoSym.rst
@@ -0,0 +1,3 @@
+Add a new ``InvalidMessageID`` token to the email parser to represent invalid
+Message-ID headers.  Also, add a defect when there is a remaining value after
+parsing the header.



More information about the Python-checkins mailing list