[Python-checkins] bpo-37764: Fix infinite loop when parsing unstructured email headers. (GH-15239)
Miss Islington (bot)
webhook-mailer at python.org
Sat Aug 31 11:25:39 EDT 2019
https://github.com/python/cpython/commit/c5b242f87f31286ad38991bc3868cf4cfbf2b681
commit: c5b242f87f31286ad38991bc3868cf4cfbf2b681
branch: master
author: Ashwin Ramaswami <aramaswamis at gmail.com>
committer: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
date: 2019-08-31T08:25:35-07:00
summary:
bpo-37764: Fix infinite loop when parsing unstructured email headers. (GH-15239)
Fixes a bug in which email._header_value_parser.get_unstructured loops forever (hangs) on some invalid headers. This covers the cases in which the header contains either:
- an encoded word that is not followed by whitespace
- an invalid encoded word
https://bugs.python.org/issue37764
This fix should also be backported to 3.7 and 3.8
https://bugs.python.org/issue37764
files:
A Misc/NEWS.d/next/Security/2019-08-27-01-13-05.bpo-37764.qv67PQ.rst
M Lib/email/_header_value_parser.py
M Lib/test/test_email/test__header_value_parser.py
M Lib/test/test_email/test_email.py
M Misc/ACKS
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index b5003943ab0d..16c19907d68d 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -935,6 +935,10 @@ def __str__(self):
return ''
+class _InvalidEwError(errors.HeaderParseError):
+ """Invalid encoded word found while parsing headers."""
+
+
# XXX these need to become classes and used as instances so
# that a program can't change them in a parse tree and screw
# up other parse trees. Maybe should have tests for that, too.
@@ -1039,7 +1043,10 @@ def get_encoded_word(value):
raise errors.HeaderParseError(
"expected encoded word but found {}".format(value))
remstr = ''.join(remainder)
- if len(remstr) > 1 and remstr[0] in hexdigits and remstr[1] in hexdigits:
+ if (len(remstr) > 1 and
+ remstr[0] in hexdigits and
+ remstr[1] in hexdigits and
+ tok.count('?') < 2):
# The ? after the CTE was followed by an encoded word escape (=XX).
rest, *remainder = remstr.split('?=', 1)
tok = tok + '?=' + rest
@@ -1051,7 +1058,7 @@ def get_encoded_word(value):
try:
text, charset, lang, defects = _ew.decode('=?' + tok + '?=')
except ValueError:
- raise errors.HeaderParseError(
+ raise _InvalidEwError(
"encoded word format invalid: '{}'".format(ew.cte))
ew.charset = charset
ew.lang = lang
@@ -1101,9 +1108,12 @@ def get_unstructured(value):
token, value = get_fws(value)
unstructured.append(token)
continue
+ valid_ew = True
if value.startswith('=?'):
try:
token, value = get_encoded_word(value)
+ except _InvalidEwError:
+ valid_ew = False
except errors.HeaderParseError:
# XXX: Need to figure out how to register defects when
# appropriate here.
@@ -1125,7 +1135,10 @@ def get_unstructured(value):
# Split in the middle of an atom if there is a rfc2047 encoded word
# which does not have WSP on both sides. The defect will be registered
# the next time through the loop.
- if rfc2047_matcher.search(tok):
+ # This needs to only be performed when the encoded word is valid;
+ # otherwise, performing it on an invalid encoded word can cause
+ # the parser to go in an infinite loop.
+ if valid_ew and rfc2047_matcher.search(tok):
tok, *remainder = value.partition('=?')
vtext = ValueTerminal(tok, 'vtext')
_validate_xtext(vtext)
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index b3e6b2661524..058d902459b6 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -383,6 +383,22 @@ def test_get_unstructured_ew_without_trailing_whitespace(self):
[errors.InvalidHeaderDefect],
'')
+ def test_get_unstructured_without_trailing_whitespace_hang_case(self):
+ self._test_get_x(self._get_unst,
+ '=?utf-8?q?somevalue?=aa',
+ 'somevalueaa',
+ 'somevalueaa',
+ [errors.InvalidHeaderDefect],
+ '')
+
+ def test_get_unstructured_invalid_ew(self):
+ self._test_get_x(self._get_unst,
+ '=?utf-8?q?=somevalue?=',
+ '=?utf-8?q?=somevalue?=',
+ '=?utf-8?q?=somevalue?=',
+ [],
+ '')
+
# get_qp_ctext
def test_get_qp_ctext_only(self):
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index ae9625845646..8ec39190ea8d 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -5381,6 +5381,27 @@ def test_rfc2231_unencoded_then_encoded_segments(self):
eq(language, 'en-us')
eq(s, 'My Document For You')
+ def test_should_not_hang_on_invalid_ew_messages(self):
+ messages = ["""From: user at host.com
+To: user at host.com
+Bad-Header:
+ =?us-ascii?Q?LCSwrV11+IB0rSbSker+M9vWR7wEDSuGqmHD89Gt=ea0nJFSaiz4vX3XMJPT4vrE?=
+ =?us-ascii?Q?xGUZeOnp0o22pLBB7CYLH74Js=wOlK6Tfru2U47qR?=
+ =?us-ascii?Q?72OfyEY2p2=2FrA9xNFyvH+fBTCmazxwzF8nGkK6D?=
+
+Hello!
+""", """From: ����� �������� <xxx at xxx>
+To: "xxx" <xxx at xxx>
+Subject: ��� ���������� ����� ����� � ��������� �� ����
+MIME-Version: 1.0
+Content-Type: text/plain; charset="windows-1251";
+Content-Transfer-Encoding: 8bit
+
+�� ����� � ���� ������ ��� ��������
+"""]
+ for m in messages:
+ with self.subTest(m=m):
+ msg = email.message_from_string(m)
# Tests to ensure that signed parts of an email are completely preserved, as
diff --git a/Misc/ACKS b/Misc/ACKS
index e9ae0ed56b0d..ce8b144900eb 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -1336,6 +1336,7 @@ Burton Radons
Abhilash Raj
Shorya Raj
Dhushyanth Ramasamy
+Ashwin Ramaswami
Jeff Ramnani
Bayard Randel
Varpu Rantala
diff --git a/Misc/NEWS.d/next/Security/2019-08-27-01-13-05.bpo-37764.qv67PQ.rst b/Misc/NEWS.d/next/Security/2019-08-27-01-13-05.bpo-37764.qv67PQ.rst
new file mode 100644
index 000000000000..27fa8e192f0c
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2019-08-27-01-13-05.bpo-37764.qv67PQ.rst
@@ -0,0 +1 @@
+Fixes email._header_value_parser.get_unstructured going into an infinite loop for a specific case in which the email header does not have trailing whitespace, and the case in which it contains an invalid encoded word. Patch by Ashwin Ramaswami.
\ No newline at end of file
More information about the Python-checkins
mailing list