[Python-checkins] [3.12] gh-106669: Revert "gh-102988: Detect email address parsing errors ... (GH-105127)" (GH-106733) (#106941)
gpshead
webhook-mailer at python.org
Fri Jul 21 00:05:50 EDT 2023
https://github.com/python/cpython/commit/656f62454bff35db8d630ca43c94bf6db44338ba
commit: 656f62454bff35db8d630ca43c94bf6db44338ba
branch: 3.12
author: Gregory P. Smith <greg at krypto.org>
committer: gpshead <greg at krypto.org>
date: 2023-07-21T04:05:46Z
summary:
[3.12] gh-106669: Revert "gh-102988: Detect email address parsing errors ... (GH-105127)" (GH-106733) (#106941)
This reverts commit 18dfbd035775c15533d13a98e56b1d2bf5c65f00.
Adds a regression test from the issue.
See https://github.com/python/cpython/issues/106669.
(cherry picked from commit a31dea1feb61793e48fa9aa5014f358352205c1d)
files:
A Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
M Doc/library/email.utils.rst
M Doc/whatsnew/3.12.rst
M Lib/email/utils.py
M Lib/test/test_email/test_email.py
diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
index a87a0bd2e7de6..345b64001c1ac 100644
--- a/Doc/library/email.utils.rst
+++ b/Doc/library/email.utils.rst
@@ -65,11 +65,6 @@ of the new API.
*email address* parts. Returns a tuple of that information, unless the parse
fails, in which case a 2-tuple of ``('', '')`` is returned.
- .. versionchanged:: 3.12
- For security reasons, addresses that were ambiguous and could parse into
- multiple different addresses now cause ``('', '')`` to be returned
- instead of only one of the *potential* addresses.
-
.. function:: formataddr(pair, charset='utf-8')
@@ -92,7 +87,7 @@ of the new API.
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
*fieldvalues* is a sequence of header field values as might be returned by
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
- example that gets all the recipients of a message:
+ example that gets all the recipients of a message::
from email.utils import getaddresses
@@ -102,25 +97,6 @@ of the new API.
resent_ccs = msg.get_all('resent-cc', [])
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
- When parsing fails for a single fieldvalue, a 2-tuple of ``('', '')``
- is returned in its place. Other errors in parsing the list of
- addresses such as a fieldvalue seemingly parsing into multiple
- addresses may result in a list containing a single empty 2-tuple
- ``[('', '')]`` being returned rather than returning potentially
- invalid output.
-
- Example malformed input parsing:
-
- .. doctest::
-
- >>> from email.utils import getaddresses
- >>> getaddresses(['alice at example.com <bob at example.com>', 'me at example.com'])
- [('', '')]
-
- .. versionchanged:: 3.12
- The 2-tuple of ``('', '')`` in the returned values when parsing
- fails were added as to address a security issue.
-
.. function:: parsedate(date)
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index d6d7b7dac9a31..068618fe48e1b 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -570,14 +570,6 @@ dis
:data:`~dis.hasarg` collection instead.
(Contributed by Irit Katriel in :gh:`94216`.)
-email
------
-
-* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
- ``('', '')`` 2-tuples in more situations where invalid email addresses are
- encountered instead of potentially inaccurate values.
- (Contributed by Thomas Dwyer for :gh:`102988` to ameliorate CVE-2023-27043.)
-
fractions
---------
diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index 11ad75e94e934..81da5394ea169 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -106,54 +106,12 @@ def formataddr(pair, charset='utf-8'):
return address
-def _pre_parse_validation(email_header_fields):
- accepted_values = []
- for v in email_header_fields:
- s = v.replace('\\(', '').replace('\\)', '')
- if s.count('(') != s.count(')'):
- v = "('', '')"
- accepted_values.append(v)
-
- return accepted_values
-
-
-def _post_parse_validation(parsed_email_header_tuples):
- accepted_values = []
- # The parser would have parsed a correctly formatted domain-literal
- # The existence of an [ after parsing indicates a parsing failure
- for v in parsed_email_header_tuples:
- if '[' in v[1]:
- v = ('', '')
- accepted_values.append(v)
-
- return accepted_values
-
def getaddresses(fieldvalues):
- """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
-
- When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
- its place.
-
- If the resulting list of parsed address is not the same as the number of
- fieldvalues in the input list a parsing error has occurred. A list
- containing a single empty 2-tuple [('', '')] is returned in its place.
- This is done to avoid invalid output.
- """
- fieldvalues = [str(v) for v in fieldvalues]
- fieldvalues = _pre_parse_validation(fieldvalues)
- all = COMMASPACE.join(v for v in fieldvalues)
+ """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
+ all = COMMASPACE.join(str(v) for v in fieldvalues)
a = _AddressList(all)
- result = _post_parse_validation(a.addresslist)
-
- n = 0
- for v in fieldvalues:
- n += v.count(',') + 1
-
- if len(result) != n:
- return [('', '')]
-
- return result
+ return a.addresslist
def _format_timetuple_and_zone(timetuple, zone):
@@ -254,18 +212,9 @@ def parseaddr(addr):
Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
- if isinstance(addr, list):
- addr = addr[0]
-
- if not isinstance(addr, str):
- return ('', '')
-
- addr = _pre_parse_validation([addr])[0]
- addrs = _post_parse_validation(_AddressList(addr).addresslist)
-
- if not addrs or len(addrs) > 1:
- return ('', '')
-
+ addrs = _AddressList(addr).addresslist
+ if not addrs:
+ return '', ''
return addrs[0]
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 5238944d6b478..b4f3a2481976e 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -3319,90 +3319,32 @@ def test_getaddresses(self):
[('Al Person', 'aperson at dom.ain'),
('Bud Person', 'bperson at dom.ain')])
- def test_getaddresses_parsing_errors(self):
- """Test for parsing errors from CVE-2023-27043"""
- eq = self.assertEqual
- eq(utils.getaddresses(['alice at example.org(<bob at example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice at example.org)<bob at example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice at example.org<<bob at example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice at example.org><bob at example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice at example.org@<bob at example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice at example.org,<bob at example.com>']),
- [('', 'alice at example.org'), ('', 'bob at example.com')])
- eq(utils.getaddresses(['alice at example.org;<bob at example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice at example.org:<bob at example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice at example.org.<bob at example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice at example.org"<bob at example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice at example.org[<bob at example.com>']),
- [('', '')])
- eq(utils.getaddresses(['alice at example.org]<bob at example.com>']),
- [('', '')])
-
- def test_parseaddr_parsing_errors(self):
- """Test for parsing errors from CVE-2023-27043"""
- eq = self.assertEqual
- eq(utils.parseaddr(['alice at example.org(<bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org)<bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org<<bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org><bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org@<bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org,<bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org;<bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org:<bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org.<bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org"<bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org[<bob at example.com>']),
- ('', ''))
- eq(utils.parseaddr(['alice at example.org]<bob at example.com>']),
- ('', ''))
+ def test_getaddresses_comma_in_name(self):
+ """GH-106669 regression test."""
+ self.assertEqual(
+ utils.getaddresses(
+ [
+ '"Bud, Person" <bperson at dom.ain>',
+ 'aperson at dom.ain (Al Person)',
+ '"Mariusz Felisiak" <to at example.com>',
+ ]
+ ),
+ [
+ ('Bud, Person', 'bperson at dom.ain'),
+ ('Al Person', 'aperson at dom.ain'),
+ ('Mariusz Felisiak', 'to at example.com'),
+ ],
+ )
def test_getaddresses_nasty(self):
eq = self.assertEqual
eq(utils.getaddresses(['foo: ;']), [('', '')])
- eq(utils.getaddresses(['[]*-- =~$']), [('', '')])
+ eq(utils.getaddresses(
+ ['[]*-- =~$']),
+ [('', ''), ('', ''), ('', '*--')])
eq(utils.getaddresses(
['foo: ;', '"Jason R. Mastaler" <jason at dom.ain>']),
[('', ''), ('Jason R. Mastaler', 'jason at dom.ain')])
- eq(utils.getaddresses(
- [r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>']),
- [('Pete (A nice ) chap his account his host)', 'pete at silly.test')])
- eq(utils.getaddresses(
- ['(Empty list)(start)Undisclosed recipients :(nobody(I know))']),
- [('', '')])
- eq(utils.getaddresses(
- ['Mary <@machine.tld:mary at example.net>, , jdoe at test . example']),
- [('Mary', 'mary at example.net'), ('', ''), ('', 'jdoe at test.example')])
- eq(utils.getaddresses(
- ['John Doe <jdoe at machine(comment). example>']),
- [('John Doe (comment)', 'jdoe at machine.example')])
- eq(utils.getaddresses(
- ['"Mary Smith: Personal Account" <smith at home.example>']),
- [('Mary Smith: Personal Account', 'smith at home.example')])
- eq(utils.getaddresses(
- ['Undisclosed recipients:;']),
- [('', '')])
- eq(utils.getaddresses(
- [r'<boss at nil.test>, "Giant; \"Big\" Box" <bob at example.net>']),
- [('', 'boss at nil.test'), ('Giant; "Big" Box', 'bob at example.net')])
def test_getaddresses_embedded_comment(self):
"""Test proper handling of a nested comment"""
diff --git a/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
new file mode 100644
index 0000000000000..c67ec45737b53
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2023-06-13-20-52-24.gh-issue-102988.Kei7Vf.rst
@@ -0,0 +1,4 @@
+Reverted the :mod:`email.utils` security improvement change released in
+3.12beta4 that unintentionally caused :mod:`email.utils.getaddresses` to fail
+to parse email addresses with a comma in the quoted name field.
+See :gh:`106669`.
More information about the Python-checkins
mailing list