[Python-checkins] bpo-27397: Make email module properly handle invalid-length base64 strings (GH-7583) (GH-7664)
Tal Einat
webhook-mailer at python.org
Tue Jun 12 09:46:14 EDT 2018
https://github.com/python/cpython/commit/7b82281c80d0064559866afe92f19cae5978c841
commit: 7b82281c80d0064559866afe92f19cae5978c841
branch: 3.7
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: Tal Einat <taleinat+github at gmail.com>
date: 2018-06-12T16:46:11+03:00
summary:
bpo-27397: Make email module properly handle invalid-length base64 strings (GH-7583) (GH-7664)
When attempting to base64-decode a payload of invalid length (1 mod 4),
properly recognize and handle it. The given data will be returned as-is,
i.e. not decoded, along with a new defect, InvalidBase64LengthDefect.
(cherry picked from commit c3f55be7dd012b7e92901627d0b31c21e983ccb4)
Co-authored-by: Tal Einat <taleinat+github at gmail.com>
files:
A Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst
M Doc/library/email.errors.rst
M Lib/email/_encoded_words.py
M Lib/email/errors.py
M Lib/test/test_email/test__encoded_words.py
M Lib/test/test_email/test__header_value_parser.py
M Lib/test/test_email/test_defect_handling.py
diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst
index 5838767b18f7..511ad1635831 100644
--- a/Doc/library/email.errors.rst
+++ b/Doc/library/email.errors.rst
@@ -108,3 +108,7 @@ All defect classes are subclassed from :class:`email.errors.MessageDefect`.
* :class:`InvalidBase64CharactersDefect` -- When decoding a block of base64
encoded bytes, characters outside the base64 alphabet were encountered.
The characters are ignored, but the resulting decoded bytes may be invalid.
+
+* :class:`InvalidBase64LengthDefect` -- When decoding a block of base64 encoded
+ bytes, the number of non-padding base64 characters was invalid (1 more than
+ a multiple of 4). The encoded block was kept as-is.
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py
index c40ffa917b56..295ae7eb2123 100644
--- a/Lib/email/_encoded_words.py
+++ b/Lib/email/_encoded_words.py
@@ -98,30 +98,42 @@ def len_q(bstring):
#
def decode_b(encoded):
- defects = []
+ # First try decoding with validate=True, fixing the padding if needed.
+ # This will succeed only if encoded includes no invalid characters.
pad_err = len(encoded) % 4
- if pad_err:
- defects.append(errors.InvalidBase64PaddingDefect())
- padded_encoded = encoded + b'==='[:4-pad_err]
- else:
- padded_encoded = encoded
+ missing_padding = b'==='[:4-pad_err] if pad_err else b''
try:
- return base64.b64decode(padded_encoded, validate=True), defects
+ return (
+ base64.b64decode(encoded + missing_padding, validate=True),
+ [errors.InvalidBase64PaddingDefect()] if pad_err else [],
+ )
except binascii.Error:
- # Since we had correct padding, this must an invalid char error.
- defects = [errors.InvalidBase64CharactersDefect()]
+ # Since we had correct padding, this is likely an invalid char error.
+ #
# The non-alphabet characters are ignored as far as padding
- # goes, but we don't know how many there are. So we'll just
- # try various padding lengths until something works.
- for i in 0, 1, 2, 3:
+ # goes, but we don't know how many there are. So try without adding
+ # padding to see if it works.
+ try:
+ return (
+ base64.b64decode(encoded, validate=False),
+ [errors.InvalidBase64CharactersDefect()],
+ )
+ except binascii.Error:
+ # Add as much padding as could possibly be necessary (extra padding
+ # is ignored).
try:
- return base64.b64decode(encoded+b'='*i, validate=False), defects
+ return (
+ base64.b64decode(encoded + b'==', validate=False),
+ [errors.InvalidBase64CharactersDefect(),
+ errors.InvalidBase64PaddingDefect()],
+ )
except binascii.Error:
- if i==0:
- defects.append(errors.InvalidBase64PaddingDefect())
- else:
- # This should never happen.
- raise AssertionError("unexpected binascii.Error")
+ # This only happens when the encoded string's length is 1 more
+ # than a multiple of 4, which is invalid.
+ #
+ # bpo-27397: Just return the encoded string since there's no
+ # way to decode.
+ return encoded, [errors.InvalidBase64LengthDefect()]
def encode_b(bstring):
return base64.b64encode(bstring).decode('ascii')
diff --git a/Lib/email/errors.py b/Lib/email/errors.py
index 791239fa6a54..d28a6800104b 100644
--- a/Lib/email/errors.py
+++ b/Lib/email/errors.py
@@ -73,6 +73,9 @@ class InvalidBase64PaddingDefect(MessageDefect):
class InvalidBase64CharactersDefect(MessageDefect):
"""base64 encoded sequence had characters not in base64 alphabet"""
+class InvalidBase64LengthDefect(MessageDefect):
+ """base64 encoded sequence had invalid length (1 mod 4)"""
+
# These errors are specific to header parsing.
class HeaderDefect(MessageDefect):
diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py
index 900e1d0e64d4..5a59aebba89b 100644
--- a/Lib/test/test_email/test__encoded_words.py
+++ b/Lib/test/test_email/test__encoded_words.py
@@ -33,7 +33,10 @@ def test_simple(self):
self._test(b'Zm9v', b'foo')
def test_missing_padding(self):
+ # 1 missing padding character
self._test(b'dmk', b'vi', [errors.InvalidBase64PaddingDefect])
+ # 2 missing padding characters
+ self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect])
def test_invalid_character(self):
self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect])
@@ -42,6 +45,9 @@ def test_invalid_character_and_bad_padding(self):
self._test(b'dm\x01k', b'vi', [errors.InvalidBase64CharactersDefect,
errors.InvalidBase64PaddingDefect])
+ def test_invalid_length(self):
+ self._test(b'abcde', b'abcde', [errors.InvalidBase64LengthDefect])
+
class TestDecode(TestEmailBase):
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index 5cdc4bcecad4..5036de2ca0c3 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -347,6 +347,15 @@ def test_get_unstructured_invalid_base64_character_and_bad_padding(self):
errors.InvalidBase64PaddingDefect],
'')
+ def test_get_unstructured_invalid_base64_length(self):
+ # bpo-27397: Return the encoded string since there's no way to decode.
+ self._test_get_x(self._get_unst,
+ '=?utf-8?b?abcde?=',
+ 'abcde',
+ 'abcde',
+ [errors.InvalidBase64LengthDefect],
+ '')
+
def test_get_unstructured_no_whitespace_between_ews(self):
self._test_get_x(self._get_unst,
'=?utf-8?q?foo?==?utf-8?q?bar?=',
diff --git a/Lib/test/test_email/test_defect_handling.py b/Lib/test/test_email/test_defect_handling.py
index f36b90757399..781f65741822 100644
--- a/Lib/test/test_email/test_defect_handling.py
+++ b/Lib/test/test_email/test_defect_handling.py
@@ -254,6 +254,23 @@ def test_invalid_chars_in_base64_payload(self):
self.assertDefectsEqual(self.get_defects(msg),
[errors.InvalidBase64CharactersDefect])
+ def test_invalid_length_of_base64_payload(self):
+ source = textwrap.dedent("""\
+ Subject: test
+ MIME-Version: 1.0
+ Content-Type: text/plain; charset="utf-8"
+ Content-Transfer-Encoding: base64
+
+ abcde
+ """)
+ msg = self._str_msg(source)
+ with self._raise_point(errors.InvalidBase64LengthDefect):
+ payload = msg.get_payload(decode=True)
+ if self.raise_expected: return
+ self.assertEqual(payload, b'abcde')
+ self.assertDefectsEqual(self.get_defects(msg),
+ [errors.InvalidBase64LengthDefect])
+
def test_missing_ending_boundary(self):
source = textwrap.dedent("""\
To: 1 at harrydomain4.com
diff --git a/Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst b/Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst
new file mode 100644
index 000000000000..109817267bd0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-06-10-09-43-54.bpo-27397.0_fFQR.rst
@@ -0,0 +1 @@
+Make email module properly handle invalid-length base64 strings.
More information about the Python-checkins mailing list