[Python-checkins] cpython: Issue #13641: Decoding functions in the base64 module now accept ASCII-only unicode strings.

Mon Feb 20 19:33:48 CET 2012

http://hg.python.org/cpython/rev/c760bd844222
changeset:   75058:c760bd844222
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Mon Feb 20 19:30:23 2012 +0100
summary:
  Issue #13641: Decoding functions in the base64 module now accept ASCII-only unicode strings.
Patch by Catalin Iacob.

files:
  Doc/library/base64.rst  |   11 +-
  Lib/base64.py           |   26 ++-
  Lib/test/test_base64.py |  163 +++++++++++++++++----------
  Misc/NEWS               |    3 +
  4 files changed, 130 insertions(+), 73 deletions(-)

diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -18,9 +18,14 @@
 
 There are two interfaces provided by this module.  The modern interface
 supports encoding and decoding ASCII byte string objects using all three
-alphabets.  The legacy interface provides for encoding and decoding to and from
-file-like objects as well as byte strings, but only using the Base64 standard
-alphabet.
+alphabets. Additionally, the decoding functions of the modern interface also
+accept Unicode strings containing only ASCII characters. The legacy interface
+provides for encoding and decoding to and from file-like objects as well as
+byte strings, but only using the Base64 standard alphabet.
+
+.. versionchanged:: 3.3
+   ASCII-only Unicode strings are now accepted by the decoding functions of
+   the modern interface.
 
 The modern interface provides:
 
diff --git a/Lib/base64.py b/Lib/base64.py
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -29,6 +29,16 @@
 
 bytes_types = (bytes, bytearray)  # Types acceptable as binary data
 
+def _bytes_from_decode_data(s):
+    if isinstance(s, str):
+        try:
+            return s.encode('ascii')
+        except UnicodeEncodeError:
+            raise ValueError('string argument should contain only ASCII characters')
+    elif isinstance(s, bytes_types):
+        return s
+    else:
+        raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__)
 
 def _translate(s, altchars):
     if not isinstance(s, bytes_types):
@@ -79,12 +89,9 @@
     discarded prior to the padding check.  If validate is True,
     non-base64-alphabet characters in the input result in a binascii.Error.
     """
-    if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+    s = _bytes_from_decode_data(s)
     if altchars is not None:
-        if not isinstance(altchars, bytes_types):
-            raise TypeError("expected bytes, not %s"
-                            % altchars.__class__.__name__)
+        altchars = _bytes_from_decode_data(altchars)
         assert len(altchars) == 2, repr(altchars)
         s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'})
     if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
@@ -211,8 +218,7 @@
     the input is incorrectly padded or if there are non-alphabet
     characters present in the input.
     """
-    if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+    s = _bytes_from_decode_data(s)
     quanta, leftover = divmod(len(s), 8)
     if leftover:
         raise binascii.Error('Incorrect padding')
@@ -220,8 +226,7 @@
     # False, or the character to map the digit 1 (one) to.  It should be
     # either L (el) or I (eye).
     if map01 is not None:
-        if not isinstance(map01, bytes_types):
-            raise TypeError("expected bytes, not %s" % map01.__class__.__name__)
+        map01 = _bytes_from_decode_data(map01)
         assert len(map01) == 1, repr(map01)
         s = _translate(s, {b'0': b'O', b'1': map01})
     if casefold:
@@ -292,8 +297,7 @@
     s were incorrectly padded or if there are non-alphabet characters
     present in the string.
     """
-    if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+    s = _bytes_from_decode_data(s)
     if casefold:
         s = s.upper()
     if re.search(b'[^0-9A-F]', s):
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -102,44 +102,53 @@
 
     def test_b64decode(self):
         eq = self.assertEqual
-        eq(base64.b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org")
-        eq(base64.b64decode(b'AA=='), b'\x00')
-        eq(base64.b64decode(b"YQ=="), b"a")
-        eq(base64.b64decode(b"YWI="), b"ab")
-        eq(base64.b64decode(b"YWJj"), b"abc")
-        eq(base64.b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
-                            b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
-                            b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
-           b"abcdefghijklmnopqrstuvwxyz"
-           b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-           b"0123456789!@#0^&*();:<>,. []{}")
-        eq(base64.b64decode(b''), b'')
+
+        tests = {b"d3d3LnB5dGhvbi5vcmc=": b"www.python.org",
+                 b'AA==': b'\x00',
+                 b"YQ==": b"a",
+                 b"YWI=": b"ab",
+                 b"YWJj": b"abc",
+                 b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
+                 b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
+                 b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==":
+
+                 b"abcdefghijklmnopqrstuvwxyz"
+                 b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                 b"0123456789!@#0^&*();:<>,. []{}",
+                 b'': b'',
+                 }
+        for data, res in tests.items():
+            eq(base64.b64decode(data), res)
+            eq(base64.b64decode(data.decode('ascii')), res)
+
         # Test with arbitrary alternative characters
-        eq(base64.b64decode(b'01a*b$cd', altchars=b'*$'), b'\xd3V\xbeo\xf7\x1d')
-        # Check if passing a str object raises an error
-        self.assertRaises(TypeError, base64.b64decode, "")
-        self.assertRaises(TypeError, base64.b64decode, b"", altchars="")
+        tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d',
+                          }
+        for (data, altchars), res in tests_altchars.items():
+            data_str = data.decode('ascii')
+            altchars_str = altchars.decode('ascii')
+
+            eq(base64.b64decode(data, altchars=altchars), res)
+            eq(base64.b64decode(data_str, altchars=altchars), res)
+            eq(base64.b64decode(data, altchars=altchars_str), res)
+            eq(base64.b64decode(data_str, altchars=altchars_str), res)
+
         # Test standard alphabet
-        eq(base64.standard_b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org")
-        eq(base64.standard_b64decode(b"YQ=="), b"a")
-        eq(base64.standard_b64decode(b"YWI="), b"ab")
-        eq(base64.standard_b64decode(b"YWJj"), b"abc")
-        eq(base64.standard_b64decode(b""), b"")
-        eq(base64.standard_b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
-                                     b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
-                                     b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
-           b"abcdefghijklmnopqrstuvwxyz"
-           b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-           b"0123456789!@#0^&*();:<>,. []{}")
-        # Check if passing a str object raises an error
-        self.assertRaises(TypeError, base64.standard_b64decode, "")
-        self.assertRaises(TypeError, base64.standard_b64decode, b"", altchars="")
+        for data, res in tests.items():
+            eq(base64.standard_b64decode(data), res)
+            eq(base64.standard_b64decode(data.decode('ascii')), res)
+
         # Test with 'URL safe' alternative characters
-        eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d')
-        self.assertRaises(TypeError, base64.urlsafe_b64decode, "")
+        tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d',
+                         b'': b'',
+                         }
+        for data, res in tests_urlsafe.items():
+            eq(base64.urlsafe_b64decode(data), res)
+            eq(base64.urlsafe_b64decode(data.decode('ascii')), res)
 
     def test_b64decode_padding_error(self):
         self.assertRaises(binascii.Error, base64.b64decode, b'abc')
+        self.assertRaises(binascii.Error, base64.b64decode, 'abc')
 
     def test_b64decode_invalid_chars(self):
         # issue 1466065: Test some invalid characters.
@@ -154,8 +163,10 @@
                  (b'YWJj\nYWI=', b'abcab'))
         for bstr, res in tests:
             self.assertEqual(base64.b64decode(bstr), res)
+            self.assertEqual(base64.b64decode(bstr.decode('ascii')), res)
             with self.assertRaises(binascii.Error):
                 base64.b64decode(bstr, validate=True)
+                base64.b64decode(bstr.decode('ascii'), validate=True)
 
     def test_b32encode(self):
         eq = self.assertEqual
@@ -170,40 +181,62 @@
 
     def test_b32decode(self):
         eq = self.assertEqual
-        eq(base64.b32decode(b''), b'')
-        eq(base64.b32decode(b'AA======'), b'\x00')
-        eq(base64.b32decode(b'ME======'), b'a')
-        eq(base64.b32decode(b'MFRA===='), b'ab')
-        eq(base64.b32decode(b'MFRGG==='), b'abc')
-        eq(base64.b32decode(b'MFRGGZA='), b'abcd')
-        eq(base64.b32decode(b'MFRGGZDF'), b'abcde')
-        self.assertRaises(TypeError, base64.b32decode, "")
+        tests = {b'': b'',
+                 b'AA======': b'\x00',
+                 b'ME======': b'a',
+                 b'MFRA====': b'ab',
+                 b'MFRGG===': b'abc',
+                 b'MFRGGZA=': b'abcd',
+                 b'MFRGGZDF': b'abcde',
+                 }
+        for data, res in tests.items():
+            eq(base64.b32decode(data), res)
+            eq(base64.b32decode(data.decode('ascii')), res)
 
     def test_b32decode_casefold(self):
         eq = self.assertEqual
-        eq(base64.b32decode(b'', True), b'')
-        eq(base64.b32decode(b'ME======', True), b'a')
-        eq(base64.b32decode(b'MFRA====', True), b'ab')
-        eq(base64.b32decode(b'MFRGG===', True), b'abc')
-        eq(base64.b32decode(b'MFRGGZA=', True), b'abcd')
-        eq(base64.b32decode(b'MFRGGZDF', True), b'abcde')
-        # Lower cases
-        eq(base64.b32decode(b'me======', True), b'a')
-        eq(base64.b32decode(b'mfra====', True), b'ab')
-        eq(base64.b32decode(b'mfrgg===', True), b'abc')
-        eq(base64.b32decode(b'mfrggza=', True), b'abcd')
-        eq(base64.b32decode(b'mfrggzdf', True), b'abcde')
-        # Expected exceptions
+        tests = {b'': b'',
+                 b'ME======': b'a',
+                 b'MFRA====': b'ab',
+                 b'MFRGG===': b'abc',
+                 b'MFRGGZA=': b'abcd',
+                 b'MFRGGZDF': b'abcde',
+                 # Lower cases
+                 b'me======': b'a',
+                 b'mfra====': b'ab',
+                 b'mfrgg===': b'abc',
+                 b'mfrggza=': b'abcd',
+                 b'mfrggzdf': b'abcde',
+                 }
+
+        for data, res in tests.items():
+            eq(base64.b32decode(data, True), res)
+            eq(base64.b32decode(data.decode('ascii'), True), res)
+
         self.assertRaises(TypeError, base64.b32decode, b'me======')
+        self.assertRaises(TypeError, base64.b32decode, 'me======')
+
         # Mapping zero and one
         eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe')
-        eq(base64.b32decode(b'M1023456', map01=b'L'), b'b\xdd\xad\xf3\xbe')
-        eq(base64.b32decode(b'M1023456', map01=b'I'), b'b\x1d\xad\xf3\xbe')
-        self.assertRaises(TypeError, base64.b32decode, b"", map01="")
+        eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe')
+
+        map_tests = {(b'M1023456', b'L'): b'b\xdd\xad\xf3\xbe',
+                     (b'M1023456', b'I'): b'b\x1d\xad\xf3\xbe',
+                     }
+        for (data, map01), res in map_tests.items():
+            data_str = data.decode('ascii')
+            map01_str = map01.decode('ascii')
+
+            eq(base64.b32decode(data, map01=map01), res)
+            eq(base64.b32decode(data_str, map01=map01), res)
+            eq(base64.b32decode(data, map01=map01_str), res)
+            eq(base64.b32decode(data_str, map01=map01_str), res)
 
     def test_b32decode_error(self):
-        self.assertRaises(binascii.Error, base64.b32decode, b'abc')
-        self.assertRaises(binascii.Error, base64.b32decode, b'ABCDEF==')
+        for data in [b'abc', b'ABCDEF==']:
+            with self.assertRaises(binascii.Error):
+                base64.b32decode(data)
+                base64.b32decode(data.decode('ascii'))
 
     def test_b16encode(self):
         eq = self.assertEqual
@@ -214,12 +247,24 @@
     def test_b16decode(self):
         eq = self.assertEqual
         eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef')
+        eq(base64.b16decode('0102ABCDEF'), b'\x01\x02\xab\xcd\xef')
         eq(base64.b16decode(b'00'), b'\x00')
+        eq(base64.b16decode('00'), b'\x00')
         # Lower case is not allowed without a flag
         self.assertRaises(binascii.Error, base64.b16decode, b'0102abcdef')
+        self.assertRaises(binascii.Error, base64.b16decode, '0102abcdef')
         # Case fold
         eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef')
-        self.assertRaises(TypeError, base64.b16decode, "")
+        eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef')
+
+    def test_decode_nonascii_str(self):
+        decode_funcs = (base64.b64decode,
+                        base64.standard_b64decode,
+                        base64.urlsafe_b64decode,
+                        base64.b32decode,
+                        base64.b16decode)
+        for f in decode_funcs:
+            self.assertRaises(ValueError, f, 'with non-ascii \xcb')
 
     def test_ErrorHeritage(self):
         self.assertTrue(issubclass(binascii.Error, ValueError))
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -469,6 +469,9 @@
 Library
 -------
 
+- Issue #13641: Decoding functions in the base64 module now accept ASCII-only
+  unicode strings.  Patch by Catalin Iacob.
+
 - Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a
   new importlib.invalidate_caches() function.
 

-- 
Repository URL: http://hg.python.org/cpython