[Python-checkins] cpython (merge default -> default): Merged upstream changes.

Mon Feb 20 19:51:12 CET 2012

http://hg.python.org/cpython/rev/ae960e5ae112
changeset:   75064:ae960e5ae112
parent:      75063:2b4a553bd6ed
parent:      75059:42f61304f77d
user:        Vinay Sajip <vinay_sajip at yahoo.co.uk>
date:        Mon Feb 20 18:50:33 2012 +0000
summary:
  Merged upstream changes.

files:
  Doc/library/base64.rst  |   11 +-
  Doc/whatsnew/3.3.rst    |   14 ++
  Lib/base64.py           |   26 ++-
  Lib/test/test_base64.py |  163 +++++++++++++++++----------
  Misc/NEWS               |    6 +
  Python/dynload_aix.c    |    1 -
  Python/dynload_dl.c     |    1 -
  Python/dynload_hpux.c   |    1 -
  Python/dynload_next.c   |    1 -
  Python/dynload_shlib.c  |    6 -
  10 files changed, 147 insertions(+), 83 deletions(-)

diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -18,9 +18,14 @@
 
 There are two interfaces provided by this module.  The modern interface
 supports encoding and decoding ASCII byte string objects using all three
-alphabets.  The legacy interface provides for encoding and decoding to and from
-file-like objects as well as byte strings, but only using the Base64 standard
-alphabet.
+alphabets. Additionally, the decoding functions of the modern interface
+accept Unicode strings containing only ASCII characters. The legacy interface
+provides for encoding and decoding to and from file-like objects as well as
+byte strings, but only using the Base64 standard alphabet.
+
+.. versionchanged:: 3.3
+   ASCII-only Unicode strings are now accepted by the decoding functions of
+   the modern interface.
 
 The modern interface provides:
 
diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst
--- a/Doc/whatsnew/3.3.rst
+++ b/Doc/whatsnew/3.3.rst
@@ -939,6 +939,20 @@
   :c:func:`PyUnicode_FromFormat()`, your code will automatically take
   advantage of the new unicode representations.
 
+Building C extensions
+---------------------
+
+* The range of possible file names for C extensions has been narrowed.
+  Very rarely used spellings have been suppressed: under POSIX, files
+  named ``xxxmodule.so``, ``xxxmodule.abi3.so`` and
+  ``xxxmodule.cpython-*.so`` are no longer recognized as implementing
+  the ``xxx`` module.  If you had been generating such files, you have
+  to switch to the other spellings (i.e., remove the ``module`` string
+  from the file names).
+
+  (implemented in :issue:`14040`.)
+
+
 Other issues
 ------------
 
diff --git a/Lib/base64.py b/Lib/base64.py
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -29,6 +29,16 @@
 
 bytes_types = (bytes, bytearray)  # Types acceptable as binary data
 
+def _bytes_from_decode_data(s):
+    if isinstance(s, str):
+        try:
+            return s.encode('ascii')
+        except UnicodeEncodeError:
+            raise ValueError('string argument should contain only ASCII characters')
+    elif isinstance(s, bytes_types):
+        return s
+    else:
+        raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__)
 
 def _translate(s, altchars):
     if not isinstance(s, bytes_types):
@@ -79,12 +89,9 @@
     discarded prior to the padding check.  If validate is True,
     non-base64-alphabet characters in the input result in a binascii.Error.
     """
-    if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+    s = _bytes_from_decode_data(s)
     if altchars is not None:
-        if not isinstance(altchars, bytes_types):
-            raise TypeError("expected bytes, not %s"
-                            % altchars.__class__.__name__)
+        altchars = _bytes_from_decode_data(altchars)
         assert len(altchars) == 2, repr(altchars)
         s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'})
     if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
@@ -211,8 +218,7 @@
     the input is incorrectly padded or if there are non-alphabet
     characters present in the input.
     """
-    if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+    s = _bytes_from_decode_data(s)
     quanta, leftover = divmod(len(s), 8)
     if leftover:
         raise binascii.Error('Incorrect padding')
@@ -220,8 +226,7 @@
     # False, or the character to map the digit 1 (one) to.  It should be
     # either L (el) or I (eye).
     if map01 is not None:
-        if not isinstance(map01, bytes_types):
-            raise TypeError("expected bytes, not %s" % map01.__class__.__name__)
+        map01 = _bytes_from_decode_data(map01)
         assert len(map01) == 1, repr(map01)
         s = _translate(s, {b'0': b'O', b'1': map01})
     if casefold:
@@ -292,8 +297,7 @@
     s were incorrectly padded or if there are non-alphabet characters
     present in the string.
     """
-    if not isinstance(s, bytes_types):
-        raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+    s = _bytes_from_decode_data(s)
     if casefold:
         s = s.upper()
     if re.search(b'[^0-9A-F]', s):
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -102,44 +102,53 @@
 
     def test_b64decode(self):
         eq = self.assertEqual
-        eq(base64.b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org")
-        eq(base64.b64decode(b'AA=='), b'\x00')
-        eq(base64.b64decode(b"YQ=="), b"a")
-        eq(base64.b64decode(b"YWI="), b"ab")
-        eq(base64.b64decode(b"YWJj"), b"abc")
-        eq(base64.b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
-                            b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
-                            b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
-           b"abcdefghijklmnopqrstuvwxyz"
-           b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-           b"0123456789!@#0^&*();:<>,. []{}")
-        eq(base64.b64decode(b''), b'')
+
+        tests = {b"d3d3LnB5dGhvbi5vcmc=": b"www.python.org",
+                 b'AA==': b'\x00',
+                 b"YQ==": b"a",
+                 b"YWI=": b"ab",
+                 b"YWJj": b"abc",
+                 b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
+                 b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
+                 b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==":
+
+                 b"abcdefghijklmnopqrstuvwxyz"
+                 b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                 b"0123456789!@#0^&*();:<>,. []{}",
+                 b'': b'',
+                 }
+        for data, res in tests.items():
+            eq(base64.b64decode(data), res)
+            eq(base64.b64decode(data.decode('ascii')), res)
+
         # Test with arbitrary alternative characters
-        eq(base64.b64decode(b'01a*b$cd', altchars=b'*$'), b'\xd3V\xbeo\xf7\x1d')
-        # Check if passing a str object raises an error
-        self.assertRaises(TypeError, base64.b64decode, "")
-        self.assertRaises(TypeError, base64.b64decode, b"", altchars="")
+        tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d',
+                          }
+        for (data, altchars), res in tests_altchars.items():
+            data_str = data.decode('ascii')
+            altchars_str = altchars.decode('ascii')
+
+            eq(base64.b64decode(data, altchars=altchars), res)
+            eq(base64.b64decode(data_str, altchars=altchars), res)
+            eq(base64.b64decode(data, altchars=altchars_str), res)
+            eq(base64.b64decode(data_str, altchars=altchars_str), res)
+
         # Test standard alphabet
-        eq(base64.standard_b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org")
-        eq(base64.standard_b64decode(b"YQ=="), b"a")
-        eq(base64.standard_b64decode(b"YWI="), b"ab")
-        eq(base64.standard_b64decode(b"YWJj"), b"abc")
-        eq(base64.standard_b64decode(b""), b"")
-        eq(base64.standard_b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
-                                     b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
-                                     b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
-           b"abcdefghijklmnopqrstuvwxyz"
-           b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
-           b"0123456789!@#0^&*();:<>,. []{}")
-        # Check if passing a str object raises an error
-        self.assertRaises(TypeError, base64.standard_b64decode, "")
-        self.assertRaises(TypeError, base64.standard_b64decode, b"", altchars="")
+        for data, res in tests.items():
+            eq(base64.standard_b64decode(data), res)
+            eq(base64.standard_b64decode(data.decode('ascii')), res)
+
         # Test with 'URL safe' alternative characters
-        eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d')
-        self.assertRaises(TypeError, base64.urlsafe_b64decode, "")
+        tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d',
+                         b'': b'',
+                         }
+        for data, res in tests_urlsafe.items():
+            eq(base64.urlsafe_b64decode(data), res)
+            eq(base64.urlsafe_b64decode(data.decode('ascii')), res)
 
     def test_b64decode_padding_error(self):
         self.assertRaises(binascii.Error, base64.b64decode, b'abc')
+        self.assertRaises(binascii.Error, base64.b64decode, 'abc')
 
     def test_b64decode_invalid_chars(self):
         # issue 1466065: Test some invalid characters.
@@ -154,8 +163,10 @@
                  (b'YWJj\nYWI=', b'abcab'))
         for bstr, res in tests:
             self.assertEqual(base64.b64decode(bstr), res)
+            self.assertEqual(base64.b64decode(bstr.decode('ascii')), res)
             with self.assertRaises(binascii.Error):
                 base64.b64decode(bstr, validate=True)
+                base64.b64decode(bstr.decode('ascii'), validate=True)
 
     def test_b32encode(self):
         eq = self.assertEqual
@@ -170,40 +181,62 @@
 
     def test_b32decode(self):
         eq = self.assertEqual
-        eq(base64.b32decode(b''), b'')
-        eq(base64.b32decode(b'AA======'), b'\x00')
-        eq(base64.b32decode(b'ME======'), b'a')
-        eq(base64.b32decode(b'MFRA===='), b'ab')
-        eq(base64.b32decode(b'MFRGG==='), b'abc')
-        eq(base64.b32decode(b'MFRGGZA='), b'abcd')
-        eq(base64.b32decode(b'MFRGGZDF'), b'abcde')
-        self.assertRaises(TypeError, base64.b32decode, "")
+        tests = {b'': b'',
+                 b'AA======': b'\x00',
+                 b'ME======': b'a',
+                 b'MFRA====': b'ab',
+                 b'MFRGG===': b'abc',
+                 b'MFRGGZA=': b'abcd',
+                 b'MFRGGZDF': b'abcde',
+                 }
+        for data, res in tests.items():
+            eq(base64.b32decode(data), res)
+            eq(base64.b32decode(data.decode('ascii')), res)
 
     def test_b32decode_casefold(self):
         eq = self.assertEqual
-        eq(base64.b32decode(b'', True), b'')
-        eq(base64.b32decode(b'ME======', True), b'a')
-        eq(base64.b32decode(b'MFRA====', True), b'ab')
-        eq(base64.b32decode(b'MFRGG===', True), b'abc')
-        eq(base64.b32decode(b'MFRGGZA=', True), b'abcd')
-        eq(base64.b32decode(b'MFRGGZDF', True), b'abcde')
-        # Lower cases
-        eq(base64.b32decode(b'me======', True), b'a')
-        eq(base64.b32decode(b'mfra====', True), b'ab')
-        eq(base64.b32decode(b'mfrgg===', True), b'abc')
-        eq(base64.b32decode(b'mfrggza=', True), b'abcd')
-        eq(base64.b32decode(b'mfrggzdf', True), b'abcde')
-        # Expected exceptions
+        tests = {b'': b'',
+                 b'ME======': b'a',
+                 b'MFRA====': b'ab',
+                 b'MFRGG===': b'abc',
+                 b'MFRGGZA=': b'abcd',
+                 b'MFRGGZDF': b'abcde',
+                 # Lower cases
+                 b'me======': b'a',
+                 b'mfra====': b'ab',
+                 b'mfrgg===': b'abc',
+                 b'mfrggza=': b'abcd',
+                 b'mfrggzdf': b'abcde',
+                 }
+
+        for data, res in tests.items():
+            eq(base64.b32decode(data, True), res)
+            eq(base64.b32decode(data.decode('ascii'), True), res)
+
         self.assertRaises(TypeError, base64.b32decode, b'me======')
+        self.assertRaises(TypeError, base64.b32decode, 'me======')
+
         # Mapping zero and one
         eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe')
-        eq(base64.b32decode(b'M1023456', map01=b'L'), b'b\xdd\xad\xf3\xbe')
-        eq(base64.b32decode(b'M1023456', map01=b'I'), b'b\x1d\xad\xf3\xbe')
-        self.assertRaises(TypeError, base64.b32decode, b"", map01="")
+        eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe')
+
+        map_tests = {(b'M1023456', b'L'): b'b\xdd\xad\xf3\xbe',
+                     (b'M1023456', b'I'): b'b\x1d\xad\xf3\xbe',
+                     }
+        for (data, map01), res in map_tests.items():
+            data_str = data.decode('ascii')
+            map01_str = map01.decode('ascii')
+
+            eq(base64.b32decode(data, map01=map01), res)
+            eq(base64.b32decode(data_str, map01=map01), res)
+            eq(base64.b32decode(data, map01=map01_str), res)
+            eq(base64.b32decode(data_str, map01=map01_str), res)
 
     def test_b32decode_error(self):
-        self.assertRaises(binascii.Error, base64.b32decode, b'abc')
-        self.assertRaises(binascii.Error, base64.b32decode, b'ABCDEF==')
+        for data in [b'abc', b'ABCDEF==']:
+            with self.assertRaises(binascii.Error):
+                base64.b32decode(data)
+                base64.b32decode(data.decode('ascii'))
 
     def test_b16encode(self):
         eq = self.assertEqual
@@ -214,12 +247,24 @@
     def test_b16decode(self):
         eq = self.assertEqual
         eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef')
+        eq(base64.b16decode('0102ABCDEF'), b'\x01\x02\xab\xcd\xef')
         eq(base64.b16decode(b'00'), b'\x00')
+        eq(base64.b16decode('00'), b'\x00')
         # Lower case is not allowed without a flag
         self.assertRaises(binascii.Error, base64.b16decode, b'0102abcdef')
+        self.assertRaises(binascii.Error, base64.b16decode, '0102abcdef')
         # Case fold
         eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef')
-        self.assertRaises(TypeError, base64.b16decode, "")
+        eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef')
+
+    def test_decode_nonascii_str(self):
+        decode_funcs = (base64.b64decode,
+                        base64.standard_b64decode,
+                        base64.urlsafe_b64decode,
+                        base64.b32decode,
+                        base64.b16decode)
+        for f in decode_funcs:
+            self.assertRaises(ValueError, f, 'with non-ascii \xcb')
 
     def test_ErrorHeritage(self):
         self.assertTrue(issubclass(binascii.Error, ValueError))
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #14040: Remove rarely used file name suffixes for C extensions
+  (under POSIX mainly).
+
 - Issue #14051: Allow arbitrary attributes to be set on classmethod and
   staticmethod.
 
@@ -469,6 +472,9 @@
 Library
 -------
 
+- Issue #13641: Decoding functions in the base64 module now accept ASCII-only
+  unicode strings.  Patch by Catalin Iacob.
+
 - Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a
   new importlib.invalidate_caches() function.
 
diff --git a/Python/dynload_aix.c b/Python/dynload_aix.c
--- a/Python/dynload_aix.c
+++ b/Python/dynload_aix.c
@@ -28,7 +28,6 @@
 
 const struct filedescr _PyImport_DynLoadFiletab[] = {
     {".so", "rb", C_EXTENSION},
-    {"module.so", "rb", C_EXTENSION},
     {0, 0}
 };
 
diff --git a/Python/dynload_dl.c b/Python/dynload_dl.c
--- a/Python/dynload_dl.c
+++ b/Python/dynload_dl.c
@@ -11,7 +11,6 @@
 
 const struct filedescr _PyImport_DynLoadFiletab[] = {
     {".o", "rb", C_EXTENSION},
-    {"module.o", "rb", C_EXTENSION},
     {0, 0}
 };
 
diff --git a/Python/dynload_hpux.c b/Python/dynload_hpux.c
--- a/Python/dynload_hpux.c
+++ b/Python/dynload_hpux.c
@@ -15,7 +15,6 @@
 
 const struct filedescr _PyImport_DynLoadFiletab[] = {
     {SHLIB_EXT, "rb", C_EXTENSION},
-    {"module"SHLIB_EXT, "rb", C_EXTENSION},
     {0, 0}
 };
 
diff --git a/Python/dynload_next.c b/Python/dynload_next.c
--- a/Python/dynload_next.c
+++ b/Python/dynload_next.c
@@ -10,7 +10,6 @@
 
 const struct filedescr _PyImport_DynLoadFiletab[] = {
     {".so", "rb", C_EXTENSION},
-    {"module.so", "rb", C_EXTENSION},
     {0, 0}
 };
 
diff --git a/Python/dynload_shlib.c b/Python/dynload_shlib.c
--- a/Python/dynload_shlib.c
+++ b/Python/dynload_shlib.c
@@ -39,7 +39,6 @@
 const struct filedescr _PyImport_DynLoadFiletab[] = {
 #ifdef __CYGWIN__
     {".dll", "rb", C_EXTENSION},
-    {"module.dll", "rb", C_EXTENSION},
 #else  /* !__CYGWIN__ */
 #if defined(PYOS_OS2) && defined(PYCC_GCC)
     {".pyd", "rb", C_EXTENSION},
@@ -48,15 +47,10 @@
 #ifdef __VMS
     {".exe", "rb", C_EXTENSION},
     {".EXE", "rb", C_EXTENSION},
-    {"module.exe", "rb", C_EXTENSION},
-    {"MODULE.EXE", "rb", C_EXTENSION},
 #else  /* !__VMS */
     {"." SOABI ".so", "rb", C_EXTENSION},
-    {"module." SOABI ".so", "rb", C_EXTENSION},
     {".abi" PYTHON_ABI_STRING ".so", "rb", C_EXTENSION},
-    {"module.abi" PYTHON_ABI_STRING ".so", "rb", C_EXTENSION},
     {".so", "rb", C_EXTENSION},
-    {"module.so", "rb", C_EXTENSION},
 #endif  /* __VMS */
 #endif  /* defined(PYOS_OS2) && defined(PYCC_GCC) */
 #endif  /* __CYGWIN__ */

-- 
Repository URL: http://hg.python.org/cpython