[Python-checkins] cpython (merge default -> default): Merged upstream changes.
vinay.sajip
python-checkins at python.org
Mon Feb 20 19:51:12 CET 2012
http://hg.python.org/cpython/rev/ae960e5ae112
changeset: 75064:ae960e5ae112
parent: 75063:2b4a553bd6ed
parent: 75059:42f61304f77d
user: Vinay Sajip <vinay_sajip at yahoo.co.uk>
date: Mon Feb 20 18:50:33 2012 +0000
summary:
Merged upstream changes.
files:
Doc/library/base64.rst | 11 +-
Doc/whatsnew/3.3.rst | 14 ++
Lib/base64.py | 26 ++-
Lib/test/test_base64.py | 163 +++++++++++++++++----------
Misc/NEWS | 6 +
Python/dynload_aix.c | 1 -
Python/dynload_dl.c | 1 -
Python/dynload_hpux.c | 1 -
Python/dynload_next.c | 1 -
Python/dynload_shlib.c | 6 -
10 files changed, 147 insertions(+), 83 deletions(-)
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -18,9 +18,14 @@
There are two interfaces provided by this module. The modern interface
supports encoding and decoding ASCII byte string objects using all three
-alphabets. The legacy interface provides for encoding and decoding to and from
-file-like objects as well as byte strings, but only using the Base64 standard
-alphabet.
+alphabets. Additionally, the decoding functions of the modern interface also
+accept Unicode strings containing only ASCII characters. The legacy interface
+provides for encoding and decoding to and from file-like objects as well as
+byte strings, but only using the Base64 standard alphabet.
+
+.. versionchanged:: 3.3
+ ASCII-only Unicode strings are now accepted by the decoding functions of
+ the modern interface.
The modern interface provides:
diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst
--- a/Doc/whatsnew/3.3.rst
+++ b/Doc/whatsnew/3.3.rst
@@ -939,6 +939,20 @@
:c:func:`PyUnicode_FromFormat()`, your code will automatically take
advantage of the new unicode representations.
+Building C extensions
+---------------------
+
+* The range of possible file names for C extensions has been narrowed.
+ Very rarely used spellings have been suppressed: under POSIX, files
+ named ``xxxmodule.so``, ``xxxmodule.abi3.so`` and
+ ``xxxmodule.cpython-*.so`` are no longer recognized as implementing
+ the ``xxx`` module. If you had been generating such files, you have
+ to switch to the other spellings (i.e., remove the ``module`` string
+ from the file names).
+
+ (implemented in :issue:`14040`.)
+
+
Other issues
------------
diff --git a/Lib/base64.py b/Lib/base64.py
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -29,6 +29,16 @@
bytes_types = (bytes, bytearray) # Types acceptable as binary data
+def _bytes_from_decode_data(s):
+ if isinstance(s, str):
+ try:
+ return s.encode('ascii')
+ except UnicodeEncodeError:
+ raise ValueError('string argument should contain only ASCII characters')
+ elif isinstance(s, bytes_types):
+ return s
+ else:
+ raise TypeError("argument should be bytes or ASCII string, not %s" % s.__class__.__name__)
def _translate(s, altchars):
if not isinstance(s, bytes_types):
@@ -79,12 +89,9 @@
discarded prior to the padding check. If validate is True,
non-base64-alphabet characters in the input result in a binascii.Error.
"""
- if not isinstance(s, bytes_types):
- raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+ s = _bytes_from_decode_data(s)
if altchars is not None:
- if not isinstance(altchars, bytes_types):
- raise TypeError("expected bytes, not %s"
- % altchars.__class__.__name__)
+ altchars = _bytes_from_decode_data(altchars)
assert len(altchars) == 2, repr(altchars)
s = _translate(s, {chr(altchars[0]): b'+', chr(altchars[1]): b'/'})
if validate and not re.match(b'^[A-Za-z0-9+/]*={0,2}$', s):
@@ -211,8 +218,7 @@
the input is incorrectly padded or if there are non-alphabet
characters present in the input.
"""
- if not isinstance(s, bytes_types):
- raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+ s = _bytes_from_decode_data(s)
quanta, leftover = divmod(len(s), 8)
if leftover:
raise binascii.Error('Incorrect padding')
@@ -220,8 +226,7 @@
# False, or the character to map the digit 1 (one) to. It should be
# either L (el) or I (eye).
if map01 is not None:
- if not isinstance(map01, bytes_types):
- raise TypeError("expected bytes, not %s" % map01.__class__.__name__)
+ map01 = _bytes_from_decode_data(map01)
assert len(map01) == 1, repr(map01)
s = _translate(s, {b'0': b'O', b'1': map01})
if casefold:
@@ -292,8 +297,7 @@
s were incorrectly padded or if there are non-alphabet characters
present in the string.
"""
- if not isinstance(s, bytes_types):
- raise TypeError("expected bytes, not %s" % s.__class__.__name__)
+ s = _bytes_from_decode_data(s)
if casefold:
s = s.upper()
if re.search(b'[^0-9A-F]', s):
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -102,44 +102,53 @@
def test_b64decode(self):
eq = self.assertEqual
- eq(base64.b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org")
- eq(base64.b64decode(b'AA=='), b'\x00')
- eq(base64.b64decode(b"YQ=="), b"a")
- eq(base64.b64decode(b"YWI="), b"ab")
- eq(base64.b64decode(b"YWJj"), b"abc")
- eq(base64.b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
- b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
- b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
- b"abcdefghijklmnopqrstuvwxyz"
- b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- b"0123456789!@#0^&*();:<>,. []{}")
- eq(base64.b64decode(b''), b'')
+
+ tests = {b"d3d3LnB5dGhvbi5vcmc=": b"www.python.org",
+ b'AA==': b'\x00',
+ b"YQ==": b"a",
+ b"YWI=": b"ab",
+ b"YWJj": b"abc",
+ b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
+ b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0\nNT"
+ b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ==":
+
+ b"abcdefghijklmnopqrstuvwxyz"
+ b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ b"0123456789!@#0^&*();:<>,. []{}",
+ b'': b'',
+ }
+ for data, res in tests.items():
+ eq(base64.b64decode(data), res)
+ eq(base64.b64decode(data.decode('ascii')), res)
+
# Test with arbitrary alternative characters
- eq(base64.b64decode(b'01a*b$cd', altchars=b'*$'), b'\xd3V\xbeo\xf7\x1d')
- # Check if passing a str object raises an error
- self.assertRaises(TypeError, base64.b64decode, "")
- self.assertRaises(TypeError, base64.b64decode, b"", altchars="")
+ tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d',
+ }
+ for (data, altchars), res in tests_altchars.items():
+ data_str = data.decode('ascii')
+ altchars_str = altchars.decode('ascii')
+
+ eq(base64.b64decode(data, altchars=altchars), res)
+ eq(base64.b64decode(data_str, altchars=altchars), res)
+ eq(base64.b64decode(data, altchars=altchars_str), res)
+ eq(base64.b64decode(data_str, altchars=altchars_str), res)
+
# Test standard alphabet
- eq(base64.standard_b64decode(b"d3d3LnB5dGhvbi5vcmc="), b"www.python.org")
- eq(base64.standard_b64decode(b"YQ=="), b"a")
- eq(base64.standard_b64decode(b"YWI="), b"ab")
- eq(base64.standard_b64decode(b"YWJj"), b"abc")
- eq(base64.standard_b64decode(b""), b"")
- eq(base64.standard_b64decode(b"YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXpBQkNE"
- b"RUZHSElKS0xNTk9QUVJTVFVWV1hZWjAxMjM0NT"
- b"Y3ODkhQCMwXiYqKCk7Ojw+LC4gW117fQ=="),
- b"abcdefghijklmnopqrstuvwxyz"
- b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- b"0123456789!@#0^&*();:<>,. []{}")
- # Check if passing a str object raises an error
- self.assertRaises(TypeError, base64.standard_b64decode, "")
- self.assertRaises(TypeError, base64.standard_b64decode, b"", altchars="")
+ for data, res in tests.items():
+ eq(base64.standard_b64decode(data), res)
+ eq(base64.standard_b64decode(data.decode('ascii')), res)
+
# Test with 'URL safe' alternative characters
- eq(base64.urlsafe_b64decode(b'01a-b_cd'), b'\xd3V\xbeo\xf7\x1d')
- self.assertRaises(TypeError, base64.urlsafe_b64decode, "")
+ tests_urlsafe = {b'01a-b_cd': b'\xd3V\xbeo\xf7\x1d',
+ b'': b'',
+ }
+ for data, res in tests_urlsafe.items():
+ eq(base64.urlsafe_b64decode(data), res)
+ eq(base64.urlsafe_b64decode(data.decode('ascii')), res)
def test_b64decode_padding_error(self):
self.assertRaises(binascii.Error, base64.b64decode, b'abc')
+ self.assertRaises(binascii.Error, base64.b64decode, 'abc')
def test_b64decode_invalid_chars(self):
# issue 1466065: Test some invalid characters.
@@ -154,8 +163,10 @@
(b'YWJj\nYWI=', b'abcab'))
for bstr, res in tests:
self.assertEqual(base64.b64decode(bstr), res)
+ self.assertEqual(base64.b64decode(bstr.decode('ascii')), res)
with self.assertRaises(binascii.Error):
base64.b64decode(bstr, validate=True)
+ base64.b64decode(bstr.decode('ascii'), validate=True)
def test_b32encode(self):
eq = self.assertEqual
@@ -170,40 +181,62 @@
def test_b32decode(self):
eq = self.assertEqual
- eq(base64.b32decode(b''), b'')
- eq(base64.b32decode(b'AA======'), b'\x00')
- eq(base64.b32decode(b'ME======'), b'a')
- eq(base64.b32decode(b'MFRA===='), b'ab')
- eq(base64.b32decode(b'MFRGG==='), b'abc')
- eq(base64.b32decode(b'MFRGGZA='), b'abcd')
- eq(base64.b32decode(b'MFRGGZDF'), b'abcde')
- self.assertRaises(TypeError, base64.b32decode, "")
+ tests = {b'': b'',
+ b'AA======': b'\x00',
+ b'ME======': b'a',
+ b'MFRA====': b'ab',
+ b'MFRGG===': b'abc',
+ b'MFRGGZA=': b'abcd',
+ b'MFRGGZDF': b'abcde',
+ }
+ for data, res in tests.items():
+ eq(base64.b32decode(data), res)
+ eq(base64.b32decode(data.decode('ascii')), res)
def test_b32decode_casefold(self):
eq = self.assertEqual
- eq(base64.b32decode(b'', True), b'')
- eq(base64.b32decode(b'ME======', True), b'a')
- eq(base64.b32decode(b'MFRA====', True), b'ab')
- eq(base64.b32decode(b'MFRGG===', True), b'abc')
- eq(base64.b32decode(b'MFRGGZA=', True), b'abcd')
- eq(base64.b32decode(b'MFRGGZDF', True), b'abcde')
- # Lower cases
- eq(base64.b32decode(b'me======', True), b'a')
- eq(base64.b32decode(b'mfra====', True), b'ab')
- eq(base64.b32decode(b'mfrgg===', True), b'abc')
- eq(base64.b32decode(b'mfrggza=', True), b'abcd')
- eq(base64.b32decode(b'mfrggzdf', True), b'abcde')
- # Expected exceptions
+ tests = {b'': b'',
+ b'ME======': b'a',
+ b'MFRA====': b'ab',
+ b'MFRGG===': b'abc',
+ b'MFRGGZA=': b'abcd',
+ b'MFRGGZDF': b'abcde',
+ # Lower cases
+ b'me======': b'a',
+ b'mfra====': b'ab',
+ b'mfrgg===': b'abc',
+ b'mfrggza=': b'abcd',
+ b'mfrggzdf': b'abcde',
+ }
+
+ for data, res in tests.items():
+ eq(base64.b32decode(data, True), res)
+ eq(base64.b32decode(data.decode('ascii'), True), res)
+
self.assertRaises(TypeError, base64.b32decode, b'me======')
+ self.assertRaises(TypeError, base64.b32decode, 'me======')
+
# Mapping zero and one
eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe')
- eq(base64.b32decode(b'M1023456', map01=b'L'), b'b\xdd\xad\xf3\xbe')
- eq(base64.b32decode(b'M1023456', map01=b'I'), b'b\x1d\xad\xf3\xbe')
- self.assertRaises(TypeError, base64.b32decode, b"", map01="")
+ eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe')
+
+ map_tests = {(b'M1023456', b'L'): b'b\xdd\xad\xf3\xbe',
+ (b'M1023456', b'I'): b'b\x1d\xad\xf3\xbe',
+ }
+ for (data, map01), res in map_tests.items():
+ data_str = data.decode('ascii')
+ map01_str = map01.decode('ascii')
+
+ eq(base64.b32decode(data, map01=map01), res)
+ eq(base64.b32decode(data_str, map01=map01), res)
+ eq(base64.b32decode(data, map01=map01_str), res)
+ eq(base64.b32decode(data_str, map01=map01_str), res)
def test_b32decode_error(self):
- self.assertRaises(binascii.Error, base64.b32decode, b'abc')
- self.assertRaises(binascii.Error, base64.b32decode, b'ABCDEF==')
+ for data in [b'abc', b'ABCDEF==']:
+ with self.assertRaises(binascii.Error):
+ base64.b32decode(data)
+ base64.b32decode(data.decode('ascii'))
def test_b16encode(self):
eq = self.assertEqual
@@ -214,12 +247,24 @@
def test_b16decode(self):
eq = self.assertEqual
eq(base64.b16decode(b'0102ABCDEF'), b'\x01\x02\xab\xcd\xef')
+ eq(base64.b16decode('0102ABCDEF'), b'\x01\x02\xab\xcd\xef')
eq(base64.b16decode(b'00'), b'\x00')
+ eq(base64.b16decode('00'), b'\x00')
# Lower case is not allowed without a flag
self.assertRaises(binascii.Error, base64.b16decode, b'0102abcdef')
+ self.assertRaises(binascii.Error, base64.b16decode, '0102abcdef')
# Case fold
eq(base64.b16decode(b'0102abcdef', True), b'\x01\x02\xab\xcd\xef')
- self.assertRaises(TypeError, base64.b16decode, "")
+ eq(base64.b16decode('0102abcdef', True), b'\x01\x02\xab\xcd\xef')
+
+ def test_decode_nonascii_str(self):
+ decode_funcs = (base64.b64decode,
+ base64.standard_b64decode,
+ base64.urlsafe_b64decode,
+ base64.b32decode,
+ base64.b16decode)
+ for f in decode_funcs:
+ self.assertRaises(ValueError, f, 'with non-ascii \xcb')
def test_ErrorHeritage(self):
self.assertTrue(issubclass(binascii.Error, ValueError))
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
Core and Builtins
-----------------
+- Issue #14040: Remove rarely used file name suffixes for C extensions
+ (under POSIX mainly).
+
- Issue #14051: Allow arbitrary attributes to be set of classmethod and
staticmethod.
@@ -469,6 +472,9 @@
Library
-------
+- Issue #13641: Decoding functions in the base64 module now accept ASCII-only
+ unicode strings. Patch by Catalin Iacob.
+
- Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a
new importlib.invalidate_caches() function.
diff --git a/Python/dynload_aix.c b/Python/dynload_aix.c
--- a/Python/dynload_aix.c
+++ b/Python/dynload_aix.c
@@ -28,7 +28,6 @@
const struct filedescr _PyImport_DynLoadFiletab[] = {
{".so", "rb", C_EXTENSION},
- {"module.so", "rb", C_EXTENSION},
{0, 0}
};
diff --git a/Python/dynload_dl.c b/Python/dynload_dl.c
--- a/Python/dynload_dl.c
+++ b/Python/dynload_dl.c
@@ -11,7 +11,6 @@
const struct filedescr _PyImport_DynLoadFiletab[] = {
{".o", "rb", C_EXTENSION},
- {"module.o", "rb", C_EXTENSION},
{0, 0}
};
diff --git a/Python/dynload_hpux.c b/Python/dynload_hpux.c
--- a/Python/dynload_hpux.c
+++ b/Python/dynload_hpux.c
@@ -15,7 +15,6 @@
const struct filedescr _PyImport_DynLoadFiletab[] = {
{SHLIB_EXT, "rb", C_EXTENSION},
- {"module"SHLIB_EXT, "rb", C_EXTENSION},
{0, 0}
};
diff --git a/Python/dynload_next.c b/Python/dynload_next.c
--- a/Python/dynload_next.c
+++ b/Python/dynload_next.c
@@ -10,7 +10,6 @@
const struct filedescr _PyImport_DynLoadFiletab[] = {
{".so", "rb", C_EXTENSION},
- {"module.so", "rb", C_EXTENSION},
{0, 0}
};
diff --git a/Python/dynload_shlib.c b/Python/dynload_shlib.c
--- a/Python/dynload_shlib.c
+++ b/Python/dynload_shlib.c
@@ -39,7 +39,6 @@
const struct filedescr _PyImport_DynLoadFiletab[] = {
#ifdef __CYGWIN__
{".dll", "rb", C_EXTENSION},
- {"module.dll", "rb", C_EXTENSION},
#else /* !__CYGWIN__ */
#if defined(PYOS_OS2) && defined(PYCC_GCC)
{".pyd", "rb", C_EXTENSION},
@@ -48,15 +47,10 @@
#ifdef __VMS
{".exe", "rb", C_EXTENSION},
{".EXE", "rb", C_EXTENSION},
- {"module.exe", "rb", C_EXTENSION},
- {"MODULE.EXE", "rb", C_EXTENSION},
#else /* !__VMS */
{"." SOABI ".so", "rb", C_EXTENSION},
- {"module." SOABI ".so", "rb", C_EXTENSION},
{".abi" PYTHON_ABI_STRING ".so", "rb", C_EXTENSION},
- {"module.abi" PYTHON_ABI_STRING ".so", "rb", C_EXTENSION},
{".so", "rb", C_EXTENSION},
- {"module.so", "rb", C_EXTENSION},
#endif /* __VMS */
#endif /* defined(PYOS_OS2) && defined(PYCC_GCC) */
#endif /* __CYGWIN__ */
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list