[Python-checkins] r88553 - in python/branches/py3k: Lib/test/test_bytes.py Lib/test/test_unicode.py Objects/unicodeobject.c

alexander.belopolsky python-checkins at python.org
Fri Feb 25 02:14:14 CET 2011


Author: alexander.belopolsky
Date: Fri Feb 25 02:14:14 2011
New Revision: 88553

Log:
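Issue #11311: Short-circuit the default-encoding case (encoding == NULL)
in PyUnicode_Decode() and PyUnicode_AsEncodedString() by calling the
UTF-8 codec directly instead of looking up the default encoding name.
Thanks to Ezio Melotti for the tests and the review.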



Modified:
   python/branches/py3k/Lib/test/test_bytes.py
   python/branches/py3k/Lib/test/test_unicode.py
   python/branches/py3k/Objects/unicodeobject.c

Modified: python/branches/py3k/Lib/test/test_bytes.py
==============================================================================
--- python/branches/py3k/Lib/test/test_bytes.py	(original)
+++ python/branches/py3k/Lib/test/test_bytes.py	Fri Feb 25 02:14:14 2011
@@ -206,6 +206,8 @@
         self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
         self.assertEqual(b.decode(errors="ignore", encoding="utf8"),
                          "Hello world\n")
+        # Default encoding is utf-8
+        self.assertEqual(self.type2test(b'\xe2\x98\x83').decode(), '\u2603')
 
     def test_from_int(self):
         b = self.type2test(0)

Modified: python/branches/py3k/Lib/test/test_unicode.py
==============================================================================
--- python/branches/py3k/Lib/test/test_unicode.py	(original)
+++ python/branches/py3k/Lib/test/test_unicode.py	Fri Feb 25 02:14:14 2011
@@ -1187,6 +1187,9 @@
         self.assertEqual('hello'.encode('utf-16-be'), b'\000h\000e\000l\000l\000o')
         self.assertEqual('hello'.encode('latin-1'), b'hello')
 
+        # Default encoding is utf-8
+        self.assertEqual('\u2603'.encode(), b'\xe2\x98\x83')
+
         # Roundtrip safety for BMP (just the first 1024 chars)
         for c in range(1024):
             u = chr(c)

Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Fri Feb 25 02:14:14 2011
@@ -1462,7 +1462,7 @@
     char lower[11];  /* Enough for any encoding shortcut */
 
     if (encoding == NULL)
-        encoding = PyUnicode_GetDefaultEncoding();
+        return PyUnicode_DecodeUTF8(s, size, errors);
 
     /* Shortcuts for common default encodings */
     if (normalize_encoding(encoding, lower, sizeof(lower))) {
@@ -1670,7 +1670,9 @@
     }
 
     if (encoding == NULL)
-        encoding = PyUnicode_GetDefaultEncoding();
+        return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+                                    PyUnicode_GET_SIZE(unicode),
+                                    errors);
 
     /* Shortcuts for common default encodings */
     if (normalize_encoding(encoding, lower, sizeof(lower))) {


More information about the Python-checkins mailing list