[Python-checkins] r75930 - in python/branches/release31-maint: Lib/test/test_pep263.py Misc/NEWS Python/ast.c

benjamin.peterson python-checkins at python.org
Thu Oct 29 02:22:38 CET 2009


Author: benjamin.peterson
Date: Thu Oct 29 02:22:38 2009
New Revision: 75930

Log:
Merged revisions 75928 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r75928 | benjamin.peterson | 2009-10-28 16:59:39 -0500 (Wed, 28 Oct 2009) | 5 lines
  
  In wide builds, avoid storing non-BMP Unicode characters from source code as surrogate pairs
  
  This is accomplished by decoding with utf-32 instead of utf-16 on all builds.
  The patch is by Adam Olsen.
........


Modified:
   python/branches/release31-maint/   (props changed)
   python/branches/release31-maint/Lib/test/test_pep263.py
   python/branches/release31-maint/Misc/NEWS
   python/branches/release31-maint/Python/ast.c

Modified: python/branches/release31-maint/Lib/test/test_pep263.py
==============================================================================
--- python/branches/release31-maint/Lib/test/test_pep263.py	(original)
+++ python/branches/release31-maint/Lib/test/test_pep263.py	Thu Oct 29 02:22:38 2009
@@ -36,6 +36,14 @@
         exec(c, d)
         self.assertEquals(d['\xc6'], '\xc6')
 
+    def test_issue3297(self):
+        c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
+        d = {}
+        exec(c, d)
+        self.assertEqual(d['a'], d['b'])
+        self.assertEqual(len(d['a']), len(d['b']))
+        self.assertEqual(ascii(d['a']), ascii(d['b']))
+
 def test_main():
     support.run_unittest(PEP263Test)
 

Modified: python/branches/release31-maint/Misc/NEWS
==============================================================================
--- python/branches/release31-maint/Misc/NEWS	(original)
+++ python/branches/release31-maint/Misc/NEWS	Thu Oct 29 02:22:38 2009
@@ -12,6 +12,9 @@
 Core and Builtins
 -----------------
 
+- Issue #3297: On wide unicode builds, do not split unicode characters into
+  surrogates.
+
 - Issue #1722344: threading._shutdown() is now called in Py_Finalize(), which
   fixes the problem of some exceptions being thrown at shutdown when the
   interpreter is killed. Patch by Adam Olsen.

Modified: python/branches/release31-maint/Python/ast.c
==============================================================================
--- python/branches/release31-maint/Python/ast.c	(original)
+++ python/branches/release31-maint/Python/ast.c	Thu Oct 29 02:22:38 2009
@@ -3217,10 +3217,11 @@
         u = NULL;
     } else {
         /* check for integer overflow */
-        if (len > PY_SIZE_MAX / 4)
+        if (len > PY_SIZE_MAX / 6)
             return NULL;
-        /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
-        u = PyBytes_FromStringAndSize((char *)NULL, len * 4);
+        /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
+           "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
+        u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
         if (u == NULL)
             return NULL;
         p = buf = PyBytes_AsString(u);
@@ -3237,20 +3238,24 @@
                 PyObject *w;
                 char *r;
                 Py_ssize_t rn, i;
-                w = decode_utf8(c, &s, end, "utf-16-be");
+                w = decode_utf8(c, &s, end, "utf-32-be");
                 if (w == NULL) {
                     Py_DECREF(u);
                     return NULL;
                 }
                 r = PyBytes_AS_STRING(w);
                 rn = Py_SIZE(w);
-                assert(rn % 2 == 0);
-                for (i = 0; i < rn; i += 2) {
-                    sprintf(p, "\\u%02x%02x",
+                assert(rn % 4 == 0);
+                for (i = 0; i < rn; i += 4) {
+                    sprintf(p, "\\U%02x%02x%02x%02x",
                             r[i + 0] & 0xFF,
-                            r[i + 1] & 0xFF);
-                    p += 6;
+                            r[i + 1] & 0xFF,
+                            r[i + 2] & 0xFF,
+                            r[i + 3] & 0xFF);
+                    p += 10;
                 }
+                /* Should be impossible to overflow */
+                assert(p - buf <= Py_SIZE(u));
                 Py_DECREF(w);
             } else {
                 *p++ = *s++;


More information about the Python-checkins mailing list