[Python-checkins] r51466 - in python/branches/release24-maint: Lib/test/test_unicode.py Misc/ACKS Misc/NEWS Objects/unicodeobject.c

Tue Aug 22 10:25:35 CEST 2006

Author: georg.brandl
Date: Tue Aug 22 10:25:33 2006
New Revision: 51466

Modified:
   python/branches/release24-maint/Lib/test/test_unicode.py
   python/branches/release24-maint/Misc/ACKS
   python/branches/release24-maint/Misc/NEWS
   python/branches/release24-maint/Objects/unicodeobject.c
Log:
Backport rev 51448:
- Patch #1541585: fix buffer overrun when performing repr() on
  a unicode string in a build with wide unicode (UCS-4) support.



Modified: python/branches/release24-maint/Lib/test/test_unicode.py
==============================================================================

--- python/branches/release24-maint/Lib/test/test_unicode.py	(original)
+++ python/branches/release24-maint/Lib/test/test_unicode.py	Tue Aug 22 10:25:33 2006
@@ -92,6 +92,10 @@
                 "\\xfe\\xff'")
             testrepr = repr(u''.join(map(unichr, xrange(256))))
             self.assertEqual(testrepr, latin1repr)
+            # Test repr works on wide unicode escapes without overflow.
+            self.assertEqual(repr(u"\U00010000" * 39 + u"\uffff" * 4096),
+                             repr(u"\U00010000" * 39 + u"\uffff" * 4096))
+
 
     def test_count(self):
         string_tests.CommonTest.test_count(self)

Modified: python/branches/release24-maint/Misc/ACKS
==============================================================================
--- python/branches/release24-maint/Misc/ACKS	(original)
+++ python/branches/release24-maint/Misc/ACKS	Tue Aug 22 10:25:33 2006
@@ -348,6 +348,7 @@
 Soren Larsen
 Piers Lauder
 Ben Laurie
+Simon Law
 Chris Lawrence
 Christopher Lee
 Inyeol Lee

Modified: python/branches/release24-maint/Misc/NEWS
==============================================================================
--- python/branches/release24-maint/Misc/NEWS	(original)
+++ python/branches/release24-maint/Misc/NEWS	Tue Aug 22 10:25:33 2006
@@ -12,6 +12,9 @@
 Core and builtins
 -----------------
 
+- Patch #1541585: fix buffer overrun when performing repr() on
+  a unicode string in a build with wide unicode (UCS-4) support.
+
 - Bug #1536786: buffer comparison could emit a RuntimeWarning.
 
 - Bug #1535165: fixed a segfault in input() and raw_input() when
@@ -33,6 +36,7 @@
 
 - Patch #1488312, Fix memory alignment problem on SPARC in unicode
 
+
 Extension Modules
 -----------------
 
@@ -72,6 +76,7 @@
   methods now allow their database parameter to be None as the
   sleepycat API allows.
 
+
 Library
 -------
 

Modified: python/branches/release24-maint/Objects/unicodeobject.c
==============================================================================
--- python/branches/release24-maint/Objects/unicodeobject.c	(original)
+++ python/branches/release24-maint/Objects/unicodeobject.c	Tue Aug 22 10:25:33 2006
@@ -1970,7 +1970,28 @@
 
     static const char *hexdigit = "0123456789abcdef";
 
-    repr = PyString_FromStringAndSize(NULL, 2 + 6*size + 1);
+    /* Initial allocation is based on the longest possible escape
+       per source unichr.
+
+       In wide (UCS-4) builds, '\U00xxxxxx' takes 10 chars for one
+       source unichr, making it the longest escape. In narrow
+       (UTF-16) builds the same escape is written for a surrogate
+       pair, i.e. two source unichrs, so it costs only 5 chars per
+       unichr and is not the longest escape there.
+
+       In both wide and narrow builds, '\uxxxx' takes 6 chars per
+       source unichr, which makes it the longest escape in narrow
+       builds.
+    */
+
+    repr = PyString_FromStringAndSize(NULL,
+        2
+#ifdef Py_UNICODE_WIDE
+        + 10*size
+#else
+        + 6*size
+#endif
+        + 1);
     if (repr == NULL)
         return NULL;
 
@@ -1995,15 +2016,6 @@
 #ifdef Py_UNICODE_WIDE
         /* Map 21-bit characters to '\U00xxxxxx' */
         else if (ch >= 0x10000) {
-	    int offset = p - PyString_AS_STRING(repr);
-
-	    /* Resize the string if necessary */
-	    if (offset + 12 > PyString_GET_SIZE(repr)) {
-		if (_PyString_Resize(&repr, PyString_GET_SIZE(repr) + 100))
-		    return NULL;
-		p = PyString_AS_STRING(repr) + offset;
-	    }
-
             *p++ = '\\';
             *p++ = 'U';
             *p++ = hexdigit[(ch >> 28) & 0x0000000F];
@@ -2016,8 +2028,8 @@
             *p++ = hexdigit[ch & 0x0000000F];
 	    continue;
         }
-#endif
-	/* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */
+#else
+	/* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
 	else if (ch >= 0xD800 && ch < 0xDC00) {
 	    Py_UNICODE ch2;
 	    Py_UCS4 ucs;
@@ -2042,6 +2054,7 @@
 	    s--;
 	    size++;
 	}
+#endif
 
         /* Map 16-bit characters to '\uxxxx' */
         if (ch >= 256) {