[Python-checkins] r88698 - in python/branches/release32-maint: Lib/test/test_unicode.py Misc/NEWS Objects/unicodeobject.c

victor.stinner python-checkins at python.org
Tue Mar 1 23:48:50 CET 2011


Author: victor.stinner
Date: Tue Mar  1 23:48:49 2011
New Revision: 88698

Log:
Merged revisions 88697 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r88697 | victor.stinner | 2011-03-01 23:46:52 +0100 (mar., 01 mars 2011) | 4 lines
  
  Issue #11246: Fix PyUnicode_FromFormat("%V")
  
  Decode the byte string from UTF-8 (with replace error handler) instead of
  ISO-8859-1 (in strict mode). Patch written by Ray Allen.
........


Modified:
   python/branches/release32-maint/   (props changed)
   python/branches/release32-maint/Lib/test/test_unicode.py
   python/branches/release32-maint/Misc/NEWS
   python/branches/release32-maint/Objects/unicodeobject.c

Modified: python/branches/release32-maint/Lib/test/test_unicode.py
==============================================================================
--- python/branches/release32-maint/Lib/test/test_unicode.py	(original)
+++ python/branches/release32-maint/Lib/test/test_unicode.py	Tue Mar  1 23:48:49 2011
@@ -1459,6 +1459,19 @@
         text = PyUnicode_FromFormat(b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')
         self.assertEqual(text, r"%A:'abc\xe9\uabcd\U0010ffff'")
 
+        text = PyUnicode_FromFormat(b'repr=%V', 'abc', b'xyz')
+        self.assertEqual(text, 'repr=abc')
+
+        # Test decoding of the byte-string parameter of %V using UTF-8.
+        # b'\xe4\xba\xba\xe6\xb0\x91' is utf-8 encoded byte sequence of
+        # '\u4eba\u6c11'
+        text = PyUnicode_FromFormat(b'repr=%V', None, b'\xe4\xba\xba\xe6\xb0\x91')
+        self.assertEqual(text, 'repr=\u4eba\u6c11')
+
+        # Test the "replace" error handler.
+        text = PyUnicode_FromFormat(b'repr=%V', None, b'abc\xff')
+        self.assertEqual(text, 'repr=abc\ufffd')
+
     # Test PyUnicode_AsWideChar()
     def test_aswidechar(self):
         from _testcapi import unicode_aswidechar

Modified: python/branches/release32-maint/Misc/NEWS
==============================================================================
--- python/branches/release32-maint/Misc/NEWS	(original)
+++ python/branches/release32-maint/Misc/NEWS	Tue Mar  1 23:48:49 2011
@@ -10,6 +10,10 @@
 Core and Builtins
 -----------------
 
+- Issue #11246: Fix PyUnicode_FromFormat("%V") to decode the byte string from
+  UTF-8 (with replace error handler) instead of ISO-8859-1 (in strict mode).
+  Patch written by Ray Allen.
+
 - Issue #11286: Raise a ValueError from calling PyMemoryView_FromBuffer with
   a buffer struct having a NULL data pointer.
 

Modified: python/branches/release32-maint/Objects/unicodeobject.c
==============================================================================
--- python/branches/release32-maint/Objects/unicodeobject.c	(original)
+++ python/branches/release32-maint/Objects/unicodeobject.c	Tue Mar  1 23:48:49 2011
@@ -752,7 +752,7 @@
          if (*f == '%') {
              if (*(f+1)=='%')
                  continue;
-             if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A')
+             if (*(f+1)=='S' || *(f+1)=='R' || *(f+1)=='A' || *(f+1) == 'V')
                  ++callcount;
              while (Py_ISDIGIT((unsigned)*f))
                  width = (width*10) + *f++ - '0';
@@ -872,12 +872,20 @@
             {
                 PyObject *obj = va_arg(count, PyObject *);
                 const char *str = va_arg(count, const char *);
+                PyObject *str_obj;
                 assert(obj || str);
                 assert(!obj || PyUnicode_Check(obj));
-                if (obj)
+                if (obj) {
                     n += PyUnicode_GET_SIZE(obj);
-                else
-                    n += strlen(str);
+                    *callresult++ = NULL;
+                }
+                else {
+                    str_obj = PyUnicode_DecodeUTF8(str, strlen(str), "replace");
+                    if (!str_obj)
+                        goto fail;
+                    n += PyUnicode_GET_SIZE(str_obj);
+                    *callresult++ = str_obj;
+                }
                 break;
             }
             case 'S':
@@ -1080,14 +1088,18 @@
             case 'V':
             {
                 PyObject *obj = va_arg(vargs, PyObject *);
-                const char *str = va_arg(vargs, const char *);
+                va_arg(vargs, const char *);
                 if (obj) {
                     Py_ssize_t size = PyUnicode_GET_SIZE(obj);
                     Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(obj), size);
                     s += size;
                 } else {
-                    appendstring(str);
+                    Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(*callresult),
+                                    PyUnicode_GET_SIZE(*callresult));
+                    s += PyUnicode_GET_SIZE(*callresult);
+                    Py_DECREF(*callresult);
                 }
+                ++callresult;
                 break;
             }
             case 'S':
@@ -1144,7 +1156,7 @@
     if (callresults) {
         PyObject **callresult2 = callresults;
         while (callresult2 < callresult) {
-            Py_DECREF(*callresult2);
+            Py_XDECREF(*callresult2);
             ++callresult2;
         }
         PyObject_Free(callresults);


More information about the Python-checkins mailing list