[Python-checkins] cpython: Fix "%f" format of str%args if the result is not an ASCII or latin1 string

victor.stinner python-checkins at python.org
Sat Jun 16 03:03:16 CEST 2012


http://hg.python.org/cpython/rev/73ff365bbb1d
changeset:   77462:73ff365bbb1d
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Sat Jun 16 02:57:41 2012 +0200
summary:
  Fix "%f" format of str%args if the result is not an ASCII or latin1 string

files:
  Lib/test/test_format.py |   2 +
  Objects/unicodeobject.c |  36 +++++++++++++++-------------
  2 files changed, 21 insertions(+), 17 deletions(-)


diff --git a/Lib/test/test_format.py b/Lib/test/test_format.py
--- a/Lib/test/test_format.py
+++ b/Lib/test/test_format.py
@@ -265,6 +265,8 @@
                 raise TestFailed('"%*d"%(maxsize, -127) should fail')
 
     def test_non_ascii(self):
+        testformat("\u20ac=%f", (1.0,), "\u20ac=1.000000")
+
         self.assertEqual(format("abc", "\u2007<5"), "abc\u2007\u2007")
         self.assertEqual(format(123, "\u2007<5"), "123\u2007\u2007")
         self.assertEqual(format(12.3, "\u2007<6"), "12.3\u2007\u2007")
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1660,34 +1660,34 @@
 }
 
 /* Copy a ASCII or latin1 char* string into a Python Unicode string.
-   Return the length of the input string.
 
    WARNING: The function doesn't copy the terminating null character and
    doesn't check the maximum character (may write a latin1 character in an
    ASCII string). */
-static Py_ssize_t
-unicode_write_cstr(PyObject *unicode, Py_ssize_t index, const char *str)
+static void
+unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
+                   const char *str, Py_ssize_t len)
 {
     enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
     void *data = PyUnicode_DATA(unicode);
+    const char *end = str + len;
 
     switch (kind) {
     case PyUnicode_1BYTE_KIND: {
-        Py_ssize_t len = strlen(str);
         assert(index + len <= PyUnicode_GET_LENGTH(unicode));
         memcpy((char *) data + index, str, len);
-        return len;
+        break;
     }
     case PyUnicode_2BYTE_KIND: {
         Py_UCS2 *start = (Py_UCS2 *)data + index;
         Py_UCS2 *ucs2 = start;
         assert(index <= PyUnicode_GET_LENGTH(unicode));
 
-        for (; *str; ++ucs2, ++str)
+        for (; str < end; ++ucs2, ++str)
             *ucs2 = (Py_UCS2)*str;
 
         assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
-        return ucs2 - start;
+        break;
     }
     default: {
         Py_UCS4 *start = (Py_UCS4 *)data + index;
@@ -1695,11 +1695,10 @@
         assert(kind == PyUnicode_4BYTE_KIND);
         assert(index <= PyUnicode_GET_LENGTH(unicode));
 
-        for (; *str; ++ucs4, ++str)
+        for (; str < end; ++ucs4, ++str)
             *ucs4 = (Py_UCS4)*str;
 
         assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
-        return ucs4 - start;
     }
     }
 }
@@ -2730,17 +2729,18 @@
             case 'x':
             case 'p':
             {
-                Py_ssize_t written;
+                Py_ssize_t len;
                 /* unused, since we already have the result */
                 if (*f == 'p')
                     (void) va_arg(vargs, void *);
                 else
                     (void) va_arg(vargs, int);
                 /* extract the result from numberresults and append. */
-                written = unicode_write_cstr(string, i, numberresult);
+                len = strlen(numberresult);
+                unicode_write_cstr(string, i, numberresult, len);
                 /* skip over the separating '\0' */
-                i += written;
-                numberresult += written;
+                i += len;
+                numberresult += len;
                 assert(*numberresult == '\0');
                 numberresult++;
                 assert(numberresult <= numberresults + numbersize);
@@ -2812,10 +2812,14 @@
                 PyUnicode_WRITE(kind, data, i++, '%');
                 break;
             default:
-                i += unicode_write_cstr(string, i, p);
+            {
+                Py_ssize_t len = strlen(p);
+                unicode_write_cstr(string, i, p, len);
+                i += len;
                 assert(i == PyUnicode_GET_LENGTH(string));
                 goto end;
             }
+            }
         }
         else {
             assert(i < PyUnicode_GET_LENGTH(string));
@@ -13211,9 +13215,7 @@
     if (writer) {
         if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
             return -1;
-        memcpy((char*)writer->data + writer->pos * writer->kind,
-               p,
-               len);
+        unicode_write_cstr(writer->buffer, writer->pos, p, len);
         writer->pos += len;
     }
     else

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list