[Python-checkins] cpython: Optimize repr(str): use _PyUnicode_FastCopyCharacters() when no character is

victor.stinner python-checkins at python.org
Sun Apr 14 19:31:43 CEST 2013


http://hg.python.org/cpython/rev/870fddd176c8
changeset:   83384:870fddd176c8
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Sun Apr 14 18:45:39 2013 +0200
summary:
  Optimize repr(str): use _PyUnicode_FastCopyCharacters() when no character is escaped

files:
  Objects/unicodeobject.c |  157 ++++++++++++++-------------
  1 files changed, 83 insertions(+), 74 deletions(-)


diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -11968,7 +11968,7 @@
     Py_ssize_t isize;
     Py_ssize_t osize, squote, dquote, i, o;
     Py_UCS4 max, quote;
-    int ikind, okind;
+    int ikind, okind, unchanged;
     void *idata, *odata;
 
     if (PyUnicode_READY(unicode) == -1)
@@ -11979,7 +11979,7 @@
 
     /* Compute length of output, quote characters, and
        maximum character */
-    osize = 2; /* quotes */
+    osize = 0;
     max = 127;
     squote = dquote = 0;
     ikind = PyUnicode_KIND(unicode);
@@ -12010,7 +12010,9 @@
     }
 
     quote = '\'';
+    unchanged = (osize == isize);
     if (squote) {
+        unchanged = 0;
         if (dquote)
             /* Both squote and dquote present. Use squote,
                and escape them */
@@ -12018,6 +12020,7 @@
         else
             quote = '"';
     }
+    osize += 2;   /* quotes */
 
     repr = PyUnicode_New(osize, max);
     if (repr == NULL)
@@ -12027,81 +12030,87 @@
 
     PyUnicode_WRITE(okind, odata, 0, quote);
     PyUnicode_WRITE(okind, odata, osize-1, quote);
-
-    for (i = 0, o = 1; i < isize; i++) {
-        Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
-
-        /* Escape quotes and backslashes */
-        if ((ch == quote) || (ch == '\\')) {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, ch);
-            continue;
-        }
-
-        /* Map special whitespace to '\t', \n', '\r' */
-        if (ch == '\t') {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 't');
-        }
-        else if (ch == '\n') {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 'n');
-        }
-        else if (ch == '\r') {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 'r');
-        }
-
-        /* Map non-printable US ASCII to '\xhh' */
-        else if (ch < ' ' || ch == 0x7F) {
-            PyUnicode_WRITE(okind, odata, o++, '\\');
-            PyUnicode_WRITE(okind, odata, o++, 'x');
-            PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
-            PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
-        }
-
-        /* Copy ASCII characters as-is */
-        else if (ch < 0x7F) {
-            PyUnicode_WRITE(okind, odata, o++, ch);
-        }
-
-        /* Non-ASCII characters */
-        else {
-            /* Map Unicode whitespace and control characters
-               (categories Z* and C* except ASCII space)
-            */
-            if (!Py_UNICODE_ISPRINTABLE(ch)) {
+    if (unchanged) {
+        _PyUnicode_FastCopyCharacters(repr, 1,
+                                      unicode, 0,
+                                      isize);
+    }
+    else {
+        for (i = 0, o = 1; i < isize; i++) {
+            Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
+
+            /* Escape quotes and backslashes */
+            if ((ch == quote) || (ch == '\\')) {
                 PyUnicode_WRITE(okind, odata, o++, '\\');
-                /* Map 8-bit characters to '\xhh' */
-                if (ch <= 0xff) {
-                    PyUnicode_WRITE(okind, odata, o++, 'x');
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
+                PyUnicode_WRITE(okind, odata, o++, ch);
+                continue;
+            }
+
+            /* Map special whitespace to '\t', \n', '\r' */
+            if (ch == '\t') {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 't');
+            }
+            else if (ch == '\n') {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 'n');
+            }
+            else if (ch == '\r') {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 'r');
+            }
+
+            /* Map non-printable US ASCII to '\xhh' */
+            else if (ch < ' ' || ch == 0x7F) {
+                PyUnicode_WRITE(okind, odata, o++, '\\');
+                PyUnicode_WRITE(okind, odata, o++, 'x');
+                PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
+                PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
+            }
+
+            /* Copy ASCII characters as-is */
+            else if (ch < 0x7F) {
+                PyUnicode_WRITE(okind, odata, o++, ch);
+            }
+
+            /* Non-ASCII characters */
+            else {
+                /* Map Unicode whitespace and control characters
+                   (categories Z* and C* except ASCII space)
+                */
+                if (!Py_UNICODE_ISPRINTABLE(ch)) {
+                    PyUnicode_WRITE(okind, odata, o++, '\\');
+                    /* Map 8-bit characters to '\xhh' */
+                    if (ch <= 0xff) {
+                        PyUnicode_WRITE(okind, odata, o++, 'x');
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0x000F]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0x000F]);
+                    }
+                    /* Map 16-bit characters to '\uxxxx' */
+                    else if (ch <= 0xffff) {
+                        PyUnicode_WRITE(okind, odata, o++, 'u');
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+                    }
+                    /* Map 21-bit characters to '\U00xxxxxx' */
+                    else {
+                        PyUnicode_WRITE(okind, odata, o++, 'U');
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
+                        PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+                    }
                 }
-                /* Map 16-bit characters to '\uxxxx' */
-                else if (ch <= 0xffff) {
-                    PyUnicode_WRITE(okind, odata, o++, 'u');
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
+                /* Copy characters as-is */
+                else {
+                    PyUnicode_WRITE(okind, odata, o++, ch);
                 }
-                /* Map 21-bit characters to '\U00xxxxxx' */
-                else {
-                    PyUnicode_WRITE(okind, odata, o++, 'U');
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 28) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 24) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 20) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 16) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 12) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 8) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[(ch >> 4) & 0xF]);
-                    PyUnicode_WRITE(okind, odata, o++, Py_hexdigits[ch & 0xF]);
-                }
-            }
-            /* Copy characters as-is */
-            else {
-                PyUnicode_WRITE(okind, odata, o++, ch);
             }
         }
     }

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list