[Python-checkins] cpython: Fix naïve heuristic in unicode slicing (followup to 1b4f886dc9e2)

antoine.pitrou python-checkins at python.org
Tue Oct 4 20:08:56 CEST 2011


http://hg.python.org/cpython/rev/981deff56707
changeset:   72673:981deff56707
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Tue Oct 04 20:00:49 2011 +0200
summary:
  Fix naïve heuristic in unicode slicing (followup to 1b4f886dc9e2)

files:
  Objects/unicodeobject.c |  22 +++++++++++++++-------
  1 file changed, 15 insertions(+), 7 deletions(-)


diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12258,7 +12258,8 @@
         Py_ssize_t start, stop, step, slicelength, cur, i;
         PyObject *result;
         void *src_data, *dest_data;
-        int kind;
+        int src_kind, dest_kind;
+        Py_UCS4 ch, max_char;
 
         if (PySlice_GetIndicesEx(item, PyUnicode_GET_LENGTH(self),
                                  &start, &stop, &step, &slicelength) < 0) {
@@ -12276,17 +12277,24 @@
             return PyUnicode_Substring((PyObject*)self,
                                        start, start + slicelength);
         }
-        /* General (less optimized) case */
-        result = PyUnicode_New(slicelength, PyUnicode_MAX_CHAR_VALUE(self));
+        /* General case */
+        max_char = 127;
+        src_kind = PyUnicode_KIND(self);
+        src_data = PyUnicode_DATA(self);
+        for (cur = start, i = 0; i < slicelength; cur += step, i++) {
+            ch = PyUnicode_READ(src_kind, src_data, cur);
+            if (ch > max_char)
+                max_char = ch;
+        }
+        result = PyUnicode_New(slicelength, max_char);
         if (result == NULL)
             return NULL;
-        kind = PyUnicode_KIND(self);
-        src_data = PyUnicode_DATA(self);
+        dest_kind = PyUnicode_KIND(result);
         dest_data = PyUnicode_DATA(result);
 
         for (cur = start, i = 0; i < slicelength; cur += step, i++) {
-            Py_UCS4 ch = PyUnicode_READ(kind, src_data, cur);
-            PyUnicode_WRITE(kind, dest_data, i, ch);
+            Py_UCS4 ch = PyUnicode_READ(src_kind, src_data, cur);
+            PyUnicode_WRITE(dest_kind, dest_data, i, ch);
         }
         return result;
     } else {

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list