[Python-checkins] cpython: Fix PyUnicode_Substring() for start >= length and start > end

Thu May 3 02:33:57 CEST 2012

http://hg.python.org/cpython/rev/99be985edeca
changeset:   76723:99be985edeca
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Thu May 03 02:32:34 2012 +0200
summary:
  Fix PyUnicode_Substring() for start >= length and start > end

Remove the fast-path for 1-character strings: unicode_fromascii() and
_PyUnicode_FromUCS*() now have their own fast-paths for 1-character strings.

files:
  Objects/unicodeobject.c |  18 ++++++++++--------
  1 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -12067,20 +12067,22 @@
     if (PyUnicode_READY(self) == -1)
         return NULL;
 
-    end = Py_MIN(end, PyUnicode_GET_LENGTH(self));
-
-    if (start == 0 && end == PyUnicode_GET_LENGTH(self))
+    length = PyUnicode_GET_LENGTH(self);
+    end = Py_MIN(end, length);
+
+    if (start == 0 && end == length)
         return unicode_result_unchanged(self);
 
-    length = end - start;
-    if (length == 1)
-        return unicode_getitem(self, start);
-
     if (start < 0 || end < 0) {
         PyErr_SetString(PyExc_IndexError, "string index out of range");
         return NULL;
     }
-
+    if (start >= length || end < start) {
+        assert(end == length);
+        return PyUnicode_New(0, 0);
+    }
+
+    length = end - start;
     if (PyUnicode_IS_ASCII(self)) {
         data = PyUnicode_1BYTE_DATA(self);
         return unicode_fromascii(data + start, length);

-- 
Repository URL: http://hg.python.org/cpython