[Python-checkins] cpython: Optimize _PyUnicode_AsKind() for UCS1->UCS4 and UCS2->UCS4

victor.stinner python-checkins at python.org
Sun Oct 2 01:14:19 CEST 2011


http://hg.python.org/cpython/rev/329a981b9143
changeset:   72578:329a981b9143
user:        Victor Stinner <victor.stinner at haypocalc.com>
date:        Sun Oct 02 01:00:40 2011 +0200
summary:
  Optimize _PyUnicode_AsKind() for UCS1->UCS4 and UCS2->UCS4

 * Ensure that the input string is ready
 * Raise a ValueError instead of a fatal error

files:
  Objects/unicodeobject.c |  72 ++++++++++++++++++----------
  1 files changed, 45 insertions(+), 27 deletions(-)


diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1264,43 +1264,61 @@
 }
 
 
-/* Widen Unicode objects to larger buffers.
-   Return NULL if the string is too wide already. */
+/* Widen Unicode objects to larger buffers. Don't write terminating null
+   character. Return NULL on error. */
 
 void*
 _PyUnicode_AsKind(PyObject *s, unsigned int kind)
 {
-    Py_ssize_t i;
-    Py_ssize_t len = PyUnicode_GET_LENGTH(s);
-    void *d = PyUnicode_DATA(s);
-    unsigned int skind = PyUnicode_KIND(s);
-    if (PyUnicode_KIND(s) >= kind) {
+    Py_ssize_t len;
+    void *result;
+    unsigned int skind;
+
+    if (PyUnicode_READY(s))
+        return NULL;
+
+    len = PyUnicode_GET_LENGTH(s);
+    skind = PyUnicode_KIND(s);
+    if (skind >= kind) {
         PyErr_SetString(PyExc_RuntimeError, "invalid widening attempt");
         return NULL;
     }
     switch(kind) {
-    case PyUnicode_2BYTE_KIND: {
-        Py_UCS2 *result = PyMem_Malloc(PyUnicode_GET_LENGTH(s) * sizeof(Py_UCS2));
-        if (!result) {
-            PyErr_NoMemory();
-            return 0;
-        }
-        for (i = 0; i < len; i++)
-            result[i] = ((Py_UCS1*)d)[i];
+    case PyUnicode_2BYTE_KIND:
+        result = PyMem_Malloc(len * sizeof(Py_UCS2));
+        if (!result)
+            return PyErr_NoMemory();
+        assert(skind == PyUnicode_1BYTE_KIND);
+        _PyUnicode_CONVERT_BYTES(
+            Py_UCS1, Py_UCS2,
+            PyUnicode_1BYTE_DATA(s),
+            PyUnicode_1BYTE_DATA(s) + len,
+            result);
         return result;
-    }
-    case PyUnicode_4BYTE_KIND: {
-        Py_UCS4 *result = PyMem_Malloc(PyUnicode_GET_LENGTH(s) * sizeof(Py_UCS4));
-        if (!result) {
-            PyErr_NoMemory();
-            return 0;
-        }
-        for (i = 0; i < len; i++)
-            result[i] = PyUnicode_READ(skind, d, i);
+    case PyUnicode_4BYTE_KIND:
+        result = PyMem_Malloc(len * sizeof(Py_UCS4));
+        if (!result)
+            return PyErr_NoMemory();
+        if (skind == PyUnicode_2BYTE_KIND) {
+            _PyUnicode_CONVERT_BYTES(
+                Py_UCS2, Py_UCS4,
+                PyUnicode_2BYTE_DATA(s),
+                PyUnicode_2BYTE_DATA(s) + len,
+                result);
+        }
+        else {
+            assert(skind == PyUnicode_1BYTE_KIND);
+            _PyUnicode_CONVERT_BYTES(
+                Py_UCS1, Py_UCS4,
+                PyUnicode_1BYTE_DATA(s),
+                PyUnicode_1BYTE_DATA(s) + len,
+                result);
+        }
         return result;
-    }
-    }
-    Py_FatalError("invalid kind");
+    default:
+        break;
+    }
+    PyErr_SetString(PyExc_ValueError, "invalid kind");
     return NULL;
 }
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list