[Cython] About IndexNode and unicode[index]

Stefan Behnel stefan_ml at behnel.de
Fri Mar 1 08:56:21 CET 2013


Zaur Shibzukhov, 01.03.2013 08:37:
> unicode_slice.h
> ---------------------
> 
> #include "unicodeobject.h"
> 
> static inline PyObject* unicode_slice(
>             PyObject* text, Py_ssize_t start, Py_ssize_t stop);
> 
> /////////////// PyUnicode_Substring ///////////////
> 
> /* CURRENT
>  *
>  * unicode_slice(text, start, stop): return a new unicode object holding
>  * the characters of `text` in the half-open range [start, stop).
>  *
>  * Index handling, always performed:
>  *   - a negative start or stop has the string length added to it;
>  *   - a start that is still negative afterwards is clamped to 0;
>  *   - a stop greater than the length is clamped to the length;
>  *   - if stop - start ends up <= 0 (this also covers a start beyond the
>  *     end and a stop that stays negative), an empty string is returned.
>  *
>  * Returns NULL if PyUnicode_READY() fails (PEP 393 builds only);
>  * otherwise returns whatever the PyUnicode_From*() constructor returns.
>  */
> 
> static inline PyObject* unicode_slice(
>             PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
>     Py_ssize_t length;  /* string length first, slice length after the index fix-up */
> #if CYTHON_PEP393_ENABLED
>     if (PyUnicode_READY(text) == -1) return NULL;
>     length = PyUnicode_GET_LENGTH(text);
> #else
>     length = PyUnicode_GET_SIZE(text);
> #endif
>     if (start < 0) {
>         start += length;  /* negative start counts from the end */
>         if (start < 0)
>             start = 0;  /* still before the beginning: clamp */
>     }
>     if (stop < 0)
>         stop += length;  /* may stay negative; caught by the length <= 0 test below */
>     else if (stop > length)
>         stop = length;
>     length = stop - start;
>     if (length <= 0)
>         return PyUnicode_FromUnicode(NULL, 0);  /* empty slice */
> #if CYTHON_PEP393_ENABLED
>     /* The byte pointer is advanced by start * PyUnicode_KIND(text), i.e. the
>      * kind value is used as the per-character size in bytes. */
>     return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
>         PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
> #else
>     return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
> #endif
> }
> 
> static inline PyObject* unicode_slice2(
>             PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag);
> 
> /////////////// PyUnicode_Substring ///////////////
> 
> /* CHANGED
>  *
>  * unicode_slice2(text, start, stop, flag): variant of unicode_slice() in
>  * which the index fix-up is optional.
>  *
>  *   flag != 0: start/stop are normalized exactly as in unicode_slice()
>  *              (negative values offset by the length, clamping, empty
>  *              string when stop - start <= 0).
>  *   flag == 0: start and stop are used exactly as passed; nothing in this
>  *              function checks them against the string length or against
>  *              each other.
>  *              NOTE(review): presumably the caller must then guarantee
>  *              0 <= start <= stop <= length -- confirm.
>  *
>  * Returns NULL if PyUnicode_READY() fails (PEP 393 builds only);
>  * otherwise returns whatever the PyUnicode_From*() constructor returns.
>  */
> 
> static inline PyObject* unicode_slice2(
>             PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag) {
>     Py_ssize_t length;  /* only assigned and read inside the if (flag) branch */
> 
> #if CYTHON_PEP393_ENABLED
>     if (PyUnicode_READY(text) == -1) return NULL;
> #endif
> 
> if (flag) {  /* index normalization requested */
>     #if CYTHON_PEP393_ENABLED
>         length = PyUnicode_GET_LENGTH(text);
>     #else
>         length = PyUnicode_GET_SIZE(text);
>     #endif
>         if (start < 0) {
>             start += length;  /* negative start counts from the end */
>             if (start < 0)
>                 start = 0;  /* still before the beginning: clamp */
>         }
>         if (stop < 0)
>             stop += length;  /* may stay negative; caught by the length <= 0 test below */
>         else if (stop > length)
>             stop = length;
>         length = stop - start;
>         if (length <= 0)
>             return PyUnicode_FromUnicode(NULL, 0);  /* empty slice */
> }
> 
> #if CYTHON_PEP393_ENABLED
>     /* The byte pointer is advanced by start * PyUnicode_KIND(text), i.e. the
>      * kind value is used as the per-character size in bytes. */
>     return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
>         PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
> #else
>     return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
> #endif
> }
> 
> unicode_slice.pyx
> ------------------------
> 
> cdef extern from 'unicode_slice.h':
>     # start/stop are declared Py_ssize_t to match the C prototypes in
>     # unicode_slice.h; declaring them as int would narrow the index range.
>     inline unicode unicode_slice(unicode ustring, Py_ssize_t start, Py_ssize_t stop)
>     inline unicode unicode_slice2(unicode ustring, Py_ssize_t start, Py_ssize_t stop, int flag)
> 
> cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz"  # benchmark input: 52 ASCII characters
> 
> cdef long f_1(unicode text):  # benchmark loop for unicode_slice(): takes text[0:i] for every i < len(text), 100000 times over
>     cdef int i, j
>     cdef int n = len(text)
>     cdef int val  # length of the slice just created
>     cdef long S = 0  # running sum of val * j; returned to the caller
> 
>     for j in range(100000):
>         for i in range(n):
>             val = len(unicode_slice(text, 0, i))
>             S += val * j
> 
>     return S
> 
> cdef long f_2(unicode text):  # same loop as f_1, but calls unicode_slice2() with flag=0 (index normalization skipped)
>     cdef int i, j
>     cdef int n = len(text)
>     cdef int val  # length of the slice just created
>     cdef long S = 0  # running sum of val * j; returned to the caller
> 
>     for j in range(100000):
>         for i in range(n):
>             val = len(unicode_slice2(text, 0, i, 0))  # 0 <= i < n here, so start/stop are already within the string
>             S += val * j
> 
>     return S
> 
> 
> def test_1():  # Python-level entry point imported by the timeit run; times f_1 on the module-level text
>     f_1(text)  # return value is discarded
> 
> def test_2():  # Python-level entry point imported by the timeit run; times f_2 on the module-level text
>     f_2(text)  # return value is discarded
> 
> Here are timings:
> 
> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
> mytests.unicode_slice import test_1" "test_1()"
> 50 loops, best of 5: 534 msec per loop
> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
> mytests.unicode_slice import test_2" "test_2()"
> 50 loops, best of 5: 523 msec per loop
> 
> Only 2%

That's to be expected. Creating a Unicode string object is by far the
dominating operation here, including memory allocation, object type
selection and whatnot, so skipping the index normalisation hardly shows up
in the timings.

Stefan



More information about the cython-devel mailing list