[Cython] About IndexNode and unicode[index]

Fri Mar 1 08:37:00 CET 2013

2013/3/1 ZS <szport at gmail.com>:
> 2013/3/1 Stefan Behnel <stefan_ml at behnel.de>:
>> ZS, 28.02.2013 21:07:
>>> 2013/2/28 Stefan Behnel:
>>>>> This allows writing unicode text parsing code that runs at almost C speed,
>>>>> mostly in Python (+ .pxd definitions).
>>>>
>>>> I suggest simply adding a constant flag argument to the existing function
>>>> that states if checking should be done or not. Inlining will let the C
>>>> compiler drop the corresponding code, which may or may not make it a little
>>>> faster.
>>>
>>> static inline Py_UCS4 unicode_char2(PyObject* ustring, Py_ssize_t i, int flag) {
>>>     Py_ssize_t length;
>>> #if CYTHON_PEP393_ENABLED
>>>     if (PyUnicode_READY(ustring) < 0) return (Py_UCS4)-1;
>>> #endif
>>>     if (flag) {
>>>         length = __Pyx_PyUnicode_GET_LENGTH(ustring);
>>>         if ((0 <= i) & (i < length)) {
>>>             return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>>>         } else if ((-length <= i) & (i < 0)) {
>>>             return __Pyx_PyUnicode_READ_CHAR(ustring, i + length);
>>>         } else {
>>>             PyErr_SetString(PyExc_IndexError, "string index out of range");
>>>             return (Py_UCS4)-1;
>>>         }
>>>     } else {
>>>         return __Pyx_PyUnicode_READ_CHAR(ustring, i);
>>>     }
>>> }
>>
>> I think you could even pass in two flags, one for wraparound and one for
>> boundscheck, and then just evaluate them appropriately in the existing "if"
>> tests above. That should allow both features to be supported independently
>> in a fast way.
>>
>>
>>> Here are timings:
>>>
>>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
>>> mytests.unicode_index import test_1" "test_1()"
>>> 50 loops, best of 5: 152 msec per loop
>>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
>>> mytests.unicode_index import test_2" "test_2()"
>>> 50 loops, best of 5: 86.5 msec per loop
>>> (py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
>>> mytests.unicode_index import test_3" "test_3()"
>>> 50 loops, best of 5: 86.5 msec per loop
>>>
>>> So your suggestion would be preferable.
>>
>> Nice. Yes, looks like it's worth it.
>>
>
> Surely the same could be applied to unicode slicing too.
>
I had to verify myself first. So here is the test...

unicode_slice.h
---------------------

#include "unicodeobject.h"

/* Forward declaration of unicode_slice(); the definition follows below. */
static inline PyObject *unicode_slice(PyObject *text,
                                      Py_ssize_t start,
                                      Py_ssize_t stop);

/////////////// PyUnicode_Substring ///////////////

/* CURRENT */

/*
 * Build a new unicode object from the characters of `text` in the range
 * [start, stop).
 *
 * A negative `start` or `stop` has the string length added to it.  After
 * that, `start` is clamped to 0 from below and a non-negative `stop` is
 * clamped to the string length from above.  An empty or inverted range
 * produces an empty string.
 *
 * Returns the result of the unicode constructor, or NULL if
 * PyUnicode_READY() fails (PEP 393 builds only).
 */
static inline PyObject* unicode_slice(
            PyObject* text, Py_ssize_t start, Py_ssize_t stop) {
    Py_ssize_t length;
#if CYTHON_PEP393_ENABLED
    if (PyUnicode_READY(text) == -1) return NULL;
    length = PyUnicode_GET_LENGTH(text);
#else
    length = PyUnicode_GET_SIZE(text);
#endif
    if (start < 0) {
        start += length;
        if (start < 0)
            start = 0;
    }
    if (stop < 0)
        stop += length;
    else if (stop > length)
        stop = length;
    /*
     * Compare the bounds instead of computing stop - start here: a very
     * negative `stop` combined with a large positive `start` would overflow
     * the signed subtraction (undefined behaviour) and could slip past the
     * empty-slice guard with a bogus positive size.
     */
    if (stop <= start)
        return PyUnicode_FromUnicode(NULL, 0);
    /* From here on 0 <= start < stop <= length, so stop - start is safe. */
#if CYTHON_PEP393_ENABLED
    return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
        PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
#else
    return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
#endif
}

/* Forward declaration of unicode_slice2(); the definition follows below. */
static inline PyObject *unicode_slice2(PyObject *text,
                                       Py_ssize_t start,
                                       Py_ssize_t stop,
                                       int flag);

/////////////// PyUnicode_Substring ///////////////

/* CHANGED */

/*
 * Variant of unicode_slice() with optional index normalisation.
 *
 * flag != 0: a negative `start`/`stop` has the string length added to it,
 *            `start` is clamped to 0 from below, a non-negative `stop` is
 *            clamped to the string length from above, and an empty or
 *            inverted range produces an empty string.
 * flag == 0: `start` and `stop` are passed to the constructor unmodified.
 *            No bounds are checked here, so the caller is responsible for
 *            supplying indices that lie inside the string.
 *
 * Returns the result of the unicode constructor, or NULL if
 * PyUnicode_READY() fails (PEP 393 builds only).
 */
static inline PyObject* unicode_slice2(
            PyObject* text, Py_ssize_t start, Py_ssize_t stop, int flag) {
#if CYTHON_PEP393_ENABLED
    if (PyUnicode_READY(text) == -1) return NULL;
#endif

    if (flag) {
        /* The length is only needed on the checked path. */
        Py_ssize_t length;
#if CYTHON_PEP393_ENABLED
        length = PyUnicode_GET_LENGTH(text);
#else
        length = PyUnicode_GET_SIZE(text);
#endif
        if (start < 0) {
            start += length;
            if (start < 0)
                start = 0;
        }
        if (stop < 0)
            stop += length;
        else if (stop > length)
            stop = length;
        /*
         * Compare the bounds instead of computing stop - start here: a very
         * negative `stop` combined with a large positive `start` would
         * overflow the signed subtraction (undefined behaviour) and could
         * slip past the empty-slice guard with a bogus positive size.
         */
        if (stop <= start)
            return PyUnicode_FromUnicode(NULL, 0);
    }

#if CYTHON_PEP393_ENABLED
    return PyUnicode_FromKindAndData(PyUnicode_KIND(text),
        PyUnicode_1BYTE_DATA(text) + start*PyUnicode_KIND(text), stop-start);
#else
    return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(text)+start, stop-start);
#endif
}

unicode_slice.pyx
------------------------

cdef extern from 'unicode_slice.h':
    # The index arguments are declared as Py_ssize_t to match the C
    # signatures in unicode_slice.h; declaring them as int would truncate
    # indices on platforms where Py_ssize_t is wider than int.
    inline unicode unicode_slice(unicode ustring, Py_ssize_t start,
                                 Py_ssize_t stop)
    inline unicode unicode_slice2(unicode ustring, Py_ssize_t start,
                                  Py_ssize_t stop, int flag)

# Module-level sample string that test_1() and test_2() pass to f_1() and f_2().
cdef unicode text = u"abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz"

cdef long f_1(unicode text):
    # Benchmark loop for unicode_slice(): 100000 passes, each slicing every
    # prefix text[0:stop] for stop in range(len(text)).  The prefix lengths,
    # weighted by the pass number, are summed and returned so the calls
    # contribute to the result.
    cdef long total = 0
    cdef int size = len(text)
    cdef int round_no, stop
    cdef int prefix_len

    for round_no in range(100000):
        for stop in range(size):
            prefix_len = len(unicode_slice(text, 0, stop))
            total += prefix_len * round_no

    return total

cdef long f_2(unicode text):
    # Benchmark loop for unicode_slice2() called with flag=0: 100000 passes,
    # each slicing every prefix text[0:stop] for stop in range(len(text)).
    # The prefix lengths, weighted by the pass number, are summed and
    # returned so the calls contribute to the result.
    cdef long total = 0
    cdef int size = len(text)
    cdef int round_no, stop
    cdef int prefix_len

    for round_no in range(100000):
        for stop in range(size):
            prefix_len = len(unicode_slice2(text, 0, stop, 0))
            total += prefix_len * round_no

    return total

# Python-callable benchmark entry point: runs f_1() on the module-level
# `text` and discards the returned sum.
def test_1():
    f_1(text)

# Python-callable benchmark entry point: runs f_2() on the module-level
# `text` and discards the returned sum.
def test_2():
    f_2(text)

Here are timings:

(py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
mytests.unicode_slice import test_1" "test_1()"
50 loops, best of 5: 534 msec per loop
(py33) zbook:mytests $ python3.3 -m timeit -n 50 -r 5 -s "from
mytests.unicode_slice import test_2" "test_2()"
50 loops, best of 5: 523 msec per loop

That is only about a 2% improvement.

Zaur Shibzukhov