[Cython] Add support for list/tuple slicing
Zaur Shibzukhov
szport at gmail.com
Fri Mar 8 08:49:47 CET 2013
2013/3/7 Zaur Shibzukhov <szport at gmail.com>:
> Current Cython generate for slicing of list/tuple general
> PySequence_GetSlice/SetSlice call.
> We could replace that to native call for Py{List|Tuple}_GetSlice and
> PyList_SetSlice for lists/tuples.
There is updated change that use utility code
__Pyx_Py{List|Tuple}_GetSlice because Py{List|Tuple}_GetSlice dosn't
support negative indices. That job do (in CPython) {list|tuple}slice
function from type object's slot ({list|tuple}_subscript), but it
handle both indices and slice objects which add overhead. That's the
reason why PySequence_GetSlice is slower: it create slice object and
falls to {list|tuple}_subscript. Therefore I added utility code.
Here is utility code:
/////////////// PyList_GetSlice.proto ///////////////
static PyObject* __Pyx_PyList_GetSlice(
PyObject* lst, Py_ssize_t start, Py_ssize_t stop);
/////////////// PyList_GetSlice ///////////////
PyObject* __Pyx_PyList_GetSlice(
PyObject* lst, Py_ssize_t start, Py_ssize_t stop) {
Py_ssize_t i, length;
PyListObject* np;
PyObject **src, **dest;
PyObject *v;
length = PyList_GET_SIZE(lst);
if (start < 0) {
start += length;
if (start < 0)
start = 0;
}
if (stop < 0)
stop += length;
else if (stop > length)
stop = length;
length = stop - start;
if (length <= 0)
return PyList_New(0);
np = (PyListObject*) PyList_New(length);
if (np == NULL)
return NULL;
src = ((PyListObject*)lst)->ob_item + start;
dest = np->ob_item;
for (i = 0; i < length; i++) {
v = src[i];
Py_INCREF(v);
dest[i] = v;
}
return (PyObject*)np;
}
/////////////// PyTuple_GetSlice.proto ///////////////
static PyObject* __Pyx_PyTuple_GetSlice(
PyObject* ob, Py_ssize_t start, Py_ssize_t stop);
/////////////// PyTuple_GetSlice ///////////////
PyObject* __Pyx_PyTuple_GetSlice(
PyObject* ob, Py_ssize_t start, Py_ssize_t stop) {
Py_ssize_t i, length;
PyTupleObject* np;
PyObject **src, **dest;
PyObject *v;
length = PyTuple_GET_SIZE(ob);
if (start < 0) {
start += length;
if (start < 0)
start = 0;
}
if (stop < 0)
stop += length;
else if (stop > length)
stop = length;
length = stop - start;
if (length <= 0)
return PyList_New(0);
np = (PyTupleObject *) PyTuple_New(length);
if (np == NULL)
return NULL;
src = ((PyTupleObject*)ob)->ob_item + start;
dest = np->ob_item;
for (i = 0; i < length; i++) {
v = src[i];
Py_INCREF(v);
dest[i] = v;
}
return (PyObject*)np;
}
Here is testing code:
list_slice.pyx
-----------------
from cpython.sequence cimport PySequence_GetSlice
cdef extern from "list_tuple_slices.h":
inline object __Pyx_PyList_GetSlice(object ob, int start, int stop)
inline object __Pyx_PyTuple_GetSlice(object ob, int start, int stop)
cdef list lst = list(range(10))
cdef list lst2 = list(range(7))
def get_slice1(list lst):
cdef int i
cdef list res = []
for i in range(200000):
res.append(PySequence_GetSlice(lst, 2, 8))
return res
def get_slice2(list lst):
cdef int i
cdef list res = []
for i in range(200000):
res.append(__Pyx_PyList_GetSlice(lst, 2, 8))
return res
def test_get_slice1():
get_slice1(lst)
def test_get_slice2():
get_slice2(lst)
tuple_slicing.pyx
-----------------------
from cpython.sequence cimport PySequence_GetSlice
cdef extern from "list_tuple_slices.h":
inline object __Pyx_PyList_GetSlice(object lst, int start, int stop)
inline object __Pyx_PyTuple_GetSlice(object ob, int start, int stop)
cdef tuple lst = tuple(range(10))
def get_slice1(tuple lst):
cdef int i
cdef list res = []
for i in range(200000):
res.append(PySequence_GetSlice(lst, 2, 8))
return res
def get_slice2(tuple lst):
cdef int i
cdef list res = []
for i in range(200000):
res.append(__Pyx_PyTuple_GetSlice(lst, 2, 8))
return res
def test_get_slice1():
get_slice1(lst)
def test_get_slice2():
get_slice2(lst)
Here are timings:
for list
(py33) zbook:mytests $ python -m timeit -n 100 -r 5 -v -s "from
mytests.list_slice import test_get_slice1" "test_get_slice1()"
raw times: 10.2 10.3 10.4 10.1 10.2
100 loops, best of 5: 101 msec per loop
(py33) zbook:mytests $ python -m timeit -n 100 -r 5 -v -s "from
mytests.list_slice import test_get_slice1" "test_get_slice1()"
raw times: 10.3 10.3 10.2 10.3 10.2
100 loops, best of 5: 102 msec per loop
(py33) zbook:mytests $ python -m timeit -n 100 -r 5 -v -s "from
mytests.list_slice import test_get_slice2" "test_get_slice2()"
raw times: 8.16 8.19 8.17 8.2 8.16
100 loops, best of 5: 81.6 msec per loop
(py33) zbook:mytests $ python -m timeit -n 100 -r 5 -v -s "from
mytests.list_slice import test_get_slice2" "test_get_slice2()"
raw times: 8.1 8.05 8.03 8.06 8.07
100 loops, best of 5: 80.3 msec per loop
for tuple
(py33) zbook:mytests $ python -m timeit -n 100 -r 5 -v -s "from
mytests.tuple_slice import test_get_slice1" "test_get_slice1()"
raw times: 7.2 7.16 7.16 7.18 7.17
100 loops, best of 5: 71.6 msec per loop
(py33) zbook:mytests $ python -m timeit -n 100 -r 5 -v -s "from
mytests.tuple_slice import test_get_slice1" "test_get_slice1()"
raw times: 7.22 7.22 7.19 7.18 7.18
100 loops, best of 5: 71.8 msec per loop
(py33) zbook:mytests $ python -m timeit -n 100 -r 5 -v -s "from
mytests.tuple_slice import test_get_slice2" "test_get_slice2()"
raw times: 9.23 5.2 4.95 4.96 4.98
100 loops, best of 5: 49.5 msec per loop
(py33) zbook:mytests $ python -m timeit -n 100 -r 5 -v -s "from
mytests.tuple_slice import test_get_slice2" "test_get_slice2()"
raw times: 4.92 4.93 4.9 4.94 4.92
100 loops, best of 5: 49 msec per loop
This change dosn't contain list slice assignments because previous
testing and timings showed that this need more analysis.
Maybe I'l make pull request with this change + tests?
Zaur Shibzukhov
More information about the cython-devel
mailing list