# Match beginning of two strings

Richie Hindle richie at entrian.com
Mon Aug 4 17:38:28 CEST 2003

```[Alex]
> I'm not sure where I went wrong in
> the Pyrex coding (it doesn't seem to be performing anywhere
> as well as I thought it might) and I'll be happy for real
> Pyrex expert to show me the way.

I'm no an expert, but I can see a few easily-fixed problems.  The line:

if a[i] != b[i]:

is working with Python strings when it could be working with C
strings.  Here's the original code and its output on my machine:

def exa(a, b):
cdef int la
cdef int lb
la = len(a)
lb = len(b)
cdef int lmin
lmin = min(la, lb)
cdef int i
i = 0
while i < lmin:
if a[i] != b[i]:
return a[:i]
i = i + 1
if lmin == la:
return a
else:
return b

100000 loops, best of 3: 9.11 usec per loop

Here's a modified version of the code comparing C strings:

def exa(a, b):
cdef char* c_a   # `a` as a C string
cdef char* c_b   # `b` as a C string
cdef int la
cdef int lb

c_a = a
c_b = b
la = len(a)
lb = len(b)
cdef int lmin
lmin = min(la, lb)
cdef int i
i = 0
while i < lmin:
if c_a[i] != c_b[i]:
return a[:i]
i = i + 1
if lmin == la:
return a
else:
return b

100000 loops, best of 3: 5.79 usec per loop

Almost twice as fast.  Looking at the generated C is always
worthwhile when optimising Pyrex code - here's the code that does
the comparison against Python strings:

/* "D:\src\tests\pyrex\exa.pyx":11 */
__pyx_3 = PyInt_FromLong(__pyx_v_i); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; goto __pyx_L1;}
__pyx_1 = PyObject_GetItem(__pyx_v_a, __pyx_3); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; goto __pyx_L1;}
Py_DECREF(__pyx_3); __pyx_3 = 0;
__pyx_5 = PyInt_FromLong(__pyx_v_i); if (!__pyx_5) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; goto __pyx_L1;}
__pyx_2 = PyObject_GetItem(__pyx_v_b, __pyx_5); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; goto __pyx_L1;}
Py_DECREF(__pyx_5); __pyx_5 = 0;
if (PyObject_Cmp(__pyx_1, __pyx_2, &__pyx_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; goto __pyx_L1;}
__pyx_4 = __pyx_4 != 0;
Py_DECREF(__pyx_1); __pyx_1 = 0;
Py_DECREF(__pyx_2); __pyx_2 = 0;
if (__pyx_4) {

vs.

/* "D:\src\tests\pyrex\exa.pyx":16 */
__pyx_5 = ((__pyx_v_c_a[__pyx_v_i]) != (__pyx_v_c_b[__pyx_v_i]));
if (__pyx_5) {

for C strings.  There's another similar optimisation that the C
output leads you to: you can use strlen rather than Python's len:

cdef extern from "string.h":
int strlen(char*)

def exa(a, b):
cdef char* c_a   # `a` as a C string
cdef char* c_b   # `b` as a C string
cdef int la
cdef int lb

c_a = a
c_b = b
la = strlen(c_a)
lb = strlen(c_b)
cdef int lmin
lmin = min(la, lb)
cdef int i
i = 0
while i < lmin:
if c_a[i] != c_b[i]:
return a[:i]
i = i + 1
if lmin == la:
return a
else:
return b

100000 loops, best of 3: 3.58 usec per loop

That replaces:

/* "D:\src\tests\pyrex\exa.pyx":4 */
__pyx_1 = __Pyx_GetName(__pyx_b, "len"); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; goto __pyx_L1;}
__pyx_2 = PyTuple_New(1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; goto __pyx_L1;}
Py_INCREF(__pyx_v_a);
PyTuple_SET_ITEM(__pyx_2, 0, __pyx_v_a);
__pyx_3 = PyObject_CallObject(__pyx_1, __pyx_2); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; goto __pyx_L1;}
Py_DECREF(__pyx_1); __pyx_1 = 0;
Py_DECREF(__pyx_2); __pyx_2 = 0;
__pyx_4 = PyInt_AsLong(__pyx_3); if (PyErr_Occurred()) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 4; goto __pyx_L1;}
Py_DECREF(__pyx_3); __pyx_3 = 0;
__pyx_v_la = __pyx_4;

/* "D:\src\tests\pyrex\exa.pyx":5 */
__pyx_1 = __Pyx_GetName(__pyx_b, "len"); if (!__pyx_1) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; goto __pyx_L1;}
__pyx_2 = PyTuple_New(1); if (!__pyx_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; goto __pyx_L1;}
Py_INCREF(__pyx_v_b);
PyTuple_SET_ITEM(__pyx_2, 0, __pyx_v_b);
__pyx_3 = PyObject_CallObject(__pyx_1, __pyx_2); if (!__pyx_3) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; goto __pyx_L1;}
Py_DECREF(__pyx_1); __pyx_1 = 0;
Py_DECREF(__pyx_2); __pyx_2 = 0;
__pyx_4 = PyInt_AsLong(__pyx_3); if (PyErr_Occurred()) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 5; goto __pyx_L1;}
Py_DECREF(__pyx_3); __pyx_3 = 0;
__pyx_v_lb = __pyx_4;

with :

/* "D:\src\tests\pyrex\exa.pyx":12 */
__pyx_v_la = strlen(__pyx_v_c_a);

/* "D:\src\tests\pyrex\exa.pyx":13 */
__pyx_v_lb = strlen(__pyx_v_c_b);

and leaves the call to 'min' as the only remaining huge block of C.
The final version looks like this, eliminating 'min' (Greg, can we have
the terary operator in Pyrex please? <good_mood ? wink : frown>)

cdef extern from "string.h":
int strlen(char*)

def exa(a, b):
cdef char* c_a   # `a` as a C string
cdef char* c_b   # `b` as a C string
cdef int la
cdef int lb

c_a = a
c_b = b
la = strlen(c_a)
lb = strlen(c_b)
cdef int lmin
if la < lb:
lmin = la
else:
lmin = lb
cdef int i
i = 0
while i < lmin:
if c_a[i] != c_b[i]:
return a[:i]
i = i + 1
if lmin == la:
return a
else:
return b

1000000 loops, best of 3: 0.803 usec per loop

Over ten times quicker than the original, for the sake of a couple of
small tweaks driven by looking at the C output.  Although the C still
looks very verbose at first glance, it's now substantially the same as
Alex's cexa.c.

Hope that helps,

--
Richie Hindle
richie at entrian.com

```