[Python-checkins] r46394 - in sandbox/trunk/hotbuffer: Modules/_hotbuf.c Modules/fastsearch.h test_hotbuf.py
bob.ippolito
python-checkins at python.org
Fri May 26 21:13:41 CEST 2006
Author: bob.ippolito
Date: Fri May 26 21:13:40 2006
New Revision: 46394
Added:
sandbox/trunk/hotbuffer/Modules/fastsearch.h (contents, props changed)
Modified:
sandbox/trunk/hotbuffer/Modules/_hotbuf.c
sandbox/trunk/hotbuffer/test_hotbuf.py
Log:
add fast find and count functions
Modified: sandbox/trunk/hotbuffer/Modules/_hotbuf.c
==============================================================================
--- sandbox/trunk/hotbuffer/Modules/_hotbuf.c (original)
+++ sandbox/trunk/hotbuffer/Modules/_hotbuf.c Fri May 26 21:13:40 2006
@@ -20,7 +20,13 @@
#define writebufferproc getwritebufferproc
#define charbufferproc getcharbufferproc
#define segcountproc getsegcountproc
+#ifndef Py_LOCAL(rval)
+#define Py_LOCAL static rval
#endif
+#endif
+
+#define STRINGLIB_CHAR char
+#include "fastsearch.h"
static PyTypeObject PyHotbuf_Type;
static PyObject *HotbufOverflow = NULL;
@@ -685,7 +691,7 @@
"incorrect input type, require string");
return NULL;
}
- instring = PyString_AsString(arg);
+ instring = PyString_AS_STRING(arg);
len = PyString_GET_SIZE(arg);
CHECK_LIMIT_ERROR(len);
@@ -759,6 +765,65 @@
return result;
}
+PyDoc_STRVAR(find__doc__,
+"B.find(sub) -> int\n\
+\n\
+Return the lowest index in S where substring sub is found,\n\
+such that sub is contained within the current window.\n\
+\n\
+Return -1 on failure.");
+
+static PyObject*
+hotbuf_find(PyHotbufObject *self, PyObject* arg)
+{
+ Py_ssize_t result;
+
+ /* Check and extract input string */
+ if ( arg == NULL || !PyString_Check(arg) ) {
+ PyErr_SetString(PyExc_TypeError,
+ "incorrect input type, require string");
+ return NULL;
+ }
+
+ result = fastsearch(self->b_ptr + self->b_position,
+ self->b_limit - self->b_position,
+ PyString_AS_STRING(arg),
+ PyString_GET_SIZE(arg),
+ FAST_SEARCH);
+
+ return PyInt_FromSsize_t(result);
+}
+
+PyDoc_STRVAR(count__doc__,
+"B.count(sub) -> int\n\
+\n\
+Return the number of occurrences of substring sub in\n\
+the current window.");
+
+static PyObject*
+hotbuf_count(PyHotbufObject *self, PyObject* arg)
+{
+ Py_ssize_t result;
+
+ /* Check and extract input string */
+ if ( arg == NULL || !PyString_Check(arg) ) {
+ PyErr_SetString(PyExc_TypeError,
+ "incorrect input type, require string");
+ return NULL;
+ }
+
+ result = fastsearch(self->b_ptr + self->b_position,
+ self->b_limit - self->b_position,
+ PyString_AS_STRING(arg),
+ PyString_GET_SIZE(arg),
+ FAST_COUNT);
+
+ if (result == -1)
+ result = 0;
+
+ return PyInt_FromSsize_t(result);
+}
+
/* ===========================================================================
* Buffer protocol methods
@@ -941,6 +1006,8 @@
{"getstr", (PyCFunction)hotbuf_getstr, METH_VARARGS, getstr__doc__},
{"putstr", (PyCFunction)hotbuf_putstr, METH_O, putstr__doc__},
{"unpack", (PyCFunction)hotbuf_unpack, METH_O, unpack__doc__},
+ {"find", (PyCFunction)hotbuf_find, METH_O, find__doc__},
+ {"count", (PyCFunction)hotbuf_count, METH_O, count__doc__},
{NULL, NULL} /* sentinel */
};
Added: sandbox/trunk/hotbuffer/Modules/fastsearch.h
==============================================================================
--- (empty file)
+++ sandbox/trunk/hotbuffer/Modules/fastsearch.h Fri May 26 21:13:40 2006
@@ -0,0 +1,104 @@
+/* stringlib: fastsearch implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#define STRINGLIB_FASTSEARCH_H
+
+/* fast search/count implementation, based on a mix between boyer-
+ moore and horspool, with a few more bells and whistles on the top.
+ for some more background, see: http://effbot.org/stringlib */
+
+/* note: fastsearch may access s[n], which isn't a problem when using
+ Python's ordinary string types, but may cause problems if you're
+ using this code in other contexts. also, the count mode returns -1
+ if there cannot possible be a match in the target string, and 0 if
+ it has actually checked for matches, but didn't find any. callers
+ beware! */
+
+#define FAST_COUNT 0
+#define FAST_SEARCH 1
+
+Py_LOCAL(Py_ssize_t)
+fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
+ const STRINGLIB_CHAR* p, Py_ssize_t m,
+ int mode)
+{
+ long mask;
+ Py_ssize_t skip, count = 0;
+ Py_ssize_t i, j, mlast, w;
+
+ w = n - m;
+
+ if (w < 0)
+ return -1;
+
+ /* look for special cases */
+ if (m <= 1) {
+ if (m <= 0)
+ return -1;
+ /* use special case for 1-character strings */
+ if (mode == FAST_COUNT) {
+ for (i = 0; i < n; i++)
+ if (s[i] == p[0])
+ count++;
+ return count;
+ } else {
+ for (i = 0; i < n; i++)
+ if (s[i] == p[0])
+ return i;
+ }
+ return -1;
+ }
+
+ mlast = m - 1;
+
+ /* create compressed boyer-moore delta 1 table */
+ skip = mlast - 1;
+ /* process pattern[:-1] */
+ for (mask = i = 0; i < mlast; i++) {
+ mask |= (1 << (p[i] & 0x1F));
+ if (p[i] == p[mlast])
+ skip = mlast - i - 1;
+ }
+ /* process pattern[-1] outside the loop */
+ mask |= (1 << (p[mlast] & 0x1F));
+
+ for (i = 0; i <= w; i++) {
+ /* note: using mlast in the skip path slows things down on x86 */
+ if (s[i+m-1] == p[m-1]) {
+ /* candidate match */
+ for (j = 0; j < mlast; j++)
+ if (s[i+j] != p[j])
+ break;
+ if (j == mlast) {
+ /* got a match! */
+ if (mode != FAST_COUNT)
+ return i;
+ count++;
+ i = i + mlast;
+ continue;
+ }
+ /* miss: check if next character is part of pattern */
+ if (!(mask & (1 << (s[i+m] & 0x1F))))
+ i = i + m;
+ else
+ i = i + skip;
+ } else {
+ /* skip: check if next character is part of pattern */
+ if (!(mask & (1 << (s[i+m] & 0x1F))))
+ i = i + m;
+ }
+ }
+
+ if (mode != FAST_COUNT)
+ return -1;
+ return count;
+}
+
+#endif
+
+/*
+Local variables:
+c-basic-offset: 4
+indent-tabs-mode: nil
+End:
+*/
Modified: sandbox/trunk/hotbuffer/test_hotbuf.py
==============================================================================
--- sandbox/trunk/hotbuffer/test_hotbuf.py (original)
+++ sandbox/trunk/hotbuffer/test_hotbuf.py Fri May 26 21:13:40 2006
@@ -227,6 +227,55 @@
self.assertEquals(str(b), '')
self.assertEquals(b.getstr(), '')
+ def test_count(self):
+ b = hotbuf(CAPACITY)
+ b.putstr('abcddddd')
+ b.flip()
+ b.limit = 3
+ self.assertEquals(b.count('ab'), 1)
+ self.assertEquals(b.count('d'), 0)
+ self.assertEquals(b.count('1' * 100), 0)
+ b.limit = 4
+ self.assertEquals(b.count('ab'), 1)
+ self.assertEquals(b.count('d'), 1)
+ self.assertEquals(b.count('1' * 100), 0)
+ b.limit = 5
+ self.assertEquals(b.count('ab'), 1)
+ self.assertEquals(b.count('d'), 2)
+ self.assertEquals(b.count('1' * 100), 0)
+ b.advance(1)
+ self.assertEquals(b.count('ab'), 0)
+ self.assertEquals(b.count('d'), 2)
+ self.assertEquals(b.count('1' * 100), 0)
+ b.limit += 1
+ self.assertEquals(b.count('ab'), 0)
+ self.assertEquals(b.count('d'), 3)
+ self.assertEquals(b.count('1' * 100), 0)
+
+ def test_find(self):
+ b = hotbuf(CAPACITY)
+ b.putstr('abcddddd')
+ b.flip()
+ b.limit = 3
+ self.assertEquals(b.find('ab'), 0)
+ self.assertEquals(b.find('d'), -1)
+ self.assertEquals(b.find('1' * 100), -1)
+ b.limit = 4
+ self.assertEquals(b.find('ab'), 0)
+ self.assertEquals(b.find('d'), 3)
+ self.assertEquals(b.find('1' * 100), -1)
+ b.limit = 5
+ self.assertEquals(b.find('ab'), 0)
+ self.assertEquals(b.find('d'), 3)
+ self.assertEquals(b.find('1' * 100), -1)
+ b.advance(1)
+ self.assertEquals(b.find('ab'), -1)
+ self.assertEquals(b.find('d'), 2)
+ self.assertEquals(b.find('1' * 100), -1)
+ b.limit += 1
+ self.assertEquals(b.find('ab'), -1)
+ self.assertEquals(b.find('d'), 2)
+ self.assertEquals(b.find('1' * 100), -1)
def test_main():
test_support.run_unittest(HotbufTestCase)
More information about the Python-checkins
mailing list