[Python-checkins] cpython: Optimize make_bloom_mask(), used by str.strip(), str.lstrip() and str.rstrip()
victor.stinner
python-checkins at python.org
Tue Apr 9 22:29:23 CEST 2013
http://hg.python.org/cpython/rev/9c49f01322e9
changeset: 83218:9c49f01322e9
user: Victor Stinner <victor.stinner at gmail.com>
date: Tue Apr 09 21:53:54 2013 +0200
summary:
Optimize make_bloom_mask(), used by str.strip(), str.lstrip() and str.rstrip()
Write a specialized loop per Unicode kind (1-byte, 2-byte and 4-byte), generated
by the local BLOOM_UPDATE() macro, to avoid the expensive PyUnicode_READ() macro.
files:
Objects/unicodeobject.c | 32 ++++++++++++++++++++++++----
1 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -543,7 +543,6 @@
static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
-#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM(mask, ch) ((mask & (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
#define BLOOM_LINEBREAK(ch) \
@@ -553,16 +552,39 @@
Py_LOCAL_INLINE(BLOOM_MASK)
make_bloom_mask(int kind, void* ptr, Py_ssize_t len)
{
+#define BLOOM_UPDATE(TYPE, MASK, PTR, LEN) \
+ do { \
+ TYPE *data = (TYPE *)PTR; \
+ TYPE *end = data + LEN; \
+ Py_UCS4 ch; \
+ for (; data != end; data++) { \
+ ch = *data; \
+ MASK |= (1UL << (ch & (BLOOM_WIDTH - 1))); \
+ } \
+ break; \
+ } while (0)
+
/* calculate simple bloom-style bitmask for a given unicode string */
BLOOM_MASK mask;
- Py_ssize_t i;
mask = 0;
- for (i = 0; i < len; i++)
- BLOOM_ADD(mask, PyUnicode_READ(kind, ptr, i));
-
+ switch (kind) {
+ case PyUnicode_1BYTE_KIND:
+ BLOOM_UPDATE(Py_UCS1, mask, ptr, len);
+ break;
+ case PyUnicode_2BYTE_KIND:
+ BLOOM_UPDATE(Py_UCS2, mask, ptr, len);
+ break;
+ case PyUnicode_4BYTE_KIND:
+ BLOOM_UPDATE(Py_UCS4, mask, ptr, len);
+ break;
+ default:
+ assert(0);
+ }
return mask;
+
+#undef BLOOM_UPDATE
}
#define BLOOM_MEMBER(mask, chr, str) \
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list