[Python-checkins] r77471 - in python/branches/py3k: Objects/stringlib/fastsearch.h Objects/unicodeobject.c

antoine.pitrou python-checkins at python.org
Wed Jan 13 15:19:13 CET 2010


Author: antoine.pitrou
Date: Wed Jan 13 15:19:12 2010
New Revision: 77471

Log:
Merged revisions 77469-77470 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r77469 | antoine.pitrou | 2010-01-13 14:43:37 +0100 (mer., 13 janv. 2010) | 3 lines
  
  Test commit to try to diagnose failures of the IA-64 buildbot
........
  r77470 | antoine.pitrou | 2010-01-13 15:01:26 +0100 (mer., 13 janv. 2010) | 3 lines
  
  Sanitize bloom filter macros
........


Modified:
   python/branches/py3k/   (props changed)
   python/branches/py3k/Objects/stringlib/fastsearch.h
   python/branches/py3k/Objects/unicodeobject.c

Modified: python/branches/py3k/Objects/stringlib/fastsearch.h
==============================================================================
--- python/branches/py3k/Objects/stringlib/fastsearch.h	(original)
+++ python/branches/py3k/Objects/stringlib/fastsearch.h	Wed Jan 13 15:19:12 2010
@@ -18,15 +18,27 @@
 #define FAST_SEARCH 1
 #define FAST_RSEARCH 2
 
-#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & (LONG_BIT - 1)))))
-#define BLOOM(mask, ch)     ((mask &  (1 << ((ch) & (LONG_BIT - 1)))))
+#if LONG_BIT >= 128
+#define STRINGLIB_BLOOM_WIDTH 128
+#elif LONG_BIT >= 64
+#define STRINGLIB_BLOOM_WIDTH 64
+#elif LONG_BIT >= 32
+#define STRINGLIB_BLOOM_WIDTH 32
+#else
+#error "LONG_BIT is smaller than 32"
+#endif
+
+#define STRINGLIB_BLOOM_ADD(mask, ch) \
+    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
+#define STRINGLIB_BLOOM(mask, ch)     \
+    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
 
 Py_LOCAL_INLINE(Py_ssize_t)
 fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
            const STRINGLIB_CHAR* p, Py_ssize_t m,
            Py_ssize_t maxcount, int mode)
 {
-    long mask;
+    unsigned long mask;
     Py_ssize_t skip, count = 0;
     Py_ssize_t i, j, mlast, w;
 
@@ -70,12 +82,12 @@
 
         /* process pattern[:-1] */
         for (i = 0; i < mlast; i++) {
-            BLOOM_ADD(mask, p[i]);
+            STRINGLIB_BLOOM_ADD(mask, p[i]);
             if (p[i] == p[mlast])
                 skip = mlast - i - 1;
         }
         /* process pattern[-1] outside the loop */
-        BLOOM_ADD(mask, p[mlast]);
+        STRINGLIB_BLOOM_ADD(mask, p[mlast]);
 
         for (i = 0; i <= w; i++) {
             /* note: using mlast in the skip path slows things down on x86 */
@@ -95,13 +107,13 @@
                     continue;
                 }
                 /* miss: check if next character is part of pattern */
-                if (!BLOOM(mask, s[i+m]))
+                if (!STRINGLIB_BLOOM(mask, s[i+m]))
                     i = i + m;
                 else
                     i = i + skip;
             } else {
                 /* skip: check if next character is part of pattern */
-                if (!BLOOM(mask, s[i+m]))
+                if (!STRINGLIB_BLOOM(mask, s[i+m]))
                     i = i + m;
             }
         }
@@ -110,10 +122,10 @@
         /* create compressed boyer-moore delta 1 table */
 
         /* process pattern[0] outside the loop */
-        BLOOM_ADD(mask, p[0]);
+        STRINGLIB_BLOOM_ADD(mask, p[0]);
         /* process pattern[:0:-1] */
         for (i = mlast; i > 0; i--) {
-            BLOOM_ADD(mask, p[i]);
+            STRINGLIB_BLOOM_ADD(mask, p[i]);
             if (p[i] == p[0])
                 skip = i - 1;
         }
@@ -128,13 +140,13 @@
                     /* got a match! */
                     return i;
                 /* miss: check if previous character is part of pattern */
-                if (!BLOOM(mask, s[i-1]))
+                if (!STRINGLIB_BLOOM(mask, s[i-1]))
                     i = i - m;
                 else
                     i = i - skip;
             } else {
                 /* skip: check if previous character is part of pattern */
-                if (!BLOOM(mask, s[i-1]))
+                if (!STRINGLIB_BLOOM(mask, s[i-1]))
                     i = i - m;
             }
         }

Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Wed Jan 13 15:19:12 2010
@@ -206,12 +206,22 @@
 
 /* the linebreak mask is set up by Unicode_Init below */
 
+#if LONG_BIT >= 128
+#define BLOOM_WIDTH 128
+#elif LONG_BIT >= 64
+#define BLOOM_WIDTH 64
+#elif LONG_BIT >= 32
+#define BLOOM_WIDTH 32
+#else
+#error "LONG_BIT is smaller than 32"
+#endif
+
 #define BLOOM_MASK unsigned long
 
 static BLOOM_MASK bloom_linebreak;
 
-#define BLOOM_ADD(mask, ch) ((mask |= (1 << ((ch) & (LONG_BIT - 1)))))
-#define BLOOM(mask, ch)     ((mask &  (1 << ((ch) & (LONG_BIT - 1)))))
+#define BLOOM_ADD(mask, ch) ((mask |= (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
+#define BLOOM(mask, ch)     ((mask &  (1UL << ((ch) & (BLOOM_WIDTH - 1)))))
 
 #define BLOOM_LINEBREAK(ch)                                             \
     ((ch) < 128U ? ascii_linebreak[(ch)] :                              \
@@ -221,7 +231,7 @@
 {
     /* calculate simple bloom-style bitmask for a given unicode string */
 
-    long mask;
+    BLOOM_MASK mask;
     Py_ssize_t i;
 
     mask = 0;


More information about the Python-checkins mailing list