[pypy-svn] r75927 - pypy/branch/rsre2/pypy/rlib/rsre

arigo at codespeak.net arigo at codespeak.net
Tue Jul 6 17:35:37 CEST 2010


Author: arigo
Date: Tue Jul  6 17:35:36 2010
New Revision: 75927

Modified:
   pypy/branch/rsre2/pypy/rlib/rsre/rsre.py
Log:
Fast search.


Modified: pypy/branch/rsre2/pypy/rlib/rsre/rsre.py
==============================================================================
--- pypy/branch/rsre2/pypy/rlib/rsre/rsre.py	(original)
+++ pypy/branch/rsre2/pypy/rlib/rsre/rsre.py	Tue Jul  6 17:35:36 2010
@@ -711,6 +711,13 @@
 
 def search(pattern, string, start=0, flags=0):
     ctx = MatchContext(pattern, string, start, flags)
+    if (ctx.pat(0) == OPCODE_INFO and
+            ctx.pat(2) & rsre_char.SRE_INFO_PREFIX and ctx.pat(5) > 1):
+        return fast_search(ctx)     # INFO block has a prefix of length > 1
+    return regular_search(ctx)      # otherwise try each start position
+
+def regular_search(ctx):
+    start = ctx.match_start
     while start <= ctx.end:
         result = sre_match(ctx, 0, start, None)
         if result is not None:
@@ -720,3 +727,48 @@
             return ctx
         start += 1
     return None
+
+def fast_search(ctx):
+    # Scans for the literal prefix described by the pattern's optimization
+    # info block, then lets sre_match() check the rest of the pattern.
+    # Returns ctx with match_start/match_end/match_marks set, or None.
+    # <INFO> <1=skip> <2=flags> <3=min> <4=...>
+    #        <5=length> <6=skip> <7=prefix data> <overlap data>
+    prefix_len = ctx.pat(5)
+    assert prefix_len >= 0
+    prefix_skip = ctx.pat(6)
+    assert prefix_skip >= 0
+    overlap_offset = 7 + prefix_len - 1
+    assert overlap_offset >= 0
+    pattern_offset = ctx.pat(1) + 1
+    assert pattern_offset >= 0
+    i = 0    # number of prefix characters matched so far
+    string_position = ctx.match_start
+    end = ctx.end
+    while string_position < end:
+        while True:
+            char_ord = ctx.str(string_position)
+            if char_ord != ctx.pat(7 + i):
+                if i == 0:
+                    break
+                else:
+                    i = ctx.pat(overlap_offset + i)  # retry this char
+            else:
+                i += 1
+                if i == prefix_len:
+                    # Found the whole prefix.  Pure-literal patterns go through
+                    # sre_match() as well, so that every hit returns ctx with
+                    # the match fields filled in (never a bare True).
+                    start = string_position + 1 - prefix_len
+                    ptr = start + prefix_skip
+                    ppos = pattern_offset + 2 * prefix_skip
+                    result = sre_match(ctx, ppos, ptr, None)
+                    if result is not None:
+                        ctx.match_start = start
+                        ctx.match_end = result.end
+                        ctx.match_marks = result.marks
+                        return ctx
+                    i = ctx.pat(overlap_offset + i)
+                break
+        string_position += 1
+    return None



More information about the Pypy-commit mailing list