[pypy-commit] pypy default: (arigo prompted by fijal)

Armin Rigo noreply at buildbot.pypy.org
Wed Jun 1 18:29:40 CEST 2011


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r44628:ddf426bd739d
Date: 2011-06-01 18:42 +0200
http://bitbucket.org/pypy/pypy/changeset/ddf426bd739d/

Log:	(arigo prompted by fijal)

	Change find_repetition_end() to always inline the first check. Helps
	a lot in examples like the one in the newly added test, by removing
	the residual call to fre().

diff --git a/pypy/rlib/rsre/rsre_core.py b/pypy/rlib/rsre/rsre_core.py
--- a/pypy/rlib/rsre/rsre_core.py
+++ b/pypy/rlib/rsre/rsre_core.py
@@ -759,17 +759,27 @@
 @specializectx
 def find_repetition_end(ctx, ppos, ptr, maxcount):
     end = ctx.end
-    if maxcount <= 1:
-        if maxcount == 1 and ptr < end:
-            # Relatively common case: maxcount == 1.  If we are not at the
-            # end of the string, it's done by a single direct check.
-            op = ctx.pat(ppos)
-            for op1, checkerfn in unroll_char_checker:
-                if op1 == op:
-                    if checkerfn(ctx, ptr, ppos):
-                        return ptr + 1
+    ptrp1 = ptr + 1
+    # First get rid of the cases where we don't have room for any match.
+    if maxcount <= 0 or ptrp1 > end:
         return ptr
-    elif maxcount != 65535:
+    # Check the first character directly.  If it doesn't match, we are done.
+    # The idea is to be fast for cases like re.search("b+"), where we expect
+    # the common case to be a non-match.  It's much faster with the JIT to
+    # have the non-match inlined here rather than detect it in the fre() call.
+    op = ctx.pat(ppos)
+    for op1, checkerfn in unroll_char_checker:
+        if op1 == op:
+            if checkerfn(ctx, ptr, ppos):
+                break
+    else:
+        return ptr
+    # It matches at least once.  If maxcount == 1 (relatively common),
+    # then we are done.
+    if maxcount == 1:
+        return ptrp1
+    # Else we really need to count how many times it matches.
+    if maxcount != 65535:
         # adjust end
         end1 = ptr + maxcount
         if end1 <= end:
@@ -777,7 +787,7 @@
     op = ctx.pat(ppos)
     for op1, fre in unroll_fre_checker:
         if op1 == op:
-            return fre(ctx, ptr, end, ppos)
+            return fre(ctx, ptrp1, end, ppos)
     raise Error("rsre.find_repetition_end[%d]" % op)
 
 @specializectx
diff --git a/pypy/rlib/rsre/test/test_zjit.py b/pypy/rlib/rsre/test/test_zjit.py
--- a/pypy/rlib/rsre/test/test_zjit.py
+++ b/pypy/rlib/rsre/test/test_zjit.py
@@ -160,3 +160,9 @@
         res = self.meta_interp_match(r"<[\S ]+>", "<..a   .. aa>")
         assert res == 13
         self.check_enter_count(1)
+
+
+    def test_find_repetition_end_fastpath(self):
+        res = self.meta_interp_search(r"b+", "a"*30 + "b")
+        assert res == 30
+        self.check_loops(call=0)


More information about the pypy-commit mailing list