[pypy-commit] pypy py3k: hg merge default

arigo pypy.commits at gmail.com
Wed Aug 31 16:58:21 EDT 2016


Author: Armin Rigo <arigo at tunes.org>
Branch: py3k
Changeset: r86794:adcb5fc61bbe
Date: 2016-08-31 22:57 +0200
http://bitbucket.org/pypy/pypy/changeset/adcb5fc61bbe/

Log:	hg merge default

diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -365,7 +365,9 @@
         for op1, checkerfn in unroll_char_checker:
             if op1 == op:
                 return checkerfn(ctx, ptr, ppos)
-        raise Error("next_char_ok[%d]" % op)
+        # obscure case: it should be a single char pattern, but isn't
+        # one of the opcodes in unroll_char_checker (see test_ext_opcode)
+        return sre_match(ctx, ppos, ptr, self.start_marks) is not None
 
 class AbstractUntilMatchResult(MatchResult):
 
@@ -743,7 +745,8 @@
             minptr = start + ctx.pat(ppos+1)
             if minptr > ctx.end:
                 return    # cannot match
-            ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2))
+            ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2),
+                                      marks)
             # when we arrive here, ptr points to the tail of the target
             # string.  check if the rest of the pattern matches,
             # and backtrack if not.
@@ -765,7 +768,7 @@
                 if minptr > ctx.end:
                     return   # cannot match
                 # count using pattern min as the maximum
-                ptr = find_repetition_end(ctx, ppos+3, ptr, min)
+                ptr = find_repetition_end(ctx, ppos+3, ptr, min, marks)
                 if ptr < minptr:
                     return   # did not match minimum number of times
 
@@ -812,7 +815,7 @@
     return True
 
 @specializectx
-def find_repetition_end(ctx, ppos, ptr, maxcount):
+def find_repetition_end(ctx, ppos, ptr, maxcount, marks):
     end = ctx.end
     ptrp1 = ptr + 1
     # First get rid of the cases where we don't have room for any match.
@@ -827,8 +830,11 @@
         if op1 == op:
             if checkerfn(ctx, ptr, ppos):
                 break
+            return ptr
     else:
-        return ptr
+        # obscure case: it should be a single char pattern, but isn't
+        # one of the opcodes in unroll_char_checker (see test_ext_opcode)
+        return general_find_repetition_end(ctx, ppos, ptr, maxcount, marks)
     # It matches at least once.  If maxcount == 1 (relatively common),
     # then we are done.
     if maxcount == 1:
@@ -846,6 +852,19 @@
     raise Error("rsre.find_repetition_end[%d]" % op)
 
 @specializectx
+def general_find_repetition_end(ctx, ppos, ptr, maxcount, marks):
+    # moved into its own JIT-opaque function
+    end = ctx.end
+    if maxcount != rsre_char.MAXREPEAT:
+        # adjust end
+        end1 = ptr + maxcount
+        if end1 <= end:
+            end = end1
+    while ptr < end and sre_match(ctx, ppos, ptr, marks) is not None:
+        ptr += 1
+    return ptr
+
+@specializectx
 def match_ANY(ctx, ptr, ppos):   # dot wildcard.
     return not rsre_char.is_linebreak(ctx.str(ptr))
 def match_ANY_ALL(ctx, ptr, ppos):
diff --git a/rpython/rlib/rsre/test/test_ext_opcode.py b/rpython/rlib/rsre/test/test_ext_opcode.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rsre/test/test_ext_opcode.py
@@ -0,0 +1,26 @@
+"""
+Test for cases that cannot be produced using the Python 2.7 sre_compile
+module, but can be produced by other means (e.g. Python 3.5)
+"""
+
+from rpython.rlib.rsre import rsre_core
+from rpython.rlib.rsre.rsre_char import MAXREPEAT
+
+# import OPCODE_XX as XX
+for name, value in rsre_core.__dict__.items():
+    if name.startswith('OPCODE_') and isinstance(value, int):
+        globals()[name[7:]] = value
+
+
+def test_repeat_one_with_backref():
+    # Python 3.5 compiles "(.)\1*" using REPEAT_ONE instead of REPEAT:
+    # it's a valid optimization because \1 is always one character long
+    r = [MARK, 0, ANY, MARK, 1, REPEAT_ONE, 6, 0, MAXREPEAT, 
+         GROUPREF, 0, SUCCESS, SUCCESS]
+    assert rsre_core.match(r, "aaa").match_end == 3
+
+def test_min_repeat_one_with_backref():
+    # Python 3.5 compiles "(.)\1*?b" using MIN_REPEAT_ONE
+    r = [MARK, 0, ANY, MARK, 1, MIN_REPEAT_ONE, 6, 0, MAXREPEAT,
+         GROUPREF, 0, SUCCESS, LITERAL, 98, SUCCESS]
+    assert rsre_core.match(r, "aaab").match_end == 4


More information about the pypy-commit mailing list