[pypy-commit] pypy py3k: hg merge default
arigo
pypy.commits at gmail.com
Wed Aug 31 16:58:21 EDT 2016
Author: Armin Rigo <arigo at tunes.org>
Branch: py3k
Changeset: r86794:adcb5fc61bbe
Date: 2016-08-31 22:57 +0200
http://bitbucket.org/pypy/pypy/changeset/adcb5fc61bbe/
Log: hg merge default
diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -365,7 +365,9 @@
for op1, checkerfn in unroll_char_checker:
if op1 == op:
return checkerfn(ctx, ptr, ppos)
- raise Error("next_char_ok[%d]" % op)
+ # obscure case: it should be a single char pattern, but isn't
+ # one of the opcodes in unroll_char_checker (see test_ext_opcode)
+ return sre_match(ctx, ppos, ptr, self.start_marks) is not None
class AbstractUntilMatchResult(MatchResult):
@@ -743,7 +745,8 @@
minptr = start + ctx.pat(ppos+1)
if minptr > ctx.end:
return # cannot match
- ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2))
+ ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2),
+ marks)
# when we arrive here, ptr points to the tail of the target
# string. check if the rest of the pattern matches,
# and backtrack if not.
@@ -765,7 +768,7 @@
if minptr > ctx.end:
return # cannot match
# count using pattern min as the maximum
- ptr = find_repetition_end(ctx, ppos+3, ptr, min)
+ ptr = find_repetition_end(ctx, ppos+3, ptr, min, marks)
if ptr < minptr:
return # did not match minimum number of times
@@ -812,7 +815,7 @@
return True
@specializectx
-def find_repetition_end(ctx, ppos, ptr, maxcount):
+def find_repetition_end(ctx, ppos, ptr, maxcount, marks):
end = ctx.end
ptrp1 = ptr + 1
# First get rid of the cases where we don't have room for any match.
@@ -827,8 +830,11 @@
if op1 == op:
if checkerfn(ctx, ptr, ppos):
break
+ return ptr
else:
- return ptr
+ # obscure case: it should be a single char pattern, but isn't
+ # one of the opcodes in unroll_char_checker (see test_ext_opcode)
+ return general_find_repetition_end(ctx, ppos, ptr, maxcount, marks)
# It matches at least once. If maxcount == 1 (relatively common),
# then we are done.
if maxcount == 1:
@@ -846,6 +852,19 @@
raise Error("rsre.find_repetition_end[%d]" % op)
@specializectx
+def general_find_repetition_end(ctx, ppos, ptr, maxcount, marks):
+ # moved into its own JIT-opaque function
+ end = ctx.end
+ if maxcount != rsre_char.MAXREPEAT:
+ # adjust end
+ end1 = ptr + maxcount
+ if end1 <= end:
+ end = end1
+ while ptr < end and sre_match(ctx, ppos, ptr, marks) is not None:
+ ptr += 1
+ return ptr
+
+@specializectx
def match_ANY(ctx, ptr, ppos): # dot wildcard.
return not rsre_char.is_linebreak(ctx.str(ptr))
def match_ANY_ALL(ctx, ptr, ppos):
diff --git a/rpython/rlib/rsre/test/test_ext_opcode.py b/rpython/rlib/rsre/test/test_ext_opcode.py
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/rsre/test/test_ext_opcode.py
@@ -0,0 +1,26 @@
+"""
+Test for cases that cannot be produced using the Python 2.7 sre_compile
+module, but can be produced by other means (e.g. Python 3.5)
+"""
+
+from rpython.rlib.rsre import rsre_core
+from rpython.rlib.rsre.rsre_char import MAXREPEAT
+
+# import OPCODE_XX as XX
+for name, value in rsre_core.__dict__.items():
+ if name.startswith('OPCODE_') and isinstance(value, int):
+ globals()[name[7:]] = value
+
+
+def test_repeat_one_with_backref():
+ # Python 3.5 compiles "(.)\1*" using REPEAT_ONE instead of REPEAT:
+ # it's a valid optimization because \1 is always one character long
+ r = [MARK, 0, ANY, MARK, 1, REPEAT_ONE, 6, 0, MAXREPEAT,
+ GROUPREF, 0, SUCCESS, SUCCESS]
+ assert rsre_core.match(r, "aaa").match_end == 3
+
+def test_min_repeat_one_with_backref():
+ # Python 3.5 compiles "(.)\1*?b" using MIN_REPEAT_ONE
+ r = [MARK, 0, ANY, MARK, 1, MIN_REPEAT_ONE, 6, 0, MAXREPEAT,
+ GROUPREF, 0, SUCCESS, LITERAL, 98, SUCCESS]
+ assert rsre_core.match(r, "aaab").match_end == 4
More information about the pypy-commit
mailing list