[pypy-commit] pypy default: Tests and fix (thanks defnull): handle zero-width matches differently in greedy repetition operators
arigo
noreply at buildbot.pypy.org
Mon Jun 9 19:57:21 CEST 2014
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r72006:c4a666833c26
Date: 2014-06-09 19:56 +0200
http://bitbucket.org/pypy/pypy/changeset/c4a666833c26/
Log: Tests and fix (thanks defnull): handle zero-width matches
differently in greedy repetition operators, in what is hopefully the
same way as CPython.
diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -418,32 +418,33 @@
marks = p.marks
enum = p.enum.move_to_next_result(ctx)
#
- # zero-width match protection
min = ctx.pat(ppos+1)
- if self.num_pending >= min:
- while enum is not None and ptr == ctx.match_end:
- enum = enum.move_to_next_result(ctx)
- # matched marks for zero-width assertions
- marks = ctx.match_marks
- #
if enum is not None:
# matched one more 'item'. record it and continue.
+ last_match_length = ctx.match_end - ptr
self.pending = Pending(ptr, marks, enum, self.pending)
self.num_pending += 1
ptr = ctx.match_end
marks = ctx.match_marks
- match_more = True
- else:
- # 'item' no longer matches.
- if self.num_pending >= min:
- # try to match 'tail' if we have enough 'item'
- result = sre_match(ctx, tailppos, ptr, marks)
- if result is not None:
- self.subresult = result
- self.cur_ptr = ptr
- self.cur_marks = marks
- return self
- match_more = False
+ if last_match_length == 0 and self.num_pending >= min:
+ # zero-width protection: after an empty match, if there
+ # are enough matches, don't try to match more. Instead,
+ # fall through to trying to match 'tail'.
+ pass
+ else:
+ match_more = True
+ continue
+
+ # 'item' no longer matches.
+ if self.num_pending >= min:
+ # try to match 'tail' if we have enough 'item'
+ result = sre_match(ctx, tailppos, ptr, marks)
+ if result is not None:
+ self.subresult = result
+ self.cur_ptr = ptr
+ self.cur_marks = marks
+ return self
+ match_more = False
class MinUntilMatchResult(AbstractUntilMatchResult):
diff --git a/rpython/rlib/rsre/test/test_search.py b/rpython/rlib/rsre/test/test_search.py
--- a/rpython/rlib/rsre/test/test_search.py
+++ b/rpython/rlib/rsre/test/test_search.py
@@ -149,8 +149,11 @@
def test_empty_maxuntil(self):
r_code, r = get_code_and_re(r'(a?)+y')
assert r.match('y')
+ assert r.match('aaayaaay').span() == (0, 4)
res = rsre_core.match(r_code, 'y')
assert res
+ res = rsre_core.match(r_code, 'aaayaaay')
+ assert res and res.span() == (0, 4)
#
r_code, r = get_code_and_re(r'(a?){4,6}y')
assert r.match('y')
@@ -162,6 +165,14 @@
res = rsre_core.match(r_code, 'y')
assert res
+ def test_empty_maxuntil_2(self):
+ r_code, r = get_code_and_re(r'X(.*?)+X')
+ assert r.match('XfooXbarX').span() == (0, 5)
+ assert r.match('XfooXbarX').span(1) == (4, 4)
+ res = rsre_core.match(r_code, 'XfooXbarX')
+ assert res.span() == (0, 5)
+ assert res.span(1) == (4, 4)
+
def test_empty_minuntil(self):
r_code, r = get_code_and_re(r'(a?)+?y')
#assert not r.match('z') -- CPython bug (at least 2.5) eats all memory
More information about the pypy-commit mailing list