[pypy-svn] r16556 - pypy/dist/pypy/module/_sre
nik at codespeak.net
nik at codespeak.net
Thu Aug 25 22:05:20 CEST 2005
Author: nik
Date: Thu Aug 25 22:05:19 2005
New Revision: 16556
Modified:
pypy/dist/pypy/module/_sre/app_sre.py
pypy/dist/pypy/module/_sre/interp_sre.py
Log:
implemented op_repeat_one with new scheme
Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py (original)
+++ pypy/dist/pypy/module/_sre/app_sre.py Thu Aug 25 22:05:19 2005
@@ -454,69 +454,6 @@
self.executing_contexts[id(context)] = generator
return has_finished
- def op_repeat_one(self, ctx):
- # match repeated sequence (maximizing).
- # this operator only works if the repeated item is exactly one character
- # wide, and we're not already collecting backtracking points.
- # <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
- mincount = ctx.peek_code(2)
- maxcount = ctx.peek_code(3)
- #self._log(ctx, "REPEAT_ONE", mincount, maxcount)
-
- if ctx.remaining_chars() < mincount:
- ctx.has_matched = NOT_MATCHED
- yield True
- ctx.state.string_position = ctx.string_position
- count = self.count_repetitions(ctx, maxcount)
- ctx.skip_char(count)
- if count < mincount:
- ctx.has_matched = NOT_MATCHED
- yield True
- if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["success"]:
- # tail is empty. we're finished
- ctx.state.string_position = ctx.string_position
- ctx.has_matched = MATCHED
- yield True
-
- ctx.state.marks_push()
- if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["literal"]:
- # Special case: Tail starts with a literal. Skip positions where
- # the rest of the pattern cannot possibly match.
- char = ctx.peek_code(ctx.peek_code(1) + 2)
- while True:
- while count >= mincount and \
- (ctx.at_end() or ord(ctx.peek_char()) != char):
- ctx.skip_char(-1)
- count -= 1
- if count < mincount:
- break
- ctx.state.string_position = ctx.string_position
- child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
- yield False
- if child_context.has_matched == MATCHED:
- ctx.has_matched = MATCHED
- yield True
- ctx.skip_char(-1)
- count -= 1
- ctx.state.marks_pop_keep()
-
- else:
- # General case: backtracking
- while count >= mincount:
- ctx.state.string_position = ctx.string_position
- child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
- yield False
- if child_context.has_matched == MATCHED:
- ctx.has_matched = MATCHED
- yield True
- ctx.skip_char(-1)
- count -= 1
- ctx.state.marks_pop_keep()
-
- ctx.state.marks_pop_discard()
- ctx.has_matched = NOT_MATCHED
- yield True
-
def op_min_repeat_one(self, ctx):
# match repeated sequence (minimizing)
# <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py (original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py Thu Aug 25 22:05:19 2005
@@ -13,6 +13,7 @@
# XXX can we import those safely from sre_constants?
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_UNICODE = 32 # use unicode locale
+MAXREPEAT = 65535
def getlower(space, w_char_ord, w_flags):
char_ord = space.int_w(w_char_ord)
@@ -454,18 +455,18 @@
def op_branch(space, ctx):
# alternation
# <BRANCH> <0=skip> code <JUMP> ... <NULL>
- if ctx.is_resumed():
- last_branch_length = ctx.restore_values()[0]
+ if not ctx.is_resumed():
+ ctx.state.marks_push()
+ ctx.skip_code(1)
+ current_branch_length = ctx.peek_code(0)
+ else:
if ctx.child_context.has_matched == ctx.MATCHED:
ctx.has_matched = ctx.MATCHED
return True
ctx.state.marks_pop_keep()
+ last_branch_length = ctx.restore_values()[0]
ctx.skip_code(last_branch_length)
current_branch_length = ctx.peek_code(0)
- else:
- ctx.state.marks_push()
- ctx.skip_code(1)
- current_branch_length = ctx.peek_code(0)
if current_branch_length:
ctx.state.string_position = ctx.string_position
ctx.push_new_context(1)
@@ -475,6 +476,58 @@
ctx.has_matched = ctx.NOT_MATCHED
return True
+def op_repeat_one(space, ctx):
+ # match repeated sequence (maximizing).
+ # this operator only works if the repeated item is exactly one character
+ # wide, and we're not already collecting backtracking points.
+ # <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
+
+ # Case 1: First entry point
+ if not ctx.is_resumed():
+ mincount = ctx.peek_code(2)
+ maxcount = ctx.peek_code(3)
+ if ctx.remaining_chars() < mincount:
+ ctx.has_matched = ctx.NOT_MATCHED
+ return True
+ ctx.state.string_position = ctx.string_position
+ count = count_repetitions(space, ctx, maxcount)
+ ctx.skip_char(count)
+ if count < mincount:
+ ctx.has_matched = ctx.NOT_MATCHED
+ return True
+ if ctx.peek_code(ctx.peek_code(1) + 1) == 1: # 1 == OPCODES["success"]
+ # tail is empty. we're finished
+ ctx.state.string_position = ctx.string_position
+ ctx.has_matched = ctx.MATCHED
+ return True
+ ctx.state.marks_push()
+ # XXX literal optimization missing here
+
+ # Case 2: Repetition is resumed (aka backtracked)
+ else:
+ if ctx.child_context.has_matched == ctx.MATCHED:
+ ctx.has_matched = ctx.MATCHED
+ return True
+ values = ctx.restore_values()
+ mincount = values[0]
+ count = values[1]
+ ctx.skip_char(-1)
+ count -= 1
+ ctx.state.marks_pop_keep()
+
+ # Initialize the actual backtracking
+ if count >= mincount:
+ ctx.state.string_position = ctx.string_position
+ ctx.push_new_context(ctx.peek_code(1) + 1)
+ ctx.backup_value(mincount)
+ ctx.backup_value(count)
+ return False
+
+ # Backtracking failed
+ ctx.state.marks_pop_discard()
+ ctx.has_matched = ctx.NOT_MATCHED
+ return True
+
def op_jump(space, ctx):
# jump forward
# <JUMP>/<INFO> <offset>
@@ -527,6 +580,34 @@
ctx.skip_code(3)
return True
+def count_repetitions(space, ctx, maxcount):
+ """Returns the number of repetitions of a single item, starting from the
+ current string position. The code pointer is expected to point to a
+ REPEAT_ONE operation (with the repeated item 4 codes ahead)."""
+ count = 0
+ real_maxcount = ctx.state.end - ctx.string_position
+ if maxcount < real_maxcount and maxcount != MAXREPEAT:
+ real_maxcount = maxcount
+ # XXX could special case every single character pattern here, as in C.
+ # This is a general solution, a bit hackish, but works and should be
+ # efficient.
+ code_position = ctx.code_position
+ string_position = ctx.string_position
+ ctx.skip_code(4)
+ reset_position = ctx.code_position
+ while count < real_maxcount:
+ # this works because the single character pattern is followed by
+ # a success opcode
+ ctx.code_position = reset_position
+ opcode_dispatch_table[ctx.peek_code()](space, ctx)
+ if ctx.has_matched == ctx.NOT_MATCHED:
+ break
+ count += 1
+ ctx.has_matched = ctx.UNDECIDED
+ ctx.code_position = code_position
+ ctx.string_position = string_position
+ return count
+
opcode_dispatch_table = [
op_failure, op_success,
op_any, op_any_all,
@@ -547,7 +628,7 @@
None, #NEGATE,
None, #RANGE,
None, #REPEAT,
- None, #REPEAT_ONE,
+ op_repeat_one,
None, #SUBPATTERN,
None, #MIN_REPEAT_ONE
]
More information about the Pypy-commit
mailing list