[pypy-svn] r16556 - pypy/dist/pypy/module/_sre

nik at codespeak.net nik at codespeak.net
Thu Aug 25 22:05:20 CEST 2005


Author: nik
Date: Thu Aug 25 22:05:19 2005
New Revision: 16556

Modified:
   pypy/dist/pypy/module/_sre/app_sre.py
   pypy/dist/pypy/module/_sre/interp_sre.py
Log:
implemented op_repeat_one with new scheme


Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/app_sre.py	Thu Aug 25 22:05:19 2005
@@ -454,69 +454,6 @@
             self.executing_contexts[id(context)] = generator
         return has_finished
 
-    def op_repeat_one(self, ctx):
-        # match repeated sequence (maximizing).
-        # this operator only works if the repeated item is exactly one character
-        # wide, and we're not already collecting backtracking points.
-        # <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
-        mincount = ctx.peek_code(2)
-        maxcount = ctx.peek_code(3)
-        #self._log(ctx, "REPEAT_ONE", mincount, maxcount)
-
-        if ctx.remaining_chars() < mincount:
-            ctx.has_matched = NOT_MATCHED
-            yield True
-        ctx.state.string_position = ctx.string_position
-        count = self.count_repetitions(ctx, maxcount)
-        ctx.skip_char(count)
-        if count < mincount:
-            ctx.has_matched = NOT_MATCHED
-            yield True
-        if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["success"]:
-            # tail is empty.  we're finished
-            ctx.state.string_position = ctx.string_position
-            ctx.has_matched = MATCHED
-            yield True
-
-        ctx.state.marks_push()
-        if ctx.peek_code(ctx.peek_code(1) + 1) == OPCODES["literal"]:
-            # Special case: Tail starts with a literal. Skip positions where
-            # the rest of the pattern cannot possibly match.
-            char = ctx.peek_code(ctx.peek_code(1) + 2)
-            while True:
-                while count >= mincount and \
-                                (ctx.at_end() or ord(ctx.peek_char()) != char):
-                    ctx.skip_char(-1)
-                    count -= 1
-                if count < mincount:
-                    break
-                ctx.state.string_position = ctx.string_position
-                child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
-                yield False
-                if child_context.has_matched == MATCHED:
-                    ctx.has_matched = MATCHED
-                    yield True
-                ctx.skip_char(-1)
-                count -= 1
-                ctx.state.marks_pop_keep()
-        
-        else:
-            # General case: backtracking
-            while count >= mincount:
-                ctx.state.string_position = ctx.string_position
-                child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
-                yield False
-                if child_context.has_matched == MATCHED:
-                    ctx.has_matched = MATCHED
-                    yield True
-                ctx.skip_char(-1)
-                count -= 1
-                ctx.state.marks_pop_keep()
-
-        ctx.state.marks_pop_discard()
-        ctx.has_matched = NOT_MATCHED
-        yield True
-
     def op_min_repeat_one(self, ctx):
         # match repeated sequence (minimizing)
         # <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail

Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py	Thu Aug 25 22:05:19 2005
@@ -13,6 +13,7 @@
 # XXX can we import those safely from sre_constants?
 SRE_FLAG_LOCALE = 4 # honour system locale
 SRE_FLAG_UNICODE = 32 # use unicode locale
+MAXREPEAT = 65535
 
 def getlower(space, w_char_ord, w_flags):
     char_ord = space.int_w(w_char_ord)
@@ -454,18 +455,18 @@
 def op_branch(space, ctx):
     # alternation
     # <BRANCH> <0=skip> code <JUMP> ... <NULL>
-    if ctx.is_resumed():
-        last_branch_length = ctx.restore_values()[0]
+    if not ctx.is_resumed():
+        ctx.state.marks_push()
+        ctx.skip_code(1)
+        current_branch_length = ctx.peek_code(0)
+    else:
         if ctx.child_context.has_matched == ctx.MATCHED:
             ctx.has_matched = ctx.MATCHED
             return True
         ctx.state.marks_pop_keep()
+        last_branch_length = ctx.restore_values()[0]
         ctx.skip_code(last_branch_length)
         current_branch_length = ctx.peek_code(0)
-    else:
-        ctx.state.marks_push()
-        ctx.skip_code(1)
-        current_branch_length = ctx.peek_code(0)
     if current_branch_length:
         ctx.state.string_position = ctx.string_position
         ctx.push_new_context(1)
@@ -475,6 +476,58 @@
     ctx.has_matched = ctx.NOT_MATCHED
     return True
 
+def op_repeat_one(space, ctx):
+    # match repeated sequence (maximizing).
+    # this operator only works if the repeated item is exactly one character
+    # wide, and we're not already collecting backtracking points.
+    # <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
+    
+    # Case 1: First entry point
+    if not ctx.is_resumed():
+        mincount = ctx.peek_code(2)
+        maxcount = ctx.peek_code(3)
+        if ctx.remaining_chars() < mincount:
+            ctx.has_matched = ctx.NOT_MATCHED
+            return True
+        ctx.state.string_position = ctx.string_position
+        count = count_repetitions(space, ctx, maxcount)
+        ctx.skip_char(count)
+        if count < mincount:
+            ctx.has_matched = ctx.NOT_MATCHED
+            return True
+        if ctx.peek_code(ctx.peek_code(1) + 1) == 1: # 1 == OPCODES["success"]
+            # tail is empty.  we're finished
+            ctx.state.string_position = ctx.string_position
+            ctx.has_matched = ctx.MATCHED
+            return True
+        ctx.state.marks_push()
+        # XXX literal optimization missing here
+
+    # Case 2: Repetition is resumed (aka backtracked)
+    else:
+        if ctx.child_context.has_matched == ctx.MATCHED:
+            ctx.has_matched = ctx.MATCHED
+            return True
+        values = ctx.restore_values()
+        mincount = values[0]
+        count = values[1]
+        ctx.skip_char(-1)
+        count -= 1
+        ctx.state.marks_pop_keep()
+        
+    # Initialize the actual backtracking
+    if count >= mincount:
+        ctx.state.string_position = ctx.string_position
+        ctx.push_new_context(ctx.peek_code(1) + 1)
+        ctx.backup_value(mincount)
+        ctx.backup_value(count)
+        return False
+
+    # Backtracking failed
+    ctx.state.marks_pop_discard()
+    ctx.has_matched = ctx.NOT_MATCHED
+    return True
+
 def op_jump(space, ctx):
     # jump forward
     # <JUMP>/<INFO> <offset>
@@ -527,6 +580,34 @@
         ctx.skip_code(3)
     return True
 
+def count_repetitions(space, ctx, maxcount):
+    """Returns the number of repetitions of a single item, starting from the
+    current string position. The code pointer is expected to point to a
+    REPEAT_ONE operation (with the repeated item 4 code words ahead)."""
+    count = 0
+    real_maxcount = ctx.state.end - ctx.string_position
+    if maxcount < real_maxcount and maxcount != MAXREPEAT:
+        real_maxcount = maxcount
+    # XXX could special case every single character pattern here, as in C.
+    # This is a general solution, a bit hackish, but works and should be
+    # efficient.
+    code_position = ctx.code_position
+    string_position = ctx.string_position
+    ctx.skip_code(4)
+    reset_position = ctx.code_position
+    while count < real_maxcount:
+        # this works because the single character pattern is followed by
+        # a success opcode
+        ctx.code_position = reset_position
+        opcode_dispatch_table[ctx.peek_code()](space, ctx)
+        if ctx.has_matched == ctx.NOT_MATCHED:
+            break
+        count += 1
+    ctx.has_matched = ctx.UNDECIDED
+    ctx.code_position = code_position
+    ctx.string_position = string_position
+    return count
+
 opcode_dispatch_table = [
     op_failure, op_success,
     op_any, op_any_all,
@@ -547,7 +628,7 @@
     None, #NEGATE,
     None, #RANGE,
     None, #REPEAT,
-    None, #REPEAT_ONE,
+    op_repeat_one,
     None, #SUBPATTERN,
     None, #MIN_REPEAT_ONE
 ]



More information about the Pypy-commit mailing list