[pypy-svn] r16796 - in pypy/release/0.7.x/pypy/module/_sre: . test

nik at codespeak.net nik at codespeak.net
Sat Aug 27 17:59:08 CEST 2005


Author: nik
Date: Sat Aug 27 17:59:05 2005
New Revision: 16796

Removed:
   pypy/release/0.7.x/pypy/module/_sre/app_info.py
Modified:
   pypy/release/0.7.x/pypy/module/_sre/__init__.py
   pypy/release/0.7.x/pypy/module/_sre/app_sre.py
   pypy/release/0.7.x/pypy/module/_sre/interp_sre.py
   pypy/release/0.7.x/pypy/module/_sre/test/test_app_sre.py
   pypy/release/0.7.x/pypy/module/_sre/test/test_interp_sre.py
Log:
Merged the current _sre into the release branch, since it now compiles! By default,
the pure app-level module in lib is still used for now.


Modified: pypy/release/0.7.x/pypy/module/_sre/__init__.py
==============================================================================
--- pypy/release/0.7.x/pypy/module/_sre/__init__.py	(original)
+++ pypy/release/0.7.x/pypy/module/_sre/__init__.py	Sat Aug 27 17:59:05 2005
@@ -9,19 +9,16 @@
 """
     
     appleveldefs = {
-        'CODESIZE':       'app_info.CODESIZE',
-        'MAGIC':          'app_info.MAGIC',
-        'copyright':      'app_info.copyright',
-        'getcodesize':    'app_info.getcodesize',
         'compile':        'app_sre.compile',
     }
 
     interpleveldefs = {
-        'getlower':       'interp_sre.getlower',
+        'CODESIZE':       'space.wrap(interp_sre.CODESIZE)',
+        'MAGIC':          'space.wrap(interp_sre.MAGIC)',
+        'copyright':      'space.wrap(interp_sre.copyright)',
+        'getlower':       'interp_sre.w_getlower',
+        'getcodesize':    'interp_sre.w_getcodesize',
         '_State':         'interp_sre.make_state',
-        '_MatchContext':  'interp_sre.make_context',
-        '_RepeatContext': 'interp_sre.make_repeat_context',
-        '_match':         'interp_sre.match',
-        '_opcode_dispatch': 'interp_sre.opcode_dispatch',
-        '_opcode_is_at_interplevel': 'interp_sre.opcode_is_at_interplevel',
+        '_match':         'interp_sre.w_match',
+        '_search':        'interp_sre.w_search',
     }

Modified: pypy/release/0.7.x/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/release/0.7.x/pypy/module/_sre/app_sre.py	(original)
+++ pypy/release/0.7.x/pypy/module/_sre/app_sre.py	Sat Aug 27 17:59:05 2005
@@ -7,12 +7,8 @@
 copyrighted by: Copyright (c) 1997-2001 by Secret Labs AB
 """
 
-import array, operator, sys
-from sre_constants import ATCODES, OPCODES, CHCODES, MAXREPEAT
-from sre_constants import SRE_INFO_PREFIX, SRE_INFO_LITERAL
-from sre_constants import SRE_FLAG_UNICODE, SRE_FLAG_LOCALE
+import sys
 import _sre
-from _sre import CODESIZE
 
 
 def compile(pattern, flags, code, groups=0, groupindex={}, indexgroup=[None]):
@@ -47,7 +43,7 @@
         instance. Return None if no position in the string matches the
         pattern."""
         state = _sre._State(string, pos, endpos, self.flags)
-        if search(state, self._code):
+        if _sre._search(state, self._code):
             return SRE_Match(self, state)
         else:
             return None
@@ -59,7 +55,7 @@
         while state.start <= state.end:
             state.reset()
             state.string_position = state.start
-            if not search(state, self._code):
+            if not _sre._search(state, self._code):
                 break
             match = SRE_Match(self, state)
             if self.groups == 0 or self.groups == 1:
@@ -86,7 +82,7 @@
         while not count or n < count:
             state.reset()
             state.string_position = state.start
-            if not search(state, self._code):
+            if not _sre._search(state, self._code):
                 break
             if last_pos < state.start:
                 sublist.append(string[last_pos:state.start])
@@ -132,7 +128,7 @@
         while not maxsplit or n < maxsplit:
             state.reset()
             state.string_position = state.start
-            if not search(state, self._code):
+            if not _sre._search(state, self._code):
                 break
             if state.start == state.string_position: # zero-width match
                 if last == state.end:                # or end of string
@@ -185,10 +181,10 @@
         return match
 
     def match(self):
-        return self._match_search(match)
+        return self._match_search(_sre._match)
 
     def search(self):
-        return self._match_search(search)
+        return self._match_search(_sre._search)
 
 
 class SRE_Match(object):
@@ -289,324 +285,3 @@
 
     def __deepcopy__():
         raise TypeError, "cannot copy this pattern object"
-
-
-def search(state, pattern_codes):
-    flags = 0
-    if pattern_codes[0] == OPCODES["info"]:
-        # optimization info block
-        # <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>
-        #if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
-        #    return state.fast_search(pattern_codes)
-        flags = pattern_codes[2]
-        pattern_codes = pattern_codes[pattern_codes[1] + 1:]
-
-    string_position = state.start
-    """
-    if pattern_codes[0] == OPCODES["literal"]:
-        # Special case: Pattern starts with a literal character. This is
-        # used for short prefixes
-        character = pattern_codes[1]
-        while True:
-            while string_position < state.end \
-                    and ord(state.string[string_position]) != character:
-                string_position += 1
-            if string_position >= state.end:
-                return False
-            state.start = string_position
-            string_position += 1
-            state.string_position = string_position
-            if flags & SRE_INFO_LITERAL:
-                return True
-            if match(state, pattern_codes[2:]):
-                return True
-        return False
-    """
-
-    # General case
-    while string_position <= state.end:
-        state.reset()
-        state.start = state.string_position = string_position
-        if _sre._match(state, pattern_codes):
-            return True
-        string_position += 1
-    return False
-
-
-def fast_search(state, pattern_codes):
-    """Skips forward in a string as fast as possible using information from
-    an optimization info block."""
-    # pattern starts with a known prefix
-    # <5=length> <6=skip> <7=prefix data> <overlap data>
-    flags = pattern_codes[2]
-    prefix_len = pattern_codes[5]
-    prefix_skip = pattern_codes[6] # don't really know what this is good for
-    prefix = pattern_codes[7:7 + prefix_len]
-    overlap = pattern_codes[7 + prefix_len - 1:pattern_codes[1] + 1]
-    pattern_codes = pattern_codes[pattern_codes[1] + 1:]
-    i = 0
-    string_position = state.string_position
-    while string_position < state.end:
-        while True:
-            if ord(state.string[string_position]) != prefix[i]:
-                if i == 0:
-                    break
-                else:
-                    i = overlap[i]
-            else:
-                i += 1
-                if i == prefix_len:
-                    # found a potential match
-                    state.start = string_position + 1 - prefix_len
-                    state.string_position = string_position + 1 \
-                                                 - prefix_len + prefix_skip
-                    if flags & SRE_INFO_LITERAL:
-                        return True # matched all of pure literal pattern
-                    if _sre._match(state, pattern_codes[2 * prefix_skip:]):
-                        return True
-                    i = overlap[i]
-                break
-        string_position += 1
-    return False
-
-# XXX temporary constants for MatchContext.has_matched
-UNDECIDED = 0
-MATCHED = 1
-NOT_MATCHED = 2
-
-def match(state, pattern_codes):
-    # Optimization: Check string length. pattern_codes[3] contains the
-    # minimum length for a string to possibly match.
-    if pattern_codes[0] == OPCODES["info"] and pattern_codes[3]:
-        if state.end - state.string_position < pattern_codes[3]:
-            #_log("reject (got %d chars, need %d)"
-            #         % (state.end - state.string_position, pattern_codes[3]))
-            return False
-    
-    dispatcher = _OpcodeDispatcher()
-    state.context_stack.append(_sre._MatchContext(state, pattern_codes))
-    has_matched = UNDECIDED
-    while len(state.context_stack) > 0:
-        context = state.context_stack[-1]
-        has_matched = dispatcher.match(context)
-        if has_matched != UNDECIDED: # don't pop if context isn't done
-            state.context_stack.pop()
-    return has_matched == MATCHED
-
-
-class _Dispatcher(object):
-
-    DISPATCH_TABLE = None
-
-    def dispatch(self, code, context):
-        method = self.DISPATCH_TABLE.get(code, self.__class__.unknown)
-        return method(self, context)
-
-    def unknown(self, code, ctx):
-        raise NotImplementedError()
-
-    def build_dispatch_table(cls, code_dict, method_prefix):
-        if cls.DISPATCH_TABLE is not None:
-            return
-        table = {}
-        for key, value in code_dict.items():
-            if hasattr(cls, "%s%s" % (method_prefix, key)):
-                table[value] = getattr(cls, "%s%s" % (method_prefix, key))
-        cls.DISPATCH_TABLE = table
-
-    build_dispatch_table = classmethod(build_dispatch_table)
-
-
-class _OpcodeDispatcher(_Dispatcher):
-    
-    def __init__(self):
-        self.executing_contexts = {}
-        
-    def match(self, context):
-        """Returns True if the current context matches, False if it doesn't and
-        None if matching is not finished, ie must be resumed after child
-        contexts have been matched."""
-        while context.remaining_codes() > 0 and context.has_matched == UNDECIDED:
-            opcode = context.peek_code()
-            if not self.dispatch(opcode, context):
-                return UNDECIDED
-        if context.has_matched == UNDECIDED:
-            context.has_matched = NOT_MATCHED
-        return context.has_matched
-
-    def dispatch(self, opcode, context):
-        """Dispatches a context on a given opcode. Returns True if the context
-        is done matching, False if it must be resumed when next encountered."""
-        if self.executing_contexts.has_key(id(context)):
-            generator = self.executing_contexts[id(context)]
-            del self.executing_contexts[id(context)]
-            has_finished = generator.next()
-        else:
-            if _sre._opcode_is_at_interplevel(opcode):
-                has_finished = _sre._opcode_dispatch(opcode, context)
-            else:
-                method = self.DISPATCH_TABLE.get(opcode, _OpcodeDispatcher.unknown)
-                has_finished = method(self, context)
-            if hasattr(has_finished, "next"): # avoid using the types module
-                generator = has_finished
-                has_finished = generator.next()
-        if not has_finished:
-            self.executing_contexts[id(context)] = generator
-        return has_finished
-
-    def op_repeat(self, ctx):
-        # create repeat context.  all the hard work is done by the UNTIL
-        # operator (MAX_UNTIL, MIN_UNTIL)
-        # <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail
-        #self._log(ctx, "REPEAT", ctx.peek_code(2), ctx.peek_code(3))
-        repeat = _sre._RepeatContext(ctx)
-        ctx.state.repeat = repeat
-        ctx.state.string_position = ctx.string_position
-        child_context = ctx.push_new_context(ctx.peek_code(1) + 1)
-        yield False
-        ctx.state.repeat = repeat.previous
-        ctx.has_matched = child_context.has_matched
-        yield True
-
-    def op_max_until(self, ctx):
-        # maximizing repeat
-        # <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail
-        repeat = ctx.state.repeat
-        if repeat is None:
-            raise RuntimeError("Internal re error: MAX_UNTIL without REPEAT.")
-        mincount = repeat.peek_code(2)
-        maxcount = repeat.peek_code(3)
-        ctx.state.string_position = ctx.string_position
-        count = repeat.count + 1
-        #self._log(ctx, "MAX_UNTIL", count)
-
-        if count < mincount:
-            # not enough matches
-            repeat.count = count
-            child_context = repeat.push_new_context(4)
-            yield False
-            ctx.has_matched = child_context.has_matched
-            if ctx.has_matched == NOT_MATCHED:
-                repeat.count = count - 1
-                ctx.state.string_position = ctx.string_position
-            yield True
-
-        if (count < maxcount or maxcount == MAXREPEAT) \
-                      and ctx.state.string_position != repeat.last_position:
-            # we may have enough matches, if we can match another item, do so
-            repeat.count = count
-            ctx.state.marks_push()
-            save_last_position = repeat.last_position # zero-width match protection
-            repeat.last_position = ctx.state.string_position
-            child_context = repeat.push_new_context(4)
-            yield False
-            repeat.last_position = save_last_position
-            if child_context.has_matched == MATCHED:
-                ctx.state.marks_pop_discard()
-                ctx.has_matched = MATCHED
-                yield True
-            ctx.state.marks_pop()
-            repeat.count = count - 1
-            ctx.state.string_position = ctx.string_position
-
-        # cannot match more repeated items here.  make sure the tail matches
-        ctx.state.repeat = repeat.previous
-        child_context = ctx.push_new_context(1)
-        yield False
-        ctx.has_matched = child_context.has_matched
-        if ctx.has_matched == NOT_MATCHED:
-            ctx.state.repeat = repeat
-            ctx.state.string_position = ctx.string_position
-        yield True
-
-    def op_min_until(self, ctx):
-        # minimizing repeat
-        # <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail
-        repeat = ctx.state.repeat
-        if repeat is None:
-            raise RuntimeError("Internal re error: MIN_UNTIL without REPEAT.")
-        mincount = repeat.peek_code(2)
-        maxcount = repeat.peek_code(3)
-        ctx.state.string_position = ctx.string_position
-        count = repeat.count + 1
-        #self._log(ctx, "MIN_UNTIL", count)
-
-        if count < mincount:
-            # not enough matches
-            repeat.count = count
-            child_context = repeat.push_new_context(4)
-            yield False
-            ctx.has_matched = child_context.has_matched
-            if ctx.has_matched == NOT_MATCHED:
-                repeat.count = count - 1
-                ctx.state.string_position = ctx.string_position
-            yield True
-
-        # see if the tail matches
-        ctx.state.marks_push()
-        ctx.state.repeat = repeat.previous
-        child_context = ctx.push_new_context(1)
-        yield False
-        if child_context.has_matched == MATCHED:
-            ctx.has_matched = MATCHED
-            yield True
-        ctx.state.repeat = repeat
-        ctx.state.string_position = ctx.string_position
-        ctx.state.marks_pop()
-
-        # match more until tail matches
-        if count >= maxcount and maxcount != MAXREPEAT:
-            ctx.has_matched = NOT_MATCHED
-            yield True
-        repeat.count = count
-        child_context = repeat.push_new_context(4)
-        yield False
-        ctx.has_matched = child_context.has_matched
-        if ctx.has_matched == NOT_MATCHED:
-            repeat.count = count - 1
-            ctx.state.string_position = ctx.string_position
-        yield True
-        
-    def unknown(self, ctx):
-        #self._log(ctx, "UNKNOWN", ctx.peek_code())
-        raise RuntimeError("Internal re error. Unknown opcode: %s" % ctx.peek_code())
-    
-    def count_repetitions(self, ctx, maxcount):
-        """Returns the number of repetitions of a single item, starting from the
-        current string position. The code pointer is expected to point to a
-        REPEAT_ONE operation (with the repeated 4 ahead)."""
-        count = 0
-        real_maxcount = ctx.state.end - ctx.string_position
-        if maxcount < real_maxcount and maxcount != MAXREPEAT:
-            real_maxcount = maxcount
-        # XXX could special case every single character pattern here, as in C.
-        # This is a general solution, a bit hackisch, but works and should be
-        # efficient.
-        code_position = ctx.code_position
-        string_position = ctx.string_position
-        ctx.skip_code(4)
-        reset_position = ctx.code_position
-        while count < real_maxcount:
-            # this works because the single character pattern is followed by
-            # a success opcode
-            ctx.code_position = reset_position
-            self.dispatch(ctx.peek_code(), ctx)
-            if ctx.has_matched == NOT_MATCHED: # could be None as well
-                break
-            count += 1
-        ctx.has_matched = UNDECIDED
-        ctx.code_position = code_position
-        ctx.string_position = string_position
-        return count
-
-    def _log(self, context, opname, *args):
-        arg_string = ("%s " * len(args)) % args
-        _log("|%s|%s|%s %s" % (context.pattern_codes,
-            context.string_position, opname, arg_string))
-
-_OpcodeDispatcher.build_dispatch_table(OPCODES, "op_")
-
-
-def _log(message):
-    if 0:
-        print message

Modified: pypy/release/0.7.x/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/release/0.7.x/pypy/module/_sre/interp_sre.py	(original)
+++ pypy/release/0.7.x/pypy/module/_sre/interp_sre.py	Sat Aug 27 17:59:05 2005
@@ -1,38 +1,57 @@
-from pypy.interpreter.baseobjspace import ObjSpace, Wrappable
-# XXX is it allowed to import app-level module like this?
-from pypy.module._sre.app_info import CODESIZE
+from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.typedef import GetSetProperty, TypeDef
 from pypy.interpreter.typedef import interp_attrproperty, interp_attrproperty_w
 from pypy.interpreter.gateway import interp2app
-
 import sys
-BIG_ENDIAN = sys.byteorder == "big"
 
-#### Exposed functions
+#### Constants and exposed functions
+
+# Identifying as _sre from Python 2.3 or 2.4
+MAGIC = 20031017
+
+# In _sre.c this is bytesize of the code word type of the C implementation.
+# There it's 2 for normal Python builds and more for wide unicode builds (large 
+# enough to hold a 32-bit UCS-4 encoded character). Since here in pure Python
+# we only see re bytecodes as Python longs, we shouldn't have to care about the
+# codesize. But sre_compile will compile some stuff differently depending on the
+# codesize (e.g., charsets).
+if sys.maxunicode == 65535:
+    CODESIZE = 2
+else:
+    CODESIZE = 4
+
+copyright = "_sre.py 2.4 Copyright 2005 by Nik Haldimann"
+
+BIG_ENDIAN = sys.byteorder == "big"
 
 # XXX can we import those safely from sre_constants?
+SRE_INFO_PREFIX = 1
+SRE_INFO_LITERAL = 2
 SRE_FLAG_LOCALE = 4 # honour system locale
 SRE_FLAG_UNICODE = 32 # use unicode locale
+OPCODE_INFO = 17
+OPCODE_LITERAL = 19
 MAXREPEAT = 65535
 
-def getlower(space, w_char_ord, w_flags):
-    char_ord = space.int_w(w_char_ord)
-    flags = space.int_w(w_flags)
+def w_getlower(space, w_char_ord, w_flags):
+    return space.wrap(getlower(space, space.int_w(w_char_ord), space.int_w(w_flags)))
+
+def getlower(space, char_ord, flags):
     if (char_ord < 128) or (flags & SRE_FLAG_UNICODE) \
                               or (flags & SRE_FLAG_LOCALE and char_ord < 256):
         w_uni_char = space.newunicode([char_ord])
         w_lowered = space.call_method(w_uni_char, "lower")
-        return space.ord(w_lowered)
+        return space.int_w(space.ord(w_lowered))
     else:
-        return space.wrap(char_ord)
+        return char_ord
 
-#### Core classes
+def w_getcodesize(space):
+    return space.wrap(CODESIZE)
 
-# XXX the wrapped/unwrapped semantics of the following classes are currently
-# very confusing because they are still used at app-level.
+#### Core classes
 
 def make_state(space, w_string, w_start, w_end, w_flags):
-    # XXX Uhm, temporary
+    # XXX maybe turn this into a __new__ method of W_State
     return space.wrap(W_State(space, w_string, w_start, w_end, w_flags))
 
 class W_State(Wrappable):
@@ -51,14 +70,28 @@
         self.end = end
         self.pos = start
         self.flags = space.int_w(w_flags)
-        self.reset()
+        self.w_reset()
 
-    def reset(self):
+    def w_reset(self):
         self.marks = []
         self.lastindex = -1
         self.marks_stack = []
         self.context_stack = []
-        self.w_repeat = self.space.w_None
+        self.repeat = None
+
+    def w_create_regs(self, w_group_count):
+        """Creates a tuple of index pairs representing matched groups, a format
+        that's convenient for SRE_Match."""
+        regs = [self.space.newtuple([self.space.wrap(self.start), self.space.wrap(self.string_position)])]
+        for group in range(self.space.int_w(w_group_count)):
+            mark_index = 2 * group
+            if mark_index + 1 < len(self.marks):
+                regs.append(self.space.newtuple([self.space.wrap(self.marks[mark_index]),
+                                                 self.space.wrap(self.marks[mark_index + 1])]))
+            else:
+                regs.append(self.space.newtuple([self.space.wrap(-1),
+                                                        self.space.wrap(-1)]))
+        return self.space.newtuple(regs)
 
     def set_mark(self, mark_nr, position):
         if mark_nr & 1:
@@ -75,20 +108,6 @@
         else:
             return -1, -1
 
-    def create_regs(self, w_group_count):
-        """Creates a tuple of index pairs representing matched groups, a format
-        that's convenient for SRE_Match."""
-        regs = [self.space.newtuple([self.space.wrap(self.start), self.space.wrap(self.string_position)])]
-        for group in range(self.space.int_w(w_group_count)):
-            mark_index = 2 * group
-            if mark_index + 1 < len(self.marks):
-                regs.append(self.space.newtuple([self.space.wrap(self.marks[mark_index]),
-                                                 self.space.wrap(self.marks[mark_index + 1])]))
-            else:
-                regs.append(self.space.newtuple([self.space.wrap(-1),
-                                                        self.space.wrap(-1)]))
-        return self.space.newtuple(regs)
-
     def marks_push(self):
         self.marks_stack.append((self.marks[:], self.lastindex))
 
@@ -102,65 +121,48 @@
         self.marks_stack.pop()
 
     def lower(self, char_ord):
-        return self.space.int_w(self.w_lower(self.space.wrap(char_ord)))
+        return getlower(self.space, char_ord, self.flags)
+
+    # Accessors for the typedef
+    
+    def fget_start(space, self):
+        return space.wrap(self.start)
+
+    def fset_start(space, self, w_value):
+        self.start = space.int_w(w_value)
 
-    def w_lower(self, w_char_ord):
-        return getlower(self.space, w_char_ord, self.space.wrap(self.flags))
+    def fget_string_position(space, self):
+        return space.wrap(self.string_position)
 
-def interp_attrproperty_int(name, cls):
-    "NOT_RPYTHON: initialization-time only"
-    def fget(space, obj):
-        return space.wrap(getattr(obj, name))
-    def fset(space, obj, w_value):
-        setattr(obj, name, space.int_w(w_value))
-    return GetSetProperty(fget, fset, cls=cls)
-
-def interp_attrproperty_list_w(name, cls):
-    "NOT_RPYTHON: initialization-time only"
-    def fget(space, obj):
-        return space.newlist(getattr(obj, name))
-    return GetSetProperty(fget, cls=cls)
-
-def interp_attrproperty_obj_w(name, cls):
-    "NOT_RPYTHON: initialization-time only"
-    def fget(space, obj):
-        return getattr(obj, name)
-    def fset(space, obj, w_value):
-        setattr(obj, name, w_value)
-    return GetSetProperty(fget, fset, cls=cls)
+    def fset_string_position(space, self, w_value):  # typedef setter paired with fget_string_position
+        self.string_position = space.int_w(w_value)  # unwrap to an int and store it on string_position (not start)
+
+getset_start = GetSetProperty(W_State.fget_start, W_State.fset_start, cls=W_State)
+getset_string_position = GetSetProperty(W_State.fget_string_position,
+                                     W_State.fset_string_position, cls=W_State)
 
 W_State.typedef = TypeDef("W_State",
-    string = interp_attrproperty_obj_w("w_string", W_State),
-    start = interp_attrproperty_int("start", W_State),
-    end = interp_attrproperty_int("end", W_State),
-    string_position = interp_attrproperty_int("string_position", W_State),
+    string = interp_attrproperty_w("w_string", W_State),
+    start = getset_start,
+    end = interp_attrproperty("end", W_State),
+    string_position = getset_string_position,
     pos = interp_attrproperty("pos", W_State),
     lastindex = interp_attrproperty("lastindex", W_State),
-    repeat = interp_attrproperty_obj_w("w_repeat", W_State),
-    reset = interp2app(W_State.reset),
-    create_regs = interp2app(W_State.create_regs),
-    marks_push = interp2app(W_State.marks_push),
-    marks_pop = interp2app(W_State.marks_pop),
-    marks_pop_keep = interp2app(W_State.marks_pop_keep),
-    marks_pop_discard = interp2app(W_State.marks_pop_discard),
-    lower = interp2app(W_State.w_lower),
+    reset = interp2app(W_State.w_reset),
+    create_regs = interp2app(W_State.w_create_regs),
 )
 
-def make_context(space, w_state, w_pattern_codes):
-    # XXX Uhm, temporary
-    return space.wrap(W_MatchContext(space, w_state, w_pattern_codes))
-
-class W_MatchContext(Wrappable):
+class MatchContext:
 
     UNDECIDED = 0
     MATCHED = 1
     NOT_MATCHED = 2
 
-    def __init__(self, space, w_state, w_pattern_codes):
+    def __init__(self, space, state, pattern_codes):
         self.space = space
-        self.state = w_state
-        self.pattern_codes_w = space.unpackiterable(w_pattern_codes)
-        self.string_position = w_state.string_position
+        self.state = state
+        self.pattern_codes = pattern_codes
+        self.string_position = state.string_position
         self.code_position = 0
         self.has_matched = self.UNDECIDED
         self.backup = []
@@ -170,12 +172,13 @@
         """Creates a new child context of this context and pushes it on the
         stack. pattern_offset is the offset off the current code position to
         start interpreting from."""
-        pattern_codes_w = self.pattern_codes_w[self.code_position + pattern_offset:]
-        w_child_context = self.space.wrap(W_MatchContext(self.space, self.state,
-                                           self.space.newlist(pattern_codes_w)))
-        self.state.context_stack.append(w_child_context)
-        self.child_context = w_child_context
-        return w_child_context
+        offset = self.code_position + pattern_offset
+        assert offset >= 0
+        pattern_codes = self.pattern_codes[offset:]
+        child_context = MatchContext(self.space, self.state, pattern_codes)
+        self.state.context_stack.append(child_context)
+        self.child_context = child_context
+        return child_context
 
     def is_resumed(self):
         return self.resume_at_opcode > -1
@@ -188,42 +191,28 @@
         self.backup = []
         return values
 
-    def peek_char(self, w_peek=0):
-        # XXX temporary hack
-        if w_peek == 0:
-            w_peek = self.space.wrap(0)
+    def peek_char(self, peek=0):
         return self.space.getitem(self.state.w_string,
-                self.space.add(self.space.wrap(self.string_position), w_peek))
+                                   self.space.wrap(self.string_position + peek))
 
     def peek_char_ord(self, peek=0):
-        return self.space.int_w(self.space.ord(self.peek_char(self.space.wrap(peek))))
+        # XXX this is not very nice
+        return self.space.int_w(self.space.ord(self.peek_char(peek)))
 
     def skip_char(self, skip_count):
         self.string_position = self.string_position + skip_count
 
-    def w_skip_char(self, w_skip_count):
-        self.skip_char(self.space.int_w(w_skip_count))
-
     def remaining_chars(self):
         return self.state.end - self.string_position
 
-    def w_remaining_chars(self):
-        return self.space.wrap(self.remaining_chars())
-
     def peek_code(self, peek=0):
-        return self.space.int_w(self.pattern_codes_w[self.code_position + peek])
-
-    def w_peek_code(self, w_peek=0):
-        return self.space.wrap(self.peek_code(self.space.int_w(w_peek)))
+        return self.pattern_codes[self.code_position + peek]
 
     def skip_code(self, skip_count):
         self.code_position = self.code_position + skip_count
 
-    def w_skip_code(self, w_skip_count):
-        self.skip_code(self.space.int_w(w_skip_count))
-
     def remaining_codes(self):
-        return self.space.wrap(len(self.pattern_codes_w) - self.code_position)
+        return len(self.pattern_codes) - self.code_position
 
     def at_beginning(self):
         return self.string_position == 0
@@ -231,9 +220,6 @@
     def at_end(self):
         return self.string_position == self.state.end
 
-    def w_at_end(self):
-        return self.space.newbool(self.at_end())
-
     def at_linebreak(self):
         return not self.at_end() and is_linebreak(self.space, self.peek_char())
 
@@ -241,58 +227,114 @@
         if self.at_beginning() and self.at_end():
             return False
         that = not self.at_beginning() \
-                            and word_checker(self.space, self.peek_char(self.space.wrap(-1)))
+                            and word_checker(self.space, self.peek_char(-1))
         this = not self.at_end() \
                             and word_checker(self.space, self.peek_char())
         return this != that
 
-W_MatchContext.typedef = TypeDef("W_MatchContext",
-    state = interp_attrproperty_w("state", W_MatchContext),
-    string_position = interp_attrproperty_int("string_position", W_MatchContext),
-    pattern_codes = interp_attrproperty_list_w("pattern_codes_w", W_MatchContext),
-    code_position = interp_attrproperty_int("code_position", W_MatchContext),
-    has_matched = interp_attrproperty_int("has_matched", W_MatchContext),
-    #push_new_context = interp2app(W_MatchContext.push_new_context),
-    peek_char = interp2app(W_MatchContext.peek_char),
-    skip_char = interp2app(W_MatchContext.w_skip_char),
-    remaining_chars = interp2app(W_MatchContext.w_remaining_chars),
-    peek_code = interp2app(W_MatchContext.w_peek_code),
-    skip_code = interp2app(W_MatchContext.w_skip_code),
-    remaining_codes = interp2app(W_MatchContext.remaining_codes),
-    at_end = interp2app(W_MatchContext.w_at_end),
-)
-
-def make_repeat_context(space, w_context):
-    # XXX Uhm, temporary
-    return space.wrap(W_RepeatContext(space, w_context))
 
-class W_RepeatContext(W_MatchContext):
+class RepeatContext(MatchContext):
     
-    def __init__(self, space, w_context):
-        W_MatchContext.__init__(self, space, w_context.state,
-            space.newlist(w_context.pattern_codes_w[w_context.code_position:]))
-        self.w_count = space.wrap(-1)
-        self.w_previous = w_context.state.w_repeat
-        self.w_last_position = space.w_None
-
-W_RepeatContext.typedef = TypeDef("W_RepeatContext", W_MatchContext.typedef,
-    count = interp_attrproperty_obj_w("w_count", W_RepeatContext),
-    previous = interp_attrproperty_obj_w("w_previous", W_RepeatContext),
-    last_position = interp_attrproperty_obj_w("w_last_position", W_RepeatContext),
-)
+    def __init__(self, space, context):
+        offset = context.code_position
+        assert offset >= 0
+        MatchContext.__init__(self, space, context.state,
+                                context.pattern_codes[offset:])
+        self.count = -1
+        self.previous = context.state.repeat
+        self.last_position = -1
+        self.repeat_stack = []
+
 
 #### Main opcode dispatch loop
 
-def match(space, w_state, w_pattern_codes):
+def w_search(space, w_state, w_pattern_codes):
+    assert isinstance(w_state, W_State)
+    pattern_codes = [space.int_w(code) for code
+                                    in space.unpackiterable(w_pattern_codes)]
+    return space.newbool(search(space, w_state, pattern_codes))
+
+def search(space, state, pattern_codes):
+    flags = 0
+    if pattern_codes[0] == OPCODE_INFO:
+        # optimization info block
+        # <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>
+        if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
+            return fast_search(space, state, pattern_codes)
+        flags = pattern_codes[2]
+        offset = pattern_codes[1] + 1
+        assert offset >= 0
+        pattern_codes = pattern_codes[offset:]
+
+    string_position = state.start
+    while string_position <= state.end:
+        state.w_reset()
+        state.start = state.string_position = string_position
+        if match(space, state, pattern_codes):
+            return True
+        string_position += 1
+    return False
+
+def fast_search(space, state, pattern_codes):
+    """Skips forward in a string as fast as possible using information from
+    an optimization info block."""
+    # pattern starts with a known prefix
+    # <5=length> <6=skip> <7=prefix data> <overlap data>
+    flags = pattern_codes[2]
+    prefix_len = pattern_codes[5]
+    assert prefix_len >= 0
+    prefix_skip = pattern_codes[6] # don't really know what this is good for
+    assert prefix_skip >= 0
+    prefix = pattern_codes[7:7 + prefix_len]
+    overlap_offset = 7 + prefix_len - 1
+    overlap_stop = pattern_codes[1] + 1
+    assert overlap_offset >= 0
+    assert overlap_stop >= 0
+    overlap = pattern_codes[overlap_offset:overlap_stop]
+    pattern_offset = pattern_codes[1] + 1
+    assert pattern_offset >= 0
+    pattern_codes = pattern_codes[pattern_offset:]
+    i = 0
+    string_position = state.string_position
+    while string_position < state.end:
+        while True:
+            char_ord = space.int_w(space.ord(
+                space.getitem(state.w_string, space.wrap(string_position))))
+            if char_ord != prefix[i]:
+                if i == 0:
+                    break
+                else:
+                    i = overlap[i]
+            else:
+                i += 1
+                if i == prefix_len:
+                    # found a potential match
+                    state.start = string_position + 1 - prefix_len
+                    state.string_position = string_position + 1 \
+                                                 - prefix_len + prefix_skip
+                    if flags & SRE_INFO_LITERAL:
+                        return True # matched all of pure literal pattern
+                    if match(space, state, pattern_codes[2 * prefix_skip:]):
+                        return True
+                    i = overlap[i]
+                break
+        string_position += 1
+    return False
+
+def w_match(space, w_state, w_pattern_codes):
+    assert isinstance(w_state, W_State)
+    pattern_codes = [space.int_w(code) for code
+                                    in space.unpackiterable(w_pattern_codes)]
+    return space.newbool(match(space, w_state, pattern_codes))
+
+def match(space, state, pattern_codes):
     # Optimization: Check string length. pattern_codes[3] contains the
     # minimum length for a string to possibly match.
-    # XXX disabled for now
-    #if pattern_codes[0] == OPCODES["info"] and pattern_codes[3]:
-    #    if state.end - state.string_position < pattern_codes[3]:
-    #        return False
-    state = w_state
-    state.context_stack.append(W_MatchContext(space, state, w_pattern_codes))
-    has_matched = W_MatchContext.UNDECIDED
+    if pattern_codes[0] == OPCODE_INFO and pattern_codes[3] > 0:
+        if state.end - state.string_position < pattern_codes[3]:
+            return False
+    state.context_stack.append(MatchContext(space, state, pattern_codes))
+    has_matched = MatchContext.UNDECIDED
     while len(state.context_stack) > 0:
         context = state.context_stack[-1]
         if context.has_matched == context.UNDECIDED:
@@ -301,7 +343,7 @@
             has_matched = context.has_matched
         if has_matched != context.UNDECIDED: # don't pop if context isn't done
             state.context_stack.pop()
-    return space.newbool(has_matched == context.MATCHED)
+    return has_matched == MatchContext.MATCHED
 
 def dispatch_loop(space, context):
     """Returns MATCHED if the current context matches, NOT_MATCHED if it doesn't
@@ -312,10 +354,10 @@
             opcode = context.resume_at_opcode
         else:
             opcode = context.peek_code()
-        #try:
-        has_finished = opcode_dispatch_table[opcode](space, context)
-        #except IndexError:
-        #    raise RuntimeError("Internal re error. Unknown opcode: %s" % opcode)
+        try:
+            has_finished = opcode_dispatch_table[opcode](space, context)
+        except IndexError:
+            raise RuntimeError("Internal re error. Unknown opcode: %s" % opcode)
         if not has_finished:
             context.resume_at_opcode = opcode
             return context.UNDECIDED
@@ -324,19 +366,6 @@
         context.has_matched = context.NOT_MATCHED
     return context.has_matched
 
-def opcode_dispatch(space, w_opcode, w_context):
-    opcode = space.int_w(w_opcode)
-    if opcode >= len(opcode_dispatch_table):
-        return space.newbool(False)
-    return space.newbool(opcode_dispatch_table[opcode](space, w_context))
-
-def opcode_is_at_interplevel(space, w_opcode):
-    opcode = space.int_w(w_opcode)
-    try:
-        return space.newbool(opcode_dispatch_table[opcode] is not None)
-    except IndexError:
-        return space.newbool(False)
-
 def op_success(space, ctx):
     # end of pattern
     ctx.state.string_position = ctx.string_position
@@ -432,7 +461,7 @@
         return
     skip = ctx.peek_code(1)
     ctx.skip_code(2) # set op pointer to the set code
-    char_code = space.int_w(space.ord(ctx.peek_char()))
+    char_code = ctx.peek_char_ord()
     if ignore:
         char_code = ctx.state.lower(char_code)
     if not check_charset(space, char_code, ctx):
@@ -587,6 +616,201 @@
     ctx.has_matched = ctx.NOT_MATCHED
     return True
 
+def op_repeat(space, ctx):
+    # create repeat context.  all the hard work is done by the UNTIL
+    # operator (MAX_UNTIL, MIN_UNTIL)
+    # <REPEAT> <skip> <1=min> <2=max> item <UNTIL> tail
+    if not ctx.is_resumed():
+        ctx.repeat = RepeatContext(space, ctx)
+        ctx.state.repeat = ctx.repeat
+        ctx.state.string_position = ctx.string_position
+        ctx.push_new_context(ctx.peek_code(1) + 1)
+        return False
+    else:
+        ctx.state.repeat = ctx.repeat
+        ctx.has_matched = ctx.child_context.has_matched
+        return True
+
+def op_max_until(space, ctx):
+    # maximizing repeat
+    # <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail
+    
+    # Case 1: First entry point
+    if not ctx.is_resumed():
+        repeat = ctx.state.repeat
+        if repeat is None:
+            raise RuntimeError("Internal re error: MAX_UNTIL without REPEAT.")
+        mincount = repeat.peek_code(2)
+        maxcount = repeat.peek_code(3)
+        ctx.state.string_position = ctx.string_position
+        count = repeat.count + 1
+        if count < mincount:
+            # not enough matches
+            repeat.count = count
+            repeat.repeat_stack.append(repeat.push_new_context(4))
+            ctx.backup_value(mincount)
+            ctx.backup_value(maxcount)
+            ctx.backup_value(count)
+            ctx.backup_value(0) # Dummy for last_position
+            ctx.backup_value(0)
+            ctx.repeat = repeat
+            return False
+        if (count < maxcount or maxcount == MAXREPEAT) \
+                        and ctx.state.string_position != repeat.last_position:
+            # we may have enough matches, if we can match another item, do so
+            repeat.count = count
+            ctx.state.marks_push()
+            repeat.last_position = ctx.state.string_position
+            repeat.repeat_stack.append(repeat.push_new_context(4))
+            ctx.backup_value(mincount)
+            ctx.backup_value(maxcount)
+            ctx.backup_value(count)
+            ctx.backup_value(repeat.last_position) # zero-width match protection
+            ctx.backup_value(2) # more matching
+            ctx.repeat = repeat
+            return False
+
+        # Cannot match more repeated items here. Make sure the tail matches.
+        ctx.state.repeat = repeat.previous
+        ctx.push_new_context(1)
+        ctx.backup_value(mincount)
+        ctx.backup_value(maxcount)
+        ctx.backup_value(count)
+        ctx.backup_value(repeat.last_position) # zero-width match protection
+        ctx.backup_value(1) # tail matching
+        ctx.repeat = repeat
+        return False
+
+    # Case 2: Resumed
+    else:
+        repeat = ctx.repeat
+        values = ctx.restore_values()
+        mincount = values[0]
+        maxcount = values[1]
+        count = values[2]
+        save_last_position = values[3]
+        tail_matching = values[4]
+        
+        if tail_matching == 0:
+            ctx.has_matched = repeat.repeat_stack.pop().has_matched
+            if ctx.has_matched == ctx.NOT_MATCHED:
+                repeat.count = count - 1
+                ctx.state.string_position = ctx.string_position
+            return True
+        elif tail_matching == 2:
+            repeat.last_position = save_last_position
+            if repeat.repeat_stack.pop().has_matched == ctx.MATCHED:
+                ctx.state.marks_pop_discard()
+                ctx.has_matched = ctx.MATCHED
+                return True
+            ctx.state.marks_pop()
+            repeat.count = count - 1
+            ctx.state.string_position = ctx.string_position
+
+            # Cannot match more repeated items here. Make sure the tail matches.
+            ctx.state.repeat = repeat.previous
+            ctx.push_new_context(1)
+            ctx.backup_value(mincount)
+            ctx.backup_value(maxcount)
+            ctx.backup_value(count)
+            ctx.backup_value(repeat.last_position) # zero-width match protection
+            ctx.backup_value(1) # tail matching
+            return False
+
+        else: # resuming after tail matching
+            ctx.has_matched = ctx.child_context.has_matched
+            if ctx.has_matched == ctx.NOT_MATCHED:
+                ctx.state.repeat = repeat
+                ctx.state.string_position = ctx.string_position
+            return True
+
+def op_min_until(space, ctx):
+    # minimizing repeat
+    # <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail
+    
+    # Case 1: First entry point
+    if not ctx.is_resumed():
+        repeat = ctx.state.repeat
+        if repeat is None:
+            raise RuntimeError("Internal re error: MIN_UNTIL without REPEAT.")
+        mincount = repeat.peek_code(2)
+        maxcount = repeat.peek_code(3)
+        ctx.state.string_position = ctx.string_position
+        count = repeat.count + 1
+
+        if count < mincount:
+            # not enough matches
+            repeat.count = count
+            repeat.repeat_stack.append(repeat.push_new_context(4))
+            ctx.backup_value(mincount)
+            ctx.backup_value(maxcount)
+            ctx.backup_value(count)
+            ctx.backup_value(0)
+            ctx.repeat = repeat
+            return False
+
+        # see if the tail matches
+        ctx.state.marks_push()
+        ctx.state.repeat = repeat.previous
+        ctx.push_new_context(1)
+        ctx.backup_value(mincount)
+        ctx.backup_value(maxcount)
+        ctx.backup_value(count)
+        ctx.backup_value(1)
+        ctx.repeat = repeat
+        return False
+
+    # Case 2: Resumed
+    else:
+        repeat = ctx.repeat
+        if repeat.has_matched == ctx.MATCHED:
+            ctx.has_matched = ctx.MATCHED
+            return True
+        values = ctx.restore_values()
+        mincount = values[0]
+        maxcount = values[1]
+        count = values[2]
+        matching_state = values[3]
+
+        if count < mincount:
+            # not enough matches
+            ctx.has_matched = repeat.repeat_stack.pop().has_matched
+            if ctx.has_matched == ctx.NOT_MATCHED:
+                repeat.count = count - 1
+                ctx.state.string_position = ctx.string_position
+            return True
+        
+        if matching_state == 1:
+            # returning from tail matching
+            if ctx.child_context.has_matched == ctx.MATCHED:
+                ctx.has_matched = ctx.MATCHED
+                return True
+            ctx.state.repeat = repeat
+            ctx.state.string_position = ctx.string_position
+            ctx.state.marks_pop()
+
+        if not matching_state == 2:
+            # match more until tail matches
+            if count >= maxcount and maxcount != MAXREPEAT:
+                ctx.has_matched = ctx.NOT_MATCHED
+                return True
+            repeat.count = count
+            repeat.repeat_stack.append(repeat.push_new_context(4))
+            ctx.backup_value(mincount)
+            ctx.backup_value(maxcount)
+            ctx.backup_value(count)
+            ctx.backup_value(2)
+            ctx.repeat = repeat
+            return False
+
+        # Final return
+        ctx.has_matched = repeat.repeat_stack.pop().has_matched
+        repeat.has_matched = ctx.has_matched
+        if ctx.has_matched == ctx.NOT_MATCHED:
+            repeat.count = count - 1
+            ctx.state.string_position = ctx.string_position
+        return True
+
 def op_jump(space, ctx):
     # jump forward
     # <JUMP>/<INFO> <offset>
@@ -608,7 +832,7 @@
         return True
     while group_start < group_end:
         # XXX This is really a bit unwieldy. Can this be improved?
-        new_char = space.int_w(space.ord(ctx.peek_char()))
+        new_char = ctx.peek_char_ord()
         old_char = space.int_w(space.ord(
                     space.getitem(ctx.state.w_string, space.wrap(group_start))))
         if ctx.at_end() or (not ignore and old_char != new_char) \
@@ -713,12 +937,12 @@
     op_jump, op_jump,
     op_literal, op_literal_ignore,
     op_mark,
-    None, #MAX_UNTIL,
-    None, #MIN_UNTIL,
+    op_max_until,
+    op_min_until,
     op_not_literal, op_not_literal_ignore,
     None, #NEGATE,
     None, #RANGE,
-    None, #REPEAT,
+    op_repeat,
     op_repeat_one,
     None, #SUBPATTERN,
     op_min_repeat_one,
@@ -824,7 +1048,7 @@
     return ctx.at_beginning()
 
 def at_beginning_line(space, ctx):
-    return ctx.at_beginning() or is_linebreak(space, ctx.peek_char(space.wrap(-1)))
+    return ctx.at_beginning() or is_linebreak(space, ctx.peek_char(-1))
     
 def at_end(space, ctx):
     return ctx.at_end() or (ctx.remaining_chars() == 1 and ctx.at_linebreak())

Modified: pypy/release/0.7.x/pypy/module/_sre/test/test_app_sre.py
==============================================================================
--- pypy/release/0.7.x/pypy/module/_sre/test/test_app_sre.py	(original)
+++ pypy/release/0.7.x/pypy/module/_sre/test/test_app_sre.py	Sat Aug 27 17:59:05 2005
@@ -451,6 +451,13 @@
         assert re.search(r"b(?<!\d.)a", "ba")
         assert not re.search(r"b(?<!\d.)a", "11ba")
 
+    def test_bug_725149(self):
+        # mark_stack_base restoring before restoring marks
+        # test copied from CPython test
+        import re
+        assert re.match('(a)(?:(?=(b)*)c)*', 'abb').groups() == ('a', None)
+        assert re.match('(a)((?!(b)*))*', 'abb').groups() == ('a', None, None)
+
 
 class AppTestMarksStack:
 
@@ -835,7 +842,7 @@
         s.assert_no_match(opcodes, ["b"])
         assert "aab" == s.search(opcodes, "aabb").group(0)
 
-    def test_max_until_error(self):
+    def test_min_until_error(self):
         opcodes = [s.OPCODES["min_until"], s.OPCODES["success"]]
         raises(RuntimeError, s.search, opcodes, "a")
 

Modified: pypy/release/0.7.x/pypy/module/_sre/test/test_interp_sre.py
==============================================================================
--- pypy/release/0.7.x/pypy/module/_sre/test/test_interp_sre.py	(original)
+++ pypy/release/0.7.x/pypy/module/_sre/test/test_interp_sre.py	Sat Aug 27 17:59:05 2005
@@ -11,7 +11,7 @@
     state = isre.W_State(space, space.wrap(string), space.wrap(0),
                                                 space.wrap(end), space.wrap(0))
     state.string_position = string_position
-    return isre.W_MatchContext(space, state, space.newlist([]))
+    return isre.MatchContext(space, state, [])
 
 def test_is_uni_linebreak(space):
     for char in ["\n", "\r"]:
@@ -77,7 +77,7 @@
         assert not isre.at_boundary(space, create_context(space, string, pos, end))
 
 def test_getlower(space):
-    assert space.int_w(isre.getlower(space, space.wrap(ord("A")), space.wrap(0))) == ord("a")
+    assert isre.getlower(space, ord("A"), 0) == ord("a")
 
 def test_get_byte_array(space):
     if sys.byteorder == "big":



More information about the Pypy-commit mailing list