[pypy-svn] r16099 - pypy/dist/pypy/module/_sre

nik at codespeak.net nik at codespeak.net
Tue Aug 16 14:29:52 CEST 2005


Author: nik
Date: Tue Aug 16 14:29:52 2005
New Revision: 16099

Modified:
   pypy/dist/pypy/module/_sre/app_sre.py
Log:
refactored some methods to be functions. this will allow me to move the
_State and _MatchContext classes to interp-level and come up with a
scheme to gradually move over the actual bytecode interpreter to
interp-level.


Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/app_sre.py	Tue Aug 16 14:29:52 2005
@@ -36,7 +36,7 @@
         regular expression, return a corresponding MatchObject instance. Return
         None if the string does not match the pattern."""
         state = _State(string, pos, endpos, self.flags)
-        if state.match(self._code):
+        if match(state, self._code):
             return SRE_Match(self, state)
         else:
             return None
@@ -47,7 +47,7 @@
         instance. Return None if no position in the string matches the
         pattern."""
         state = _State(string, pos, endpos, self.flags)
-        if state.search(self._code):
+        if search(state, self._code):
             return SRE_Match(self, state)
         else:
             return None
@@ -59,7 +59,7 @@
         while state.start <= state.end:
             state.reset()
             state.string_position = state.start
-            if not state.search(self._code):
+            if not search(state, self._code):
                 break
             match = SRE_Match(self, state)
             if self.groups == 0 or self.groups == 1:
@@ -86,7 +86,7 @@
         while not count or n < count:
             state.reset()
             state.string_position = state.start
-            if not state.search(self._code):
+            if not search(state, self._code):
                 break
             if last_pos < state.start:
                 sublist.append(string[last_pos:state.start])
@@ -132,7 +132,7 @@
         while not maxsplit or n < maxsplit:
             state.reset()
             state.string_position = state.start
-            if not state.search(self._code):
+            if not search(state, self._code):
                 break
             if state.start == state.string_position: # zero-width match
                 if last == state.end:                # or end of string
@@ -176,7 +176,7 @@
         state.reset()
         state.string_position = state.start
         match = None
-        if matcher(self.pattern._code):
+        if matcher(state, self.pattern._code):
             match = SRE_Match(self.pattern, state)
         if match is None or state.string_position == state.start:
             state.start += 1
@@ -185,10 +185,10 @@
         return match
 
     def match(self):
-        return self._match_search(self._state.match)
+        return self._match_search(match)
 
     def search(self):
-        return self._match_search(self._state.search)
+        return self._match_search(search)
 
 
 class SRE_Match(object):
@@ -326,100 +326,6 @@
         self.context_stack = []
         self.repeat = None
 
-    def match(self, pattern_codes):
-        # Optimization: Check string length. pattern_codes[3] contains the
-        # minimum length for a string to possibly match.
-        if pattern_codes[0] == OPCODES["info"] and pattern_codes[3]:
-            if self.end - self.string_position < pattern_codes[3]:
-                #_log("reject (got %d chars, need %d)"
-                #         % (self.end - self.string_position, pattern_codes[3]))
-                return False
-        
-        dispatcher = _OpcodeDispatcher()
-        self.context_stack.append(_MatchContext(self, pattern_codes))
-        has_matched = None
-        while len(self.context_stack) > 0:
-            context = self.context_stack[-1]
-            has_matched = dispatcher.match(context)
-            if has_matched is not None: # don't pop if context isn't done
-                self.context_stack.pop()
-        return has_matched
-
-    def search(self, pattern_codes):
-        flags = 0
-        if pattern_codes[0] == OPCODES["info"]:
-            # optimization info block
-            # <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>
-            if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
-                return self.fast_search(pattern_codes)
-            flags = pattern_codes[2]
-            pattern_codes = pattern_codes[pattern_codes[1] + 1:]
-
-        string_position = self.start
-        if pattern_codes[0] == OPCODES["literal"]:
-            # Special case: Pattern starts with a literal character. This is
-            # used for short prefixes
-            character = pattern_codes[1]
-            while True:
-                while string_position < self.end \
-                        and ord(self.string[string_position]) != character:
-                    string_position += 1
-                if string_position >= self.end:
-                    return False
-                self.start = string_position
-                string_position += 1
-                self.string_position = string_position
-                if flags & SRE_INFO_LITERAL:
-                    return True
-                if self.match(pattern_codes[2:]):
-                    return True
-            return False
-
-        # General case
-        while string_position <= self.end:
-            self.reset()
-            self.start = self.string_position = string_position
-            if self.match(pattern_codes):
-                return True
-            string_position += 1
-        return False
-
-    def fast_search(self, pattern_codes):
-        """Skips forward in a string as fast as possible using information from
-        an optimization info block."""
-        # pattern starts with a known prefix
-        # <5=length> <6=skip> <7=prefix data> <overlap data>
-        flags = pattern_codes[2]
-        prefix_len = pattern_codes[5]
-        prefix_skip = pattern_codes[6] # don't really know what this is good for
-        prefix = pattern_codes[7:7 + prefix_len]
-        overlap = pattern_codes[7 + prefix_len - 1:pattern_codes[1] + 1]
-        pattern_codes = pattern_codes[pattern_codes[1] + 1:]
-        i = 0
-        string_position = self.string_position
-        while string_position < self.end:
-            while True:
-                if ord(self.string[string_position]) != prefix[i]:
-                    if i == 0:
-                        break
-                    else:
-                        i = overlap[i]
-                else:
-                    i += 1
-                    if i == prefix_len:
-                        # found a potential match
-                        self.start = string_position + 1 - prefix_len
-                        self.string_position = string_position + 1 \
-                                                     - prefix_len + prefix_skip
-                        if flags & SRE_INFO_LITERAL:
-                            return True # matched all of pure literal pattern
-                        if self.match(pattern_codes[2 * prefix_skip:]):
-                            return True
-                        i = overlap[i]
-                    break
-            string_position += 1
-        return False
-
     def set_mark(self, mark_nr, position):
         if mark_nr & 1:
             # This id marks the end of a group.
@@ -451,6 +357,103 @@
         return _sre.getlower(char_ord, self.flags)
 
 
+def search(state, pattern_codes):
+    flags = 0
+    if pattern_codes[0] == OPCODES["info"]:
+        # optimization info block
+        # <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>
+        #if pattern_codes[2] & SRE_INFO_PREFIX and pattern_codes[5] > 1:
+        #    return state.fast_search(pattern_codes)
+        flags = pattern_codes[2]
+        pattern_codes = pattern_codes[pattern_codes[1] + 1:]
+
+    string_position = state.start
+    if pattern_codes[0] == OPCODES["literal"]:
+        # Special case: Pattern starts with a literal character. This is
+        # used for short prefixes
+        character = pattern_codes[1]
+        while True:
+            while string_position < state.end \
+                    and ord(state.string[string_position]) != character:
+                string_position += 1
+            if string_position >= state.end:
+                return False
+            state.start = string_position
+            string_position += 1
+            state.string_position = string_position
+            if flags & SRE_INFO_LITERAL:
+                return True
+            if match(state, pattern_codes[2:]):
+                return True
+        return False
+
+    # General case
+    while string_position <= state.end:
+        state.reset()
+        state.start = state.string_position = string_position
+        if match(state, pattern_codes):
+            return True
+        string_position += 1
+    return False
+
+
+def fast_search(state, pattern_codes):
+    """Skips forward in a string as fast as possible using information from
+    an optimization info block."""
+    # pattern starts with a known prefix
+    # <5=length> <6=skip> <7=prefix data> <overlap data>
+    flags = pattern_codes[2]
+    prefix_len = pattern_codes[5]
+    prefix_skip = pattern_codes[6] # don't really know what this is good for
+    prefix = pattern_codes[7:7 + prefix_len]
+    overlap = pattern_codes[7 + prefix_len - 1:pattern_codes[1] + 1]
+    pattern_codes = pattern_codes[pattern_codes[1] + 1:]
+    i = 0
+    string_position = state.string_position
+    while string_position < state.end:
+        while True:
+            if ord(state.string[string_position]) != prefix[i]:
+                if i == 0:
+                    break
+                else:
+                    i = overlap[i]
+            else:
+                i += 1
+                if i == prefix_len:
+                    # found a potential match
+                    state.start = string_position + 1 - prefix_len
+                    state.string_position = string_position + 1 \
+                                                 - prefix_len + prefix_skip
+                    if flags & SRE_INFO_LITERAL:
+                        return True # matched all of pure literal pattern
+                    if match(state, pattern_codes[2 * prefix_skip:]):
+                        return True
+                    i = overlap[i]
+                break
+        string_position += 1
+    return False
+
+
+def match(state, pattern_codes):
+    # Optimization: Check string length. pattern_codes[3] contains the
+    # minimum length for a string to possibly match.
+    if pattern_codes[0] == OPCODES["info"] and pattern_codes[3]:
+        if state.end - state.string_position < pattern_codes[3]:
+            #_log("reject (got %d chars, need %d)"
+            #         % (state.end - state.string_position, pattern_codes[3]))
+            return False
+    
+    dispatcher = _OpcodeDispatcher()
+    state.context_stack.append(_MatchContext(state, pattern_codes))
+    has_matched = None
+    while len(state.context_stack) > 0:
+        context = state.context_stack[-1]
+        has_matched = dispatcher.match(context)
+        if has_matched is not None: # don't pop if context isn't done
+            state.context_stack.pop()
+    return has_matched
+
+
 class _MatchContext(object):
 
     def __init__(self, state, pattern_codes):



More information about the Pypy-commit mailing list