[pypy-svn] r55906 - in pypy/branch/jit-hotpath/pypy/lang/automata: . test

fijal at codespeak.net
Mon Jun 16 23:54:45 CEST 2008


Author: fijal
Date: Mon Jun 16 23:54:43 2008
New Revision: 55906

Modified:
   pypy/branch/jit-hotpath/pypy/lang/automata/nfa.py
   pypy/branch/jit-hotpath/pypy/lang/automata/test/test_nfa.py
Log:
A simple nfa builder, with tests


Modified: pypy/branch/jit-hotpath/pypy/lang/automata/nfa.py
==============================================================================
--- pypy/branch/jit-hotpath/pypy/lang/automata/nfa.py	(original)
+++ pypy/branch/jit-hotpath/pypy/lang/automata/nfa.py	Mon Jun 16 23:54:43 2008
@@ -74,3 +74,82 @@
             i, state = stack.pop()
 
     return state in automaton.final_states
+
+class Builder(object):
+    def __init__(self):
+        self.nfa = NFA()
+        self.current_state = self.nfa.add_state()
+
+    def add_transition(self, c, state=-1, final=False):
+        if state == -1:
+            state = self.nfa.add_state(final)
+        elif final:
+            self.nfa.final_states[state] = None
+        self.nfa.add_transition(self.current_state, c, state)
+        self.current_state = state
+
+    def add_cycle(self, state):
+        """ We change all transitions pointing to current state
+        to point to state passed as argument
+        """
+        to_replace = self.current_state
+        for (fr, ch), v in self.nfa.transitions.items():
+            for i in range(len(v)):
+                if v[i] == to_replace:
+                    v[i] = state
+            if fr == to_replace:
+                del self.nfa.transitions[(fr, ch)]
+            self.nfa.transitions[(state, ch)] = v
+        try:
+            del self.nfa.final_states[to_replace]
+        except KeyError:
+            pass
+        else:
+            self.nfa.final_states[state] = None
+
+def no_more_chars(i, input):
+    for k in range(i+1, len(input)):
+        if input[k] >= 'a' and input[k] <= 'z':
+            return False
+    return True
+
+def compile_regex(input):
+    """ Simple compilation routine, just in order to not have to mess
+    up with creating automaton by hand. We assume alphabet to be a-z
+    """
+    builder = Builder()
+    i = 0
+    last_anchor = builder.current_state
+    joint_point = -1
+    paren_stack = []
+    last_state = -1
+    while i < len(input):
+        c = input[i]
+        if c >= 'a' and c <= 'z':
+            final = no_more_chars(i, input)
+            last_state = builder.current_state
+            if (final or input[i + 1] == ')') and joint_point != -1:
+                builder.add_transition(c, state=joint_point, final=final)
+                join_point = -1
+            else:
+                builder.add_transition(c, final=final)
+        elif c == "|":
+            last_state = -1
+            joint_point = builder.current_state
+            builder.current_state = last_anchor
+        elif c == '(':
+            paren_stack.append((builder.current_state, last_anchor, joint_point))
+            last_anchor = builder.current_state
+            joint_point = -1
+        elif c == ')':
+            if not paren_stack:
+                raise ValueError("Unmatched parentheses")
+            last_state, last_anchor, joint_point = paren_stack.pop()
+        elif c == '*':
+            if last_state == -1:
+                raise ValueError("Mismatched *")
+            builder.add_cycle(last_state)
+        else:
+            raise ValueError("Unknown char %s" % c)
+        i += 1
+    return builder.nfa

Modified: pypy/branch/jit-hotpath/pypy/lang/automata/test/test_nfa.py
==============================================================================
--- pypy/branch/jit-hotpath/pypy/lang/automata/test/test_nfa.py	(original)
+++ pypy/branch/jit-hotpath/pypy/lang/automata/test/test_nfa.py	Mon Jun 16 23:54:43 2008
@@ -27,6 +27,36 @@
 def test_nfa_interp():
     interpret(rundfa, [])
 
+def test_nfa_build():
+    re = compile_regex("abcd")
+    assert re.transitions == {(0, "a"):[1],
+                              (1, "b"):[2],
+                              (2, "c"):[3],
+                              (3, "d"):[4]}
+    assert re.final_states.keys() == [4]
+    re = compile_regex("ab|de")
+    assert re.transitions == {(0, "a"):[1],
+                              (1, "b"):[2],
+                              (0, "d"):[3],
+                              (3, "e"):[2]}
+    assert re.final_states.keys() == [2]
+    re = compile_regex("a(b|c)(d)")
+    assert re.transitions == {(0, "a"):[1],
+                              (1, "b"):[2],
+                              (1, "c"):[2],
+                              (2, "d"):[3]}
+    assert re.final_states.keys() == [3]
+    re = compile_regex("(a|c)(c|d)|ab")
+    assert re.transitions == {(0, "a"):[1,3],
+                              (0, "c"):[1],
+                              (1, "c"):[2],
+                              (1, "d"):[2],
+                              (3, "b"):[2]}
+    assert re.final_states.keys() == [2]
+    re = compile_regex("a*")
+    assert re.transitions == {(0, "a"):[0]}
+    assert re.final_states.keys() == [0]
+
 def test_nfa_compiledummy():
     py.test.skip("not working")
     def main(gets):



More information about the Pypy-commit mailing list