[pypy-svn] r56041 - in pypy/branch/jit-hotpath/pypy/lang/automata: . test

fijal at codespeak.net fijal at codespeak.net
Mon Jun 23 21:41:54 CEST 2008


Author: fijal
Date: Mon Jun 23 21:41:53 2008
New Revision: 56041

Modified:
   pypy/branch/jit-hotpath/pypy/lang/automata/nfa.py
   pypy/branch/jit-hotpath/pypy/lang/automata/test/test_nfa.py
Log:
A version that passes the tests, but is fairly ugly. An eventual cleanup
would be cool.


Modified: pypy/branch/jit-hotpath/pypy/lang/automata/nfa.py
==============================================================================
--- pypy/branch/jit-hotpath/pypy/lang/automata/nfa.py	(original)
+++ pypy/branch/jit-hotpath/pypy/lang/automata/nfa.py	Mon Jun 23 21:41:53 2008
@@ -5,6 +5,7 @@
         self.transitions = {}
         self.final_states = {}
         self.has_epsilon_moves = False
+        self.last_transition = (-1, ' ')
 
     def add_state(self, final=False):
         state = self.num_states
@@ -14,6 +15,7 @@
         return self.num_states - 1
 
     def add_transition(self, state, input, next_state):
+        self.last_transition = (state, input)
         if input == '?':
             self.has_epsilon_moves = True
         if (state, input) in self.transitions:
@@ -21,6 +23,12 @@
         else:
             self.transitions[state, input] = [next_state]
 
+    def fixup_last_transition(self, where):
+        if self.last_transition == (-1, ' '):
+            raise RuntimeError("Something went wrong...")
+        self.transitions[self.last_transition][-1] = where
+        self.last_transition = (-1, ' ')
+
     def get_transitions(self, state, input):
         return self.transitions[state, input]
 
@@ -40,14 +48,24 @@
         for (s, v), next_s_l in self.transitions.items():
             if v == '?':
                 for next_s in next_s_l:
-                    if next_s in possible_merges:
-                        possible_merges[next_s][s] = None
+                    if s == next_s: # complete nonsense
+                        if len(next_s_l) == 1:
+                            del self.transitions[(s, v)]
+                        else:
+                            self.transitions[(s, v)].remove(s)
                     else:
-                        possible_merges[next_s] = {s:None}
+                        if next_s in possible_merges:
+                            possible_merges[next_s][s] = None
+                        else:
+                            possible_merges[next_s] = {s:None}
             else:
                 prohibited_merges[s] = None
         for k, v in possible_merges.items():
-            v = dict.fromkeys([i for i in v if i not in prohibited_merges])
+            new_v = {}
+            for i in v:
+                if i not in prohibited_merges:
+                    new_v[i] = None
+            v = new_v
             if len(v) > 1:
                 first = v.keys()[0]
                 self.merge_states(first, v)
@@ -63,7 +81,7 @@
         for k in self.final_states.keys():
             if k in vdict:
                 self.final_states[to_what] = None
-                del final_states[k]
+                del self.final_states[k]
 
     def _remove_epsilon_moves(self):
         for (s, v), next_s_l in self.transitions.items():
@@ -99,7 +117,8 @@
                 all[next] = None
         for fs in self.final_states:
             all[fs] = None
-        if all == accessible:
+        # we cannot compare dicts in rpython
+        if len(all) == len(accessible):
             return False
         else:
             for (s, v), next_s_l in self.transitions.items():
@@ -176,24 +195,32 @@
     i = pos
     last_state = -1
     state = start_state
+    previous_state = -1
     while i < len(input):
         c = input[i]
         if in_alphabet(c):
             next_state = nfa.add_state()
             nfa.add_transition(state, c, next_state)
+            previous_state = state
             state = next_state
         elif c == ')':
             break
         elif c == '(':
+            previous_state = state
             i, state = compile_part(nfa, state, input, i + 1)
         elif c == '|':
             if last_state == -1:
                 last_state = nfa.add_state()
             nfa.add_transition(state, '?', last_state)
             state = start_state
+            previous_state = -1
         elif c == '*':
-            nfa.add_transition(state, '?', start_state)
-            state = start_state
+            if nfa.last_transition[0] != -1:
+                nfa.fixup_last_transition(previous_state)
+            else:
+                nfa.add_transition(state, '?', previous_state)
+            state = previous_state
+            previous_state = -1
         else:
             raise ValueError("Unknown char %s" % c)
         i += 1

Modified: pypy/branch/jit-hotpath/pypy/lang/automata/test/test_nfa.py
==============================================================================
--- pypy/branch/jit-hotpath/pypy/lang/automata/test/test_nfa.py	(original)
+++ pypy/branch/jit-hotpath/pypy/lang/automata/test/test_nfa.py	Mon Jun 23 21:41:53 2008
@@ -77,24 +77,31 @@
     assert sorted(re.final_states.keys()) == [4, 9]
     re = compile_regex("a*")
     re.remove_epsilon_moves()
-    assert re.transitions == {(0, "a"):[1],
-                              (1, "a"):[1]}
-    assert sorted(re.final_states.keys()) == [0, 1]
+    assert re.transitions == {(0, "a"):[0]}
+    assert re.final_states.keys() == [0]
     re = compile_regex("a*b")
     re.remove_epsilon_moves()
-    assert re.transitions == {(0, "a"):[1], (1, "b"):[2],
-                              (0, 'b'):[2], (1, 'a'):[1]}
+    assert re.transitions == {(0, "a"):[0], (0, "b"):[2]}
     assert re.final_states.keys() == [2]
     re = compile_regex("|a")
     re.remove_epsilon_moves()
     assert re.transitions == {(0, "a"):[2]}
     assert re.final_states.keys() == [0,2]
+    re = compile_regex("abc(ced)*")
+    re.remove_epsilon_moves()
+    assert re.transitions == {(3, 'c'): [4], (0, 'a'): [1], (5, 'd'): [3],
+                              (4, 'e'): [5], (1, 'b'): [2], (2, 'c'): [3]}
+    assert re.final_states.keys() == [3]
     #re = compile_regex('a{0,3}')
     #assert re.transitions == {(0, "a"):[0,1],
     #                          (1, "a"):[0,2],
     #                          (2, "a"):[0,3]}
     #assert re.final_states.keys() == [0]
 
+def test_nfa_recognize():
+    nfa = compile_regex("abcc*")
+    assert recognize(nfa, "abc")
+
 def test_nfa_compiledummy():
     py.test.skip("not working")
     def main(gets):



More information about the Pypy-commit mailing list