[pypy-svn] r51843 - in pypy/dist/pypy/rlib/parsing: . test

jared.grubb at codespeak.net jared.grubb at codespeak.net
Mon Feb 25 06:34:17 CET 2008


Author: jared.grubb
Date: Mon Feb 25 06:34:15 2008
New Revision: 51843

Modified:
   pypy/dist/pypy/rlib/parsing/deterministic.py
   pypy/dist/pypy/rlib/parsing/regexparse.py
   pypy/dist/pypy/rlib/parsing/test/test_deterministic.py
Log:
parsing/deterministic.py: add comments, make some code simpler 


Modified: pypy/dist/pypy/rlib/parsing/deterministic.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/deterministic.py	(original)
+++ pypy/dist/pypy/rlib/parsing/deterministic.py	Mon Feb 25 06:34:15 2008
@@ -6,22 +6,49 @@
     from sets import Set as set, ImmutableSet as frozenset
 
 def compress_char_set(chars):
+    """Take the character list and compress runs of adjacent
+    characters; the result is a list of (first character of the
+    run, length of the run) tuples, sorted with longer runs first
+    and ties broken by character order.
+    
+    Example: 'abc' => [('a', 3)]
+    Example: 'abcmxyz' => [('a',3),('x',3),('m',1)]"""
+    # Find the runs. Creates a list like [['a',3],['m',1],['x',3]]
     chars = list(chars)
     chars.sort()
-    result = [chars[0], 1]
+    result = [[chars[0], 1]]
     for a, b in zip(chars[:-1], chars[1:]):
         if ord(a) == ord(b) - 1:
-            result.append(result.pop() + 1)
+            # Found adjacent characters, increment counter
+            result[-1][1] += 1
         else:
-            result.append(b)
-            result.append(1)
-    real_result = []
-    for i in range(len(result) // 2):
-        real_result.append((result[i * 2 + 1], result[i * 2]))
-    real_result.sort()
-    real_result = zip(*zip(*real_result)[::-1])
+            # Found a 'hole', so create a new entry
+            result += [[b, 1]]
+    
+    # Change the above list into a list of sorted tuples
+    real_result = [(c,l) for [c,l] in result]
+    real_result.sort(key=lambda (l,c): (-c,l))
     return real_result
 
+def make_nice_charset_repr(chars):
+    # Compress the letters & digits
+    letters = set(chars) & set("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
+    therest = set(chars) - letters - set('-')
+    charranges = compress_char_set(letters)
+    result = []
+    for a, num in charranges:
+        if num == 1:
+            result.append(a)
+        elif num==2:    # 'ab' better than 'a-b'
+            result.append(a)
+            result.append(chr(ord(a)+1))
+        else:
+            result.append("%s-%s" % (repr(a)[1:-1], repr(chr(ord(a) + num - 1))[1:-1]))
+    result += [repr(c)[1:-1] for c in therest]
+    if '-' in chars:
+        result += ['\\-']
+    return "".join(result)
+
 class LexerError(Exception):
     def __init__(self, input, state, source_pos):
         self.input = input
@@ -36,18 +63,6 @@
         result.append("LexerError")
         return "\n".join(result)
 
-def make_nice_charset_repr(chars):
-    charranges = compress_char_set(chars)
-    result = []
-    for a, num in charranges:
-        if num == 1:
-            result.append(a)
-            if a == "-":
-                result.append("\\-")
-        else:
-            result.append("%s-%s" % (repr(a)[1:-1], repr(chr(ord(a) + num - 1))[1:-1]))
-    return "".join(result)
-
 class DFA(object):
     def __init__(self, num_states=0, transitions=None, final_states=None,
                  unmergeable_states=None, names=None):
@@ -81,8 +96,9 @@
         if name is None:
             name = str(state)
         self.names.append(name)
-        return self.num_states - 1
+        return state
 
+    # DFA returns transitions like a dict()
     def __setitem__(self, (state, input), next_state):
         self.transitions[state, input] = next_state
 
@@ -94,7 +110,7 @@
 
     def get_all_chars(self):
         all_chars = set()
-        for state, input in self.transitions:
+        for (state, input) in self.transitions:
             all_chars.add(input)
         return all_chars
 
@@ -448,25 +464,27 @@
         return result
 
     def epsilon_closure(self, states):
-        result = set(states)
+        """Return the epsilon-closure of 'states'."""
+        closure = set(states)   # states are in closure, by definition
         stack = list(states)
         while stack:
             state = stack.pop()
+            # Get all next_state s.t. state->next_state is marked epsilon (None):
             for next_state in self.transitions.get(state, {}).get(None, set()):
-                if next_state not in result:
-                    result.add(next_state)
-                    stack.append(next_state)
-        return result
+                if next_state not in closure:
+                    closure.add(next_state)
+                    stack.append(next_state)    # Need to find eps-cl of next_state
+        return closure
 
     def make_deterministic(self, name_precedence=None):
         fda = DFA()
         set_to_state = {}
         stack = []
-        def get_state(states):
+        def get_dfa_state(states):
             states = self.epsilon_closure(states)
             frozenstates = frozenset(states)
             if frozenstates in set_to_state:
-                return set_to_state[frozenstates]
+                return set_to_state[frozenstates]   # already created this state
             if states == self.start_states:
                 assert not set_to_state
             final = bool(
@@ -495,7 +513,7 @@
                 name, final, unmergeable)
             stack.append((result, states))
             return result
-        startstate = get_state(self.start_states)
+        startstate = get_dfa_state(self.start_states)
         while stack:
             fdastate, ndastates = stack.pop()
             chars_to_states = {}
@@ -506,7 +524,7 @@
             for char, states in chars_to_states.iteritems():
                 if char is None:
                     continue
-                fda[fdastate, char] = get_state(states)
+                fda[fdastate, char] = get_dfa_state(states)
         return fda
 
     def update(self, other):

Modified: pypy/dist/pypy/rlib/parsing/regexparse.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/regexparse.py	(original)
+++ pypy/dist/pypy/rlib/parsing/regexparse.py	Mon Feb 25 06:34:15 2008
@@ -161,8 +161,7 @@
     return r
 
 def make_runner(regex, view=False):
-    p = RegexParser(regex)
-    r = p.parse()
+    r = parse_regex(regex)
     nfa = r.make_automaton()
     dfa = nfa.make_deterministic()
     if view:

Modified: pypy/dist/pypy/rlib/parsing/test/test_deterministic.py
==============================================================================
--- pypy/dist/pypy/rlib/parsing/test/test_deterministic.py	(original)
+++ pypy/dist/pypy/rlib/parsing/test/test_deterministic.py	Mon Feb 25 06:34:15 2008
@@ -155,6 +155,19 @@
     fda = a.make_deterministic()
     
 def test_compress_char_set():
+    import string
     assert compress_char_set("ace") == [('a', 1), ('c', 1), ('e', 1)]
     assert compress_char_set("abcdefg") == [('a', 7)]
     assert compress_char_set("ABCabc") == [('A', 3), ('a', 3)]
+    assert compress_char_set("zycba") == [('a',3), ('y',2)]
+    assert compress_char_set(string.ascii_letters) == [('A', 26), ('a', 26)]
+    assert compress_char_set(string.printable) == [(' ', 95), ('\t', 5)]
+    
+def test_make_nice_charset_repr():
+    import string
+    assert make_nice_charset_repr("ace") == 'ace'
+    assert make_nice_charset_repr("abcdefg") == 'a-g'
+    assert make_nice_charset_repr("ABCabc") == 'A-Ca-c'
+    assert make_nice_charset_repr("zycba") == 'a-cyz'
+    assert make_nice_charset_repr(string.ascii_letters) == 'A-Za-z'
+    assert make_nice_charset_repr(string.printable) == 'A-Za-z0-9\\t\\x0b\\n\\r\\x0c! #"%$\'&)(+*,/.;:=<?>@[]\\\\_^`{}|~\\-'



More information about the Pypy-commit mailing list