[pypy-svn] r65631 - in pypy/branch/parser-compiler/pypy/interpreter/pyparser: . test

benjamin at codespeak.net
Sat Jun 6 22:51:09 CEST 2009


Author: benjamin
Date: Sat Jun  6 22:51:08 2009
New Revision: 65631

Modified:
   pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py
   pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_parser.py
Log:
add parser implementation

Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py	Sat Jun  6 22:51:08 2009
@@ -30,13 +30,112 @@
 
 class Node(object):
 
-    def __init__(self, type, value, children):
+    __slots__ = "type value children lineno column".split()
+
+    def __init__(self, type, value, children, lineno, column):
         self.type = type
         self.value = value
         self.children = children
+        self.lineno = lineno
+        self.column = column
+
+    def __eq__(self, other):
+        # For tests.
+        return (self.type == other.type and
+                self.value == other.value and
+                self.children == other.children)
+
+    def __repr__(self):
+        if self.value is None:
+            return "Node(type=%s, children=%r)" % (self.type, self.children)
+        else:
+            return "Node(type=%s, value=%r)" % (self.type, self.value)
+
+
+class ParseError(Exception):
+
+    def __init__(self, msg, token_type, value, lineno, column):
+        Exception.__init__(self, msg)
+        self.token_type = token_type
+        self.value = value
+        self.lineno = lineno
+        self.column = column
+
+    def __str__(self):
+        return "ParserError(%s, %r)" % (self.token_type, self.value)
 
 
 class Parser(object):
 
     def __init__(self, grammar):
-        pass
+        self.grammar = grammar
+
+    def prepare(self, start=-1):
+        if start == -1:
+            start = self.grammar.start
+        self.root = None
+        current_node = Node(start, None, [], 0, 0)
+        self.stack = []
+        self.stack.append((self.grammar.dfas[start], 0, current_node))
+
+    def add_token(self, token_type, value, lineno, column):
+        label_index = self.classify(token_type, value, lineno, column)
+        while True:
+            dfa, state_index, node = self.stack[-1]
+            states, first = dfa
+            arcs = states[state_index]
+            for i, next_state in arcs:
+                t = self.grammar.labels[i][0]
+                if label_index == i:
+                    self.shift(next_state, token_type, value, lineno, column)
+                    state_index = next_state
+                    while states[state_index] == [(0, state_index)]:
+                        self.pop()
+                        if not self.stack:
+                            return True
+                        dfa, state_index, node = self.stack[-1]
+                        states = dfa[0]
+                    return False
+                elif t >= 256:
+                    sub_node_dfa = self.grammar.dfas[t]
+                    if label_index in sub_node_dfa[1]:
+                        self.push(sub_node_dfa, next_state, t, lineno, column)
+                        break
+            else:
+                if (0, state_index) in arcs:
+                    self.pop()
+                    if not self.stack:
+                        raise ParseError("too much input", token_type, value,
+                                         lineno, column)
+                else:
+                    raise ParseError("bad input", token_type, value, lineno,
+                                     column)
+
+    def classify(self, token_type, value, lineno, column):
+        if token_type == self.grammar.KEYWORD_TOKEN:
+            label_index = self.grammar.keyword_ids.get(value, -1)
+            if label_index != -1:
+                return label_index
+        label_index = self.grammar.token_ids.get(token_type, -1)
+        if label_index == -1:
+            raise ParseError("invalid token", token_type, value, lineno, column)
+        return label_index
+
+    def shift(self, next_state, token_type, value, lineno, column):
+        dfa, state, node = self.stack[-1]
+        new_node = Node(token_type, value, None, lineno, column)
+        node.children.append(new_node)
+        self.stack[-1] = (dfa, next_state, node)
+
+    def push(self, next_dfa, next_state, node_type, lineno, column):
+        dfa, state, node = self.stack[-1]
+        new_node = Node(node_type, None, [], lineno, column)
+        self.stack[-1] = (dfa, next_state, node)
+        self.stack.append((next_dfa, 0, new_node))
+
+    def pop(self):
+        dfa, state, node = self.stack.pop()
+        if self.stack:
+            self.stack[-1][2].children.append(node)
+        else:
+            self.root = node

Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_parser.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_parser.py	(original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_parser.py	Sat Jun  6 22:51:08 2009
@@ -45,3 +45,297 @@
     assert v == 9
 
 
+# New parser tests.
+import py
+import tokenize
+import token
+import StringIO
+from pypy.interpreter.pyparser import parser, metaparser, pygram
+from pypy.interpreter.pyparser.test.test_metaparser import MyGrammar
+
+
+class SimpleParser(parser.Parser):
+
+    def parse(self, input):
+        self.prepare()
+        rl = StringIO.StringIO(input + "\n").readline
+        gen = tokenize.generate_tokens(rl)
+        for tp, value, begin, end, line in gen:
+            if self.add_token(tp, value, *begin):
+                py.test.raises(StopIteration, gen.next)
+        return self.root
+
+
+def tree_from_string(expected, gram):
+    def count_indent(s):
+        indent = 0
+        for char in s:
+            if char != " ":
+                break
+            indent += 1
+        return indent
+    last_newline_index = 0
+    for i, char in enumerate(expected):
+        if char == "\n":
+            last_newline_index = i
+        elif char != " ":
+            break
+    if last_newline_index:
+        expected = expected[last_newline_index + 1:]
+    base_indent = count_indent(expected)
+    assert not divmod(base_indent, 4)[1], "not using 4 space indentation"
+    lines = [line[base_indent:] for line in expected.splitlines()]
+    last_indent = 0
+    node_stack = []
+    for line in lines:
+        if not line.strip():
+            continue
+        data = line.split()
+        if data[0].isupper():
+            tp = getattr(token, data[0])
+            if len(data) == 2:
+                value = data[1].strip("\"")
+            elif tp == token.NEWLINE:
+                value = "\n"
+            else:
+                value = ""
+            children = None
+        else:
+            tp = gram.symbol_ids[data[0]]
+            value = None
+            children = []
+        n = parser.Node(tp, value, children, 0, 0)
+        new_indent = count_indent(line)
+        if new_indent >= last_indent:
+            if new_indent == last_indent and node_stack:
+                node_stack.pop()
+            if node_stack:
+                node_stack[-1].children.append(n)
+            node_stack.append(n)
+        else:
+            diff = last_indent - new_indent
+            pop_nodes = diff // 4 + 1
+            del node_stack[-pop_nodes:]
+            node_stack[-1].children.append(n)
+            node_stack.append(n)
+        last_indent = new_indent
+    return node_stack[0]
+
+
+class TestParser:
+
+    def parser_for(self, gram, add_endmarker=True):
+        if add_endmarker:
+            gram += " NEWLINE ENDMARKER\n"
+        pgen = metaparser.ParserGenerator(gram)
+        g = pgen.build_grammar(MyGrammar)
+        return SimpleParser(g), g
+
+    def test_multiple_rules(self):
+        gram = """foo: 'next_rule' bar 'end' NEWLINE ENDMARKER
+bar: NAME NUMBER\n"""
+        p, gram = self.parser_for(gram, False)
+        expected = """
+        foo
+            NAME "next_rule"
+            bar
+                NAME "a_name"
+                NUMBER "42"
+            NAME "end"
+            NEWLINE
+            ENDMARKER"""
+        input = "next_rule a_name 42 end"
+        assert tree_from_string(expected, gram) == p.parse(input)
+
+    def test_recursive_rule(self):
+        gram = """foo: NAME bar STRING NEWLINE ENDMARKER
+bar: NAME [bar] NUMBER\n"""
+        p, gram = self.parser_for(gram, False)
+        expected = """
+        foo
+            NAME "hi"
+            bar
+                NAME "hello"
+                bar
+                    NAME "a_name"
+                    NUMBER "32"
+                NUMBER "42"
+            STRING "'string'"
+            NEWLINE
+            ENDMARKER"""
+        input = "hi hello a_name 32 42 'string'"
+        assert tree_from_string(expected, gram) == p.parse(input)
+
+    def test_symbol(self):
+        gram = """parent: first_child second_child NEWLINE ENDMARKER
+first_child: NAME age
+second_child: STRING
+age: NUMBER\n"""
+        p, gram = self.parser_for(gram, False)
+        expected = """
+        parent
+            first_child
+                NAME "harry"
+                age
+                     NUMBER "13"
+            second_child
+                STRING "'fred'"
+            NEWLINE
+            ENDMARKER"""
+        input = "harry 13 'fred'"
+        assert tree_from_string(expected, gram) == p.parse(input)
+
+    def test_token(self):
+        p, gram = self.parser_for("foo: NAME")
+        expected = """
+        foo
+           NAME "hi"
+           NEWLINE
+           ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("hi")
+        py.test.raises(parser.ParseError, p.parse, "567")
+        p, gram = self.parser_for("foo: NUMBER NAME STRING")
+        expected = """
+        foo
+           NUMBER "42"
+           NAME "hi"
+           STRING "'bar'"
+           NEWLINE
+           ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("42 hi 'bar'")
+
+    def test_optional(self):
+        p, gram = self.parser_for("foo: [NAME] 'end'")
+        expected = """
+        foo
+            NAME "hi"
+            NAME "end"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("hi end")
+        expected = """
+        foo
+            NAME "end"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("end")
+
+    def test_grouping(self):
+        p, gram = self.parser_for(
+            "foo: ((NUMBER NAME | STRING) | 'second_option')")
+        expected = """
+        foo
+            NUMBER "42"
+            NAME "hi"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("42 hi")
+        expected = """
+        foo
+            STRING "'hi'"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("'hi'")
+        expected = """
+        foo
+            NAME "second_option"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("second_option")
+        py.test.raises(parser.ParseError, p.parse, "42 a_name 'hi'")
+        py.test.raises(parser.ParseError, p.parse, "42 second_option")
+
+    def test_alternative(self):
+        p, gram = self.parser_for("foo: (NAME | NUMBER)")
+        expected = """
+        foo
+            NAME "hi"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("hi")
+        expected = """
+        foo
+            NUMBER "42"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("42")
+        py.test.raises(parser.ParseError, p.parse, "hi 23")
+        py.test.raises(parser.ParseError, p.parse, "23 hi")
+        py.test.raises(parser.ParseError, p.parse, "'some string'")
+
+    def test_keyword(self):
+        p, gram = self.parser_for("foo: 'key'")
+        expected = """
+        foo
+            NAME "key"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("key")
+        py.test.raises(parser.ParseError, p.parse, "")
+        p, gram = self.parser_for("foo: NAME 'key'")
+        expected = """
+        foo
+            NAME "some_name"
+            NAME "key"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("some_name key")
+        py.test.raises(parser.ParseError, p.parse, "some_name")
+
+    def test_repeaters(self):
+        p, gram = self.parser_for("foo: NAME+ 'end'")
+        expected = """
+        foo
+            NAME "hi"
+            NAME "bye"
+            NAME "nothing"
+            NAME "end"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("hi bye nothing end")
+        py.test.raises(parser.ParseError, p.parse, "end")
+        py.test.raises(parser.ParseError, p.parse, "hi bye")
+        p, gram = self.parser_for("foo: NAME* 'end'")
+        expected = """
+        foo
+            NAME "hi"
+            NAME "bye"
+            NAME "end"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("hi bye end")
+        py.test.raises(parser.ParseError, p.parse, "hi bye")
+        expected = """
+        foo
+            NAME "end"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("end")
+
+        p, gram = self.parser_for("foo: (NAME | NUMBER)+ 'end'")
+        expected = """
+        foo
+            NAME "a_name"
+            NAME "name_two"
+            NAME "end"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("a_name name_two end")
+        expected = """
+        foo
+            NUMBER "42"
+            NAME "name"
+            NAME "end"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("42 name end")
+        py.test.raises(parser.ParseError, p.parse, "end")
+        p, gram = self.parser_for("foo: (NAME | NUMBER)* 'end'")
+        expected = """
+        foo
+            NAME "hi"
+            NUMBER 42
+            NAME "end"
+            NEWLINE
+            ENDMARKER"""
+        assert tree_from_string(expected, gram) == p.parse("hi 42 end")



More information about the Pypy-commit mailing list