[pypy-svn] r65631 - in pypy/branch/parser-compiler/pypy/interpreter/pyparser: . test
benjamin at codespeak.net
benjamin at codespeak.net
Sat Jun 6 22:51:09 CEST 2009
Author: benjamin
Date: Sat Jun 6 22:51:08 2009
New Revision: 65631
Modified:
pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py
pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_parser.py
Log:
add parser implementation
Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/parser.py Sat Jun 6 22:51:08 2009
@@ -30,13 +30,112 @@
class Node(object):
- def __init__(self, type, value, children):
+ __slots__ = "type value children lineno column".split()
+
+ def __init__(self, type, value, children, lineno, column):
self.type = type
self.value = value
self.children = children
+ self.lineno = lineno
+ self.column = column
+
+ def __eq__(self, other):
+ # For tests.
+ return (self.type == other.type and
+ self.value == other.value and
+ self.children == other.children)
+
+ def __repr__(self):
+ if self.value is None:
+ return "Node(type=%s, children=%r)" % (self.type, self.children)
+ else:
+ return "Node(type=%s, value=%r)" % (self.type, self.value)
+
+
+class ParseError(Exception):
+
+ def __init__(self, msg, token_type, value, lineno, column):
+ Exception.__init__(self, msg)
+ self.token_type = token_type
+ self.value = value
+ self.lineno = lineno
+ self.column = column
+
+ def __str__(self):
+ return "ParserError(%s, %r)" % (self.token_type, self.value)
class Parser(object):
def __init__(self, grammar):
- pass
+ self.grammar = grammar
+
+ def prepare(self, start=-1):
+ if start == -1:
+ start = self.grammar.start
+ self.root = None
+ current_node = Node(start, None, [], 0, 0)
+ self.stack = []
+ self.stack.append((self.grammar.dfas[start], 0, current_node))
+
+ def add_token(self, token_type, value, lineno, column):
+ label_index = self.classify(token_type, value, lineno, column)
+ while True:
+ dfa, state_index, node = self.stack[-1]
+ states, first = dfa
+ arcs = states[state_index]
+ for i, next_state in arcs:
+ t = self.grammar.labels[i][0]
+ if label_index == i:
+ self.shift(next_state, token_type, value, lineno, column)
+ state_index = next_state
+ while states[state_index] == [(0, state_index)]:
+ self.pop()
+ if not self.stack:
+ return True
+ dfa, state_index, node = self.stack[-1]
+ states = dfa[0]
+ return False
+ elif t >= 256:
+ sub_node_dfa = self.grammar.dfas[t]
+ if label_index in sub_node_dfa[1]:
+ self.push(sub_node_dfa, next_state, t, lineno, column)
+ break
+ else:
+ if (0, state_index) in arcs:
+ self.pop()
+ if not self.stack:
+ raise ParseError("too much input", token_type, value,
+ lineno, column)
+ else:
+ raise ParseError("bad input", token_type, value, lineno,
+ column)
+
+ def classify(self, token_type, value, lineno, column):
+ if token_type == self.grammar.KEYWORD_TOKEN:
+ label_index = self.grammar.keyword_ids.get(value, -1)
+ if label_index != -1:
+ return label_index
+ label_index = self.grammar.token_ids.get(token_type, -1)
+ if label_index == -1:
+ raise ParseError("invalid token", token_type, value, lineno, column)
+ return label_index
+
+ def shift(self, next_state, token_type, value, lineno, column):
+ dfa, state, node = self.stack[-1]
+ new_node = Node(token_type, value, None, lineno, column)
+ node.children.append(new_node)
+ self.stack[-1] = (dfa, next_state, node)
+
+ def push(self, next_dfa, next_state, node_type, lineno, column):
+ dfa, state, node = self.stack[-1]
+ new_node = Node(node_type, None, [], lineno, column)
+ self.stack[-1] = (dfa, next_state, node)
+ self.stack.append((next_dfa, 0, new_node))
+
+ def pop(self):
+ dfa, state, node = self.stack.pop()
+ if self.stack:
+ self.stack[-1][2].children.append(node)
+ else:
+ self.root = node
Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_parser.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_parser.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_parser.py Sat Jun 6 22:51:08 2009
@@ -45,3 +45,297 @@
assert v == 9
+# New parser tests.
+import py
+import tokenize
+import token
+import StringIO
+from pypy.interpreter.pyparser import parser, metaparser, pygram
+from pypy.interpreter.pyparser.test.test_metaparser import MyGrammar
+
+
+class SimpleParser(parser.Parser):
+
+ def parse(self, input):
+ self.prepare()
+ rl = StringIO.StringIO(input + "\n").readline
+ gen = tokenize.generate_tokens(rl)
+ for tp, value, begin, end, line in gen:
+ if self.add_token(tp, value, *begin):
+ py.test.raises(StopIteration, gen.next)
+ return self.root
+
+
+def tree_from_string(expected, gram):
+ def count_indent(s):
+ indent = 0
+ for char in s:
+ if char != " ":
+ break
+ indent += 1
+ return indent
+ last_newline_index = 0
+ for i, char in enumerate(expected):
+ if char == "\n":
+ last_newline_index = i
+ elif char != " ":
+ break
+ if last_newline_index:
+ expected = expected[last_newline_index + 1:]
+ base_indent = count_indent(expected)
+ assert not divmod(base_indent, 4)[1], "not using 4 space indentation"
+ lines = [line[base_indent:] for line in expected.splitlines()]
+ last_indent = 0
+ node_stack = []
+ for line in lines:
+ if not line.strip():
+ continue
+ data = line.split()
+ if data[0].isupper():
+ tp = getattr(token, data[0])
+ if len(data) == 2:
+ value = data[1].strip("\"")
+ elif tp == token.NEWLINE:
+ value = "\n"
+ else:
+ value = ""
+ children = None
+ else:
+ tp = gram.symbol_ids[data[0]]
+ value = None
+ children = []
+ n = parser.Node(tp, value, children, 0, 0)
+ new_indent = count_indent(line)
+ if new_indent >= last_indent:
+ if new_indent == last_indent and node_stack:
+ node_stack.pop()
+ if node_stack:
+ node_stack[-1].children.append(n)
+ node_stack.append(n)
+ else:
+ diff = last_indent - new_indent
+ pop_nodes = diff // 4 + 1
+ del node_stack[-pop_nodes:]
+ node_stack[-1].children.append(n)
+ node_stack.append(n)
+ last_indent = new_indent
+ return node_stack[0]
+
+
+class TestParser:
+
+ def parser_for(self, gram, add_endmarker=True):
+ if add_endmarker:
+ gram += " NEWLINE ENDMARKER\n"
+ pgen = metaparser.ParserGenerator(gram)
+ g = pgen.build_grammar(MyGrammar)
+ return SimpleParser(g), g
+
+ def test_multiple_rules(self):
+ gram = """foo: 'next_rule' bar 'end' NEWLINE ENDMARKER
+bar: NAME NUMBER\n"""
+ p, gram = self.parser_for(gram, False)
+ expected = """
+ foo
+ NAME "next_rule"
+ bar
+ NAME "a_name"
+ NUMBER "42"
+ NAME "end"
+ NEWLINE
+ ENDMARKER"""
+ input = "next_rule a_name 42 end"
+ assert tree_from_string(expected, gram) == p.parse(input)
+
+ def test_recursive_rule(self):
+ gram = """foo: NAME bar STRING NEWLINE ENDMARKER
+bar: NAME [bar] NUMBER\n"""
+ p, gram = self.parser_for(gram, False)
+ expected = """
+ foo
+ NAME "hi"
+ bar
+ NAME "hello"
+ bar
+ NAME "a_name"
+ NUMBER "32"
+ NUMBER "42"
+ STRING "'string'"
+ NEWLINE
+ ENDMARKER"""
+ input = "hi hello a_name 32 42 'string'"
+ assert tree_from_string(expected, gram) == p.parse(input)
+
+ def test_symbol(self):
+ gram = """parent: first_child second_child NEWLINE ENDMARKER
+first_child: NAME age
+second_child: STRING
+age: NUMBER\n"""
+ p, gram = self.parser_for(gram, False)
+ expected = """
+ parent
+ first_child
+ NAME "harry"
+ age
+ NUMBER "13"
+ second_child
+ STRING "'fred'"
+ NEWLINE
+ ENDMARKER"""
+ input = "harry 13 'fred'"
+ assert tree_from_string(expected, gram) == p.parse(input)
+
+ def test_token(self):
+ p, gram = self.parser_for("foo: NAME")
+ expected = """
+ foo
+ NAME "hi"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("hi")
+ py.test.raises(parser.ParseError, p.parse, "567")
+ p, gram = self.parser_for("foo: NUMBER NAME STRING")
+ expected = """
+ foo
+ NUMBER "42"
+ NAME "hi"
+ STRING "'bar'"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("42 hi 'bar'")
+
+ def test_optional(self):
+ p, gram = self.parser_for("foo: [NAME] 'end'")
+ expected = """
+ foo
+ NAME "hi"
+ NAME "end"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("hi end")
+ expected = """
+ foo
+ NAME "end"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("end")
+
+ def test_grouping(self):
+ p, gram = self.parser_for(
+ "foo: ((NUMBER NAME | STRING) | 'second_option')")
+ expected = """
+ foo
+ NUMBER "42"
+ NAME "hi"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("42 hi")
+ expected = """
+ foo
+ STRING "'hi'"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("'hi'")
+ expected = """
+ foo
+ NAME "second_option"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("second_option")
+ py.test.raises(parser.ParseError, p.parse, "42 a_name 'hi'")
+ py.test.raises(parser.ParseError, p.parse, "42 second_option")
+
+ def test_alternative(self):
+ p, gram = self.parser_for("foo: (NAME | NUMBER)")
+ expected = """
+ foo
+ NAME "hi"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("hi")
+ expected = """
+ foo
+ NUMBER "42"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("42")
+ py.test.raises(parser.ParseError, p.parse, "hi 23")
+ py.test.raises(parser.ParseError, p.parse, "23 hi")
+ py.test.raises(parser.ParseError, p.parse, "'some string'")
+
+ def test_keyword(self):
+ p, gram = self.parser_for("foo: 'key'")
+ expected = """
+ foo
+ NAME "key"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("key")
+ py.test.raises(parser.ParseError, p.parse, "")
+ p, gram = self.parser_for("foo: NAME 'key'")
+ expected = """
+ foo
+ NAME "some_name"
+ NAME "key"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("some_name key")
+ py.test.raises(parser.ParseError, p.parse, "some_name")
+
+ def test_repeaters(self):
+ p, gram = self.parser_for("foo: NAME+ 'end'")
+ expected = """
+ foo
+ NAME "hi"
+ NAME "bye"
+ NAME "nothing"
+ NAME "end"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("hi bye nothing end")
+ py.test.raises(parser.ParseError, p.parse, "end")
+ py.test.raises(parser.ParseError, p.parse, "hi bye")
+ p, gram = self.parser_for("foo: NAME* 'end'")
+ expected = """
+ foo
+ NAME "hi"
+ NAME "bye"
+ NAME "end"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("hi bye end")
+ py.test.raises(parser.ParseError, p.parse, "hi bye")
+ expected = """
+ foo
+ NAME "end"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("end")
+
+ p, gram = self.parser_for("foo: (NAME | NUMBER)+ 'end'")
+ expected = """
+ foo
+ NAME "a_name"
+ NAME "name_two"
+ NAME "end"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("a_name name_two end")
+ expected = """
+ foo
+ NUMBER "42"
+ NAME "name"
+ NAME "end"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("42 name end")
+ py.test.raises(parser.ParseError, p.parse, "end")
+ p, gram = self.parser_for("foo: (NAME | NUMBER)* 'end'")
+ expected = """
+ foo
+ NAME "hi"
+ NUMBER 42
+ NAME "end"
+ NEWLINE
+ ENDMARKER"""
+ assert tree_from_string(expected, gram) == p.parse("hi 42 end")
More information about the Pypy-commit mailing list