[pypy-svn] r54603 - in pypy/branch/gc-tweak/pypy/interpreter/pyparser: . test
arigo at codespeak.net
arigo at codespeak.net
Fri May 9 18:30:43 CEST 2008
Author: arigo
Date: Fri May 9 18:30:43 2008
New Revision: 54603
Modified:
pypy/branch/gc-tweak/pypy/interpreter/pyparser/grammar.py
pypy/branch/gc-tweak/pypy/interpreter/pyparser/test/test_lookahead.py
Log:
Merge r54601 from the trunk.
Modified: pypy/branch/gc-tweak/pypy/interpreter/pyparser/grammar.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/interpreter/pyparser/grammar.py (original)
+++ pypy/branch/gc-tweak/pypy/interpreter/pyparser/grammar.py Fri May 9 18:30:43 2008
@@ -183,18 +183,23 @@
class GrammarElement(Wrappable):
"""Base parser class"""
+ _trace = False
+ first_set = None
+ emptytoken_in_first_set = False
+ _match_cache = None
+ args = []
+
symbols = {} # dirty trick to provide a symbols mapping while printing (and not putting it in every object)
+ _attrs_ = ['parser', 'codename', 'args',
+ 'first_set', 'emptytoken_in_first_set', '_match_cache']
+
def __init__(self, parser, codename):
# the rule name
assert isinstance(parser, Parser)
self.parser = parser
# integer mapping to either a token value or rule symbol value
self.codename = codename
- self.args = []
- self.first_set = {}
- self.emptytoken_in_first_set = False
- self._trace = False
def is_root(self):
"""This is a root node of the grammar, that is one that will
@@ -256,7 +261,7 @@
prefix = '%s%s' % (' ' * level, prefix)
print prefix, " RULE =", self
print prefix, " TOKEN =", token
- print prefix, " FIRST SET =", self.first_set
+ print prefix, " FIRST SET =", getattr(self, 'first_set', 'none')
def _match(self, source, builder, level=0):
"""Try to match a grammar rule
@@ -313,8 +318,15 @@
"""returns the list of possible next tokens
*must* be implemented in subclasses
"""
- # XXX: first_set could probably be implemented with sets
- return []
+ pass
+
+ def get_first_set(self):
+ if self.first_set is None:
+ self.initialize_first_set()
+ return self.first_set
+
+ def initialize_first_set(self):
+ self.first_set = {}
def optimize_first_set(self):
"""Precompute a data structure that optimizes match_first_set().
@@ -353,9 +365,8 @@
- other.codename == tk.codename
- other.value == tk.value or tk.value is None
"""
- try:
- cachelist = self._match_cache
- except AttributeError:
+ cachelist = self._match_cache
+ if cachelist is None:
return True # not computed yet
cache = cachelist[other.isKeyword]
values = cache.get(other.codename, GrammarElement._EMPTY_VALUES_SET)
@@ -440,7 +451,7 @@
"""
# do this to avoid problems on indirect recursive rules
for rule in self.args:
- for t in rule.first_set:
+ for t in rule.get_first_set():
self.first_set[t] = None
def reorder_rule(self):
@@ -538,7 +549,7 @@
LAH(S) = LAH(A)
"""
for rule in self.args:
- if not rule.first_set:
+ if not rule.get_first_set():
break
if self.parser.EmptyToken in self.first_set:
del self.first_set[self.parser.EmptyToken]
@@ -573,6 +584,9 @@
raise ValueError("KleeneStar needs max==-1 or max>1")
self.max = _max
self.star = "x"
+
+ def initialize_first_set(self):
+ GrammarElement.initialize_first_set(self)
if self.min == 0:
self.first_set[self.parser.EmptyToken] = None
@@ -635,7 +649,7 @@
LAH(S) = LAH(A)
"""
rule = self.args[0]
- self.first_set = rule.first_set.copy()
+ self.first_set = rule.get_first_set().copy()
if self.min == 0:
self.first_set[self.parser.EmptyToken] = None
@@ -657,9 +671,13 @@
class Token(GrammarElement):
"""Represents a Token in a grammar rule (a lexer token)"""
isKeyword = True
+ _attrs_ = ['isKeyword', 'value']
+
def __init__(self, parser, codename, value=None):
GrammarElement.__init__(self, parser, codename)
self.value = value
+
+ def initialize_first_set(self):
self.first_set = {self: None}
def match(self, source, builder, level=0):
@@ -813,6 +831,8 @@
full first sets.
"""
rules = self.all_rules
+ for r in rules:
+ r.initialize_first_set()
changed = True
while changed:
# loop while one first set is changed
Modified: pypy/branch/gc-tweak/pypy/interpreter/pyparser/test/test_lookahead.py
==============================================================================
--- pypy/branch/gc-tweak/pypy/interpreter/pyparser/test/test_lookahead.py (original)
+++ pypy/branch/gc-tweak/pypy/interpreter/pyparser/test/test_lookahead.py Fri May 9 18:30:43 2008
@@ -12,27 +12,27 @@
self.parser.build_first_sets()
def test_basic_token(self):
- assert self.tok1.first_set == {self.tok1: None}
+ assert self.tok1.get_first_set() == {self.tok1: None}
def test_basic_alternative(self):
alt = self.parser.Alternative_n("a1t", self.tokens)
self.parser.build_first_sets()
- assert alt.first_set == dict.fromkeys(self.tokens)
+ assert alt.get_first_set() == dict.fromkeys(self.tokens)
def test_basic_sequence(self):
seq = self.parser.Sequence_n("seq", self.tokens)
self.parser.build_first_sets()
- assert seq.first_set == {self.tokens[0]: None}
+ assert seq.get_first_set() == {self.tokens[0]: None}
def test_basic_kleenstar(self):
tok1, tok2, tok3 = self.tokens
kstar1 = self.parser.KleeneStar_n("k", 1, 3, tok1)
kstar2 = self.parser.KleeneStar_n("k2", 0, 3, tok1)
self.parser.build_first_sets()
- assert kstar1.first_set == {tok1: None}
- assert kstar2.first_set == {tok1: None,
- self.parser.EmptyToken: None}
+ assert kstar1.get_first_set() == {tok1: None}
+ assert kstar2.get_first_set() == {tok1: None,
+ self.parser.EmptyToken: None}
def test_maybe_empty_sequence(self):
@@ -44,9 +44,9 @@
k2 = self.parser.KleeneStar_n("k2", 0, 2, tok2)
seq = self.parser.Sequence_n( "seq", [k1, k2])
self.parser.build_first_sets()
- assert seq.first_set == {tok1: None,
- tok2: None,
- self.parser.EmptyToken: None}
+ assert seq.get_first_set() == {tok1: None,
+ tok2: None,
+ self.parser.EmptyToken: None}
def test_not_empty_sequence(self):
@@ -58,7 +58,7 @@
k2 = self.parser.KleeneStar_n("k2", 1, 2, tok2)
seq = self.parser.Sequence_n("seq", [k1, k2])
self.parser.build_first_sets()
- assert seq.first_set == {tok1: None, tok2: None}
+ assert seq.get_first_set() == {tok1: None, tok2: None}
def test_token_comparison(self):
tok1 = self.parser.Token_n( "tok1", "foo" )
More information about the Pypy-commit
mailing list