[pypy-svn] r22533 - in pypy/branch/ast-experiments/pypy: interpreter/pyparser interpreter/pyparser/test translator/goal
ludal at codespeak.net
Mon Jan 23 16:42:55 CET 2006
Author: ludal
Date: Mon Jan 23 16:42:50 2006
New Revision: 22533
Added:
pypy/branch/ast-experiments/pypy/translator/goal/targetebnfparser.py (contents, props changed)
Modified:
pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pysymbol.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astcompiler.py
pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py
Log:
(adim, ludal)
a new (not yet annotatable) EBNFParser that builds grammar parsers at runtime
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfgrammar.py Mon Jan 23 16:42:50 2006
@@ -17,16 +17,15 @@
_count += 1
sym_map[val] = name
sym_rmap[name] = val
+ globals()[name] = val
return val
tok_map = {}
tok_rmap = {}
-def g_add_token( **kwargs ):
+def g_add_token(sym, name):
global _count
- assert len(kwargs) == 1
- sym, name = kwargs.popitem()
if name in tok_rmap:
return tok_rmap[name]
val = _count
@@ -35,13 +34,18 @@
tok_rmap[name] = val
sym_map[val] = sym
sym_rmap[sym] = val
+ globals()[sym] = val
return val
-g_add_token( EOF='EOF' )
+
+
+g_add_token('EOF', 'EOF')
+
def grammar_grammar():
- """NOT RPYTHON (mostly because of g_add_token I suppose)
+ """
+ (mostly because of g_add_token I suppose)
Builds the grammar for the grammar file
Here's the description of the grammar's grammar ::
@@ -51,7 +55,7 @@
alternative: sequence ( '|' sequence )+
star: '*' | '+'
- sequence: (SYMBOL star? | STRING | option | group star? )+
+ sequence: (SYMBOL star? | STRING | option | group )+
option: '[' alternative ']'
group: '(' alternative ')' star?
"""
@@ -59,12 +63,12 @@
S = g_add_symbol
T = g_add_token
# star: '*' | '+'
- star = Alternative( S("star"), [Token(T(TOK_STAR='*')), Token(T(TOK_ADD='+'))] )
+ star = Alternative( S("star"), [Token(T('TOK_STAR', '*')), Token(T('TOK_ADD', '+'))] )
star_opt = KleeneStar ( S("star_opt"), 0, 1, rule=star )
# rule: SYMBOL ':' alternative
- symbol = Sequence( S("symbol"), [Token(T(TOK_SYMBOL='SYMBOL')), star_opt] )
- symboldef = Token( T(TOK_SYMDEF="SYMDEF") )
+ symbol = Sequence( S("symbol"), [Token(T('TOK_SYMBOL', 'SYMBOL')), star_opt] )
+ symboldef = Token( T('TOK_SYMDEF', 'SYMDEF') )
alternative = Sequence( S("alternative"), [])
rule = Sequence( S("rule"), [symboldef, alternative] )
@@ -73,19 +77,19 @@
# alternative: sequence ( '|' sequence )*
sequence = KleeneStar( S("sequence"), 1 )
- seq_cont_list = Sequence( S("seq_cont_list"), [Token(T(TOK_BAR='|')), sequence] )
+ seq_cont_list = Sequence( S("seq_cont_list"), [Token(T('TOK_BAR', '|')), sequence] )
sequence_cont = KleeneStar( S("sequence_cont"),0, rule=seq_cont_list )
alternative.args = [ sequence, sequence_cont ]
# option: '[' alternative ']'
- option = Sequence( S("option"), [Token(T(TOK_LBRACKET='[')), alternative, Token(T(TOK_RBRACKET=']'))] )
+ option = Sequence( S("option"), [Token(T('TOK_LBRACKET', '[')), alternative, Token(T('TOK_RBRACKET', ']'))] )
# group: '(' alternative ')'
- group = Sequence( S("group"), [Token(T(TOK_LPAR='(')), alternative, Token(T(TOK_RPAR=')')), star_opt] )
+ group = Sequence( S("group"), [Token(T('TOK_LPAR', '(')), alternative, Token(T('TOK_RPAR', ')')), star_opt] )
# sequence: (SYMBOL | STRING | option | group )+
- string = Token(T(TOK_STRING='STRING'))
+ string = Token(T('TOK_STRING', 'STRING'))
alt = Alternative( S("sequence_alt"), [symbol, string, option, group] )
sequence.args = [ alt ]
@@ -106,4 +110,4 @@
del _value
del grammar_grammar
del g_add_symbol
-del g_add_token
+# del g_add_token
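The point of the g_add_token change above: RPython's annotator cannot type **kwargs, so the keyword-argument trick (g_add_token(EOF='EOF')) is replaced by two positional arguments, and the constant is now also published as a module global. A minimal standalone sketch of the pattern (the real tables live in ebnfgrammar.py; this only mirrors their shape):

    _count = 0
    tok_map = {}     # value -> token name
    tok_rmap = {}    # token name -> value

    def g_add_token(sym, name):
        # positional args instead of **kwargs: annotatable by RPython
        global _count
        if name in tok_rmap:
            return tok_rmap[name]
        val = _count
        _count += 1
        tok_map[val] = name
        tok_rmap[name] = val
        globals()[sym] = val      # TOK_STAR etc. become module constants
        return val

    g_add_token('TOK_STAR', '*')  # was: g_add_token(TOK_STAR='*')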
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnflexer.py Mon Jan 23 16:42:50 2006
@@ -3,7 +3,7 @@
analyser in grammar.py
"""
-from grammar import TokenSource, Token
+from grammar import TokenSource, Token, AbstractContext
from ebnfgrammar import *
@@ -15,6 +15,12 @@
idx+=1
return idx
+
+class GrammarSourceContext(AbstractContext):
+ def __init__(self, pos, peek):
+ self.pos = pos
+ self.peek = peek
+
class GrammarSource(TokenSource):
"""Fully RPython - see targetebnflexer.py
The grammar tokenizer
@@ -25,8 +31,8 @@
SYMBOL: a rule symbol, usually appearing right of a SYMDEF
tokens: '[', ']', '(' ,')', '*', '+', '|'
"""
- def __init__(self, inpstring ):
- TokenSource.__init__(self)
+ def __init__(self, inpstring):
+ # TokenSource.__init__(self)
self.input = inpstring
self.pos = 0
self.begin = 0
@@ -36,7 +42,7 @@
def context(self):
"""returns an opaque context object, used to backtrack
to a well known position in the parser"""
- return self.pos, self._peeked
+ return GrammarSourceContext( self.pos, self._peeked )
def offset(self, ctx=None):
"""Returns the current parsing position from the start
@@ -44,14 +50,17 @@
if ctx is None:
return self.pos
else:
- assert type(ctx)==int
- return ctx
+ assert isinstance(ctx, GrammarSourceContext)
+ return ctx.pos
def restore(self, ctx):
"""restore the context provided by context()"""
- self.pos, self._peeked = ctx
+ assert isinstance( ctx, GrammarSourceContext )
+ self.pos = ctx.pos
+ self._peeked = ctx.peek
+
- def current_line(self):
+ def current_linesource(self):
pos = idx = self.begin
inp = self.input
end = len(inp)
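With context() now returning a GrammarSourceContext instead of a raw (pos, peeked) tuple, the save/restore protocol used for backtracking looks like this (a usage sketch; the grammar text is made up, and pythonlexer.py below gets the same treatment):

    from ebnflexer import GrammarSource

    source = GrammarSource("rule: SYMBOL ':' alternative\n")
    ctx = source.context()        # opaque GrammarSourceContext
    source.next()                 # consume one token speculatively
    source.restore(ctx)           # rewind both pos and the peeked token
    assert source.offset() == source.offset(ctx)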
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/ebnfparse.py Mon Jan 23 16:42:50 2006
@@ -1,6 +1,7 @@
#!/usr/bin/env python
-from grammar import BaseGrammarBuilder, Alternative, Sequence, Token, \
- KleeneStar, GrammarElement, build_first_sets, EmptyToken
+from grammar import BaseGrammarBuilder, Alternative, Sequence, Token
+from grammar import GrammarProxy, KleeneStar, GrammarElement, build_first_sets
+from grammar import EmptyToken, AbstractBuilder, AbstractContext
from ebnflexer import GrammarSource
import ebnfgrammar
from ebnfgrammar import GRAMMAR_GRAMMAR, sym_map
@@ -8,8 +9,32 @@
import pytoken
import pysymbol
-import re
-py_name = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*", re.M)
+
+ORDA = ord("A")
+ORDZ = ord("Z")
+ORDa = ord("a")
+ORDz = ord("z")
+ORD0 = ord("0")
+ORD9 = ord("9")
+ORD_ = ord("_")
+
+def is_py_name( name ):
+ if len(name)<1:
+ return False
+ v = ord(name[0])
+ if not (ORDA <= v <= ORDZ or
+ ORDa <= v <= ORDz or v == ORD_ ):
+ return False
+ for c in name:
+ v = ord(c)
+ if not (ORDA <= v <= ORDZ or
+ ORDa <= v <= ORDz or
+ ORD0 <= v <= ORD9 or
+ v == ORD_ ):
+ return False
+ return True
+
+
punct=['>=', '<>', '!=', '<', '>', '<=', '==', '\\*=',
'//=', '%=', '^=', '<<=', '\\*\\*=', '\\', '=',
@@ -19,6 +44,8 @@
'@', '\\[', '\\]', '`', '\\{', '\\}']
+
+
TERMINALS = [
'NAME', 'NUMBER', 'STRING', 'NEWLINE', 'ENDMARKER',
'INDENT', 'DEDENT' ]
@@ -46,7 +73,7 @@
"""
ctx = source.context()
tk = source.next()
- if tk.codename==self.codename:
+ if tk.codename == self.codename:
if tk.value not in self.keywords:
ret = builder.token( tk.codename, tk.value, source )
return self.debug_return( ret, tk.codename, tk.value )
@@ -57,7 +84,7 @@
"""special case of match token for tokens which are really keywords
"""
if not isinstance(other, Token):
- raise RuntimeError("Unexpected token type %r" % other)
+ raise RuntimeError("Unexpected token type")
if other is EmptyToken:
return False
if other.codename != self.codename:
@@ -75,7 +102,8 @@
# we do a pass through the variables to detect
# terminal symbols from non terminals
for r in self.items:
- for i,a in enumerate(r.args):
+ for i in range(len(r.args)):
+ a = r.args[i]
if a.codename in self.rules:
assert isinstance(a,Token)
r.args[i] = self.rules[a.codename]
@@ -148,7 +176,7 @@
value = node.value
tokencode = pytoken.tok_punct.get( value, None )
if tokencode is None:
- if not py_name.match( value ):
+ if not is_py_name( value ):
raise RuntimeError("Unknown STRING value ('%s')" % value )
# assume a keyword
tok = Token( pytoken.NAME, value )
@@ -177,6 +205,160 @@
raise RuntimeError("Unknown Visitor for %r" % node.name)
+
+class EBNFBuilder(AbstractBuilder):
+ """Build a grammar tree"""
+ def __init__(self, rules=None, debug=0, symbols=None ):
+ if symbols is None:
+ symbols = pysymbol.SymbolMapper()
+ AbstractBuilder.__init__(self, rules, debug, symbols)
+ self.rule_stack = []
+ self.root_rules = {}
+ self.keywords = []
+ self.seqcounts = [] # number of items in the current sequence
+ self.altcounts = [] # number of sequence in the current alternative
+ self.curaltcount = 0
+ self.curseqcount = 0
+ self.current_subrule = 0
+ self.current_rule = -1
+
+ def new_symbol(self):
+ current_rule_name = self.symbols.sym_name.get(self.current_rule,"x")
+ rule_name = ":" + current_rule_name + "_%d" % self.current_subrule
+ self.current_subrule += 1
+ symval = self.symbols.add_anon_symbol( rule_name )
+ return symval
+
+ def get_symbolcode(self, name ):
+ codename = self.symbols.sym_values.get( name, -1 )
+ if codename == -1:
+ codename = self.symbols.add_symbol( name )
+ return codename
+
+ def get_rule( self, name ):
+ codename = self.get_symbolcode( name )
+ if codename in self.root_rules:
+ return self.root_rules[codename]
+ proxy = GrammarProxy( codename )
+ self.root_rules[codename] = proxy
+ return proxy
+
+ def context(self):
+ """Return an opaque context object"""
+ return None
+
+ def restore(self, ctx):
+ """Accept an opaque context object"""
+ assert False, "Not supported"
+
+ def alternative(self, rule, source):
+# print " alternative", rule.display(level=0,symbols=ebnfgrammar.sym_map)
+ return True
+
+ def pop_rules( self, count ):
+ offset = len(self.rule_stack)-count
+ assert offset>=0
+ rules = self.rule_stack[offset:]
+ del self.rule_stack[offset:]
+ return rules
+
+ def sequence(self, rule, source, elts_number):
+# print " sequence", rule.display(level=0,symbols=ebnfgrammar.sym_map)
+ _rule = rule.codename
+ if _rule == ebnfgrammar.sequence:
+# print " -sequence", self.curaltcount, self.curseqcount
+ if self.curseqcount==1:
+ self.curseqcount = 0
+ self.curaltcount += 1
+ return True
+ rules = self.pop_rules(self.curseqcount)
+ new_rule = Sequence( self.new_symbol(), rules )
+ self.rule_stack.append( new_rule )
+ self.curseqcount = 0
+ self.curaltcount += 1
+ elif _rule == ebnfgrammar.alternative:
+# print " -alternative", self.curaltcount, self.curseqcount
+ if self.curaltcount == 1:
+ self.curaltcount = 0
+ return True
+ rules = self.pop_rules(self.curaltcount)
+ new_rule = Alternative( self.new_symbol(), rules )
+ self.rule_stack.append( new_rule )
+ self.curaltcount = 0
+ elif _rule == ebnfgrammar.group:
+# print " -group", self.curaltcount, self.curseqcount
+ self.curseqcount += 1
+ elif _rule == ebnfgrammar.option:
+# print " -option", self.curaltcount, self.curseqcount
+ self.curseqcount += 1
+ elif _rule == ebnfgrammar.rule:
+# print " -rule", self.curaltcount, self.curseqcount
+ assert len(self.rule_stack)==1
+ old_rule = self.rule_stack[0]
+ del self.rule_stack[0]
+ old_rule.codename = self.current_rule
+ self.root_rules[self.current_rule] = old_rule
+ self.current_subrule = 0
+ return True
+
+ def token(self, name, value, source):
+# print "token", name, value
+ if name == ebnfgrammar.TOK_STRING:
+ self.handle_TOK_STRING( name, value )
+ self.curseqcount += 1
+ elif name == ebnfgrammar.TOK_SYMDEF:
+ self.current_rule = self.get_symbolcode( value )
+ elif name == ebnfgrammar.TOK_SYMBOL:
+ rule = self.get_rule( value )
+ self.rule_stack.append( rule )
+ self.curseqcount += 1
+ elif name == ebnfgrammar.TOK_STAR:
+ top = self.rule_stack[-1]
+ rule = KleeneStar( self.new_symbol(), _min=0, rule=top)
+ self.rule_stack[-1] = rule
+ elif name == ebnfgrammar.TOK_ADD:
+ top = self.rule_stack[-1]
+ rule = KleeneStar( self.new_symbol(), _min=1, rule=top)
+ self.rule_stack[-1] = rule
+ elif name == ebnfgrammar.TOK_BAR:
+ assert self.curseqcount == 0
+ elif name == ebnfgrammar.TOK_LPAR:
+ self.altcounts.append( self.curaltcount )
+ self.seqcounts.append( self.curseqcount )
+ self.curseqcount = 0
+ self.curaltcount = 0
+ elif name == ebnfgrammar.TOK_RPAR:
+ assert self.curaltcount == 0
+ self.curaltcount = self.altcounts.pop()
+ self.curseqcount = self.seqcounts.pop()
+ elif name == ebnfgrammar.TOK_LBRACKET:
+ self.altcounts.append( self.curaltcount )
+ self.seqcounts.append( self.curseqcount )
+ self.curseqcount = 0
+ self.curaltcount = 0
+ elif name == ebnfgrammar.TOK_RBRACKET:
+ assert self.curaltcount == 0
+ assert self.curseqcount == 0
+ self.curaltcount = self.altcounts.pop()
+ self.curseqcount = self.seqcounts.pop()
+ return True
+
+ def handle_TOK_STRING( self, name, value ):
+ try:
+ tokencode = pytoken.tok_punct[value]
+ except KeyError:
+ if not is_py_name(value):
+ raise RuntimeError("Unknown STRING value ('%s')" % value)
+ # assume a keyword
+ tok = Token(pytoken.NAME, value)
+ if value not in self.keywords:
+ self.keywords.append(value)
+ else:
+ # punctuation
+ tok = Token(tokencode, None)
+ self.rule_stack.append(tok)
+
+
class EBNFVisitor(AbstractSyntaxVisitor):
def __init__(self):
@@ -190,7 +372,8 @@
self.symbols = pysymbol.SymbolMapper( pysymbol._cpython_symbols.sym_name )
def new_symbol(self):
- rule_name = ":%s_%s" % (self.current_rule, self.current_subrule)
+ current_rule_name = self.symbols.sym_name.get(self.current_rule,"x")
+ rule_name = ":" + self.current_rule + "_" + str(self.current_subrule)
self.current_subrule += 1
symval = self.symbols.add_anon_symbol( rule_name )
return symval
@@ -241,25 +424,24 @@
node.visit(vis)
return vis
+
def parse_grammar_text(txt):
"""parses a grammar input
stream : file-like object representing the grammar to parse
"""
source = GrammarSource(txt)
- builder = BaseGrammarBuilder()
+ builder = EBNFBuilder(pysymbol._cpython_symbols)
result = GRAMMAR_GRAMMAR.match(source, builder)
- node = builder.stack[-1]
- vis = EBNFVisitor()
- node.visit(vis)
- return vis
+ return builder
def target_parse_grammar_text(txt):
vis = parse_grammar_text(txt)
# do nothing
+ return None
-from pprint import pprint
-if __name__ == "__main__":
+def main_build():
+ from pprint import pprint
grambuild = parse_grammar(file('data/Grammar2.3'))
for i,r in enumerate(grambuild.items):
print "% 3d : %s" % (i, r)
@@ -267,3 +449,9 @@
pprint(grambuild.tokens)
print "|".join(grambuild.tokens.keys() )
+def main_build():
+ import sys
+ return parse_grammar_text( file(sys.argv[-1]).read() )
+
+if __name__ == "__main__":
+ result = main_build()
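Two things in this file are worth a sketch. First, is_py_name replaces the re module (not available to RPython) with explicit ord() range checks; second, parse_grammar_text now returns the EBNFBuilder itself rather than running an EBNFVisitor over a syntax tree. A hedged usage example (the grammar text is invented for the demo):

    from ebnfparse import parse_grammar_text, is_py_name

    assert is_py_name("lambda")       # keyword-shaped STRING values pass
    assert not is_py_name("42nd")     # leading digit fails
    assert not is_py_name("")

    builder = parse_grammar_text("expr: NAME ( '+' NAME )*\n")
    # finished rules hang off root_rules, keyed by symbol code;
    # unresolved forward references are GrammarProxy placeholders
    for codename, rule in builder.root_rules.items():
        print codename, rule.display(symbols=builder.symbols.sym_name)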
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/grammar.py Mon Jan 23 16:42:50 2006
@@ -10,10 +10,13 @@
try:
from pypy.interpreter.baseobjspace import Wrappable
from pypy.interpreter.pyparser.pytoken import NULLTOKEN
+ from pypy.interpreter.pyparser.pysymbol import SymbolMapper
except ImportError:
# allows standalone testing
Wrappable = object
- NULLTOKEN = None
+ NULLTOKEN = -1 # None
+ from pysymbol import SymbolMapper
+
from syntaxtree import SyntaxNode, TempSyntaxNode, TokenNode
@@ -28,6 +31,7 @@
else:
return "["+str(codename)+"]"
+
#### Abstract interface for a lexer/tokenizer
class TokenSource(object):
"""Abstract base class for a source tokenizer"""
@@ -50,7 +54,7 @@
of the context"""
return -1
- def current_line(self):
+ def current_linesource(self):
"""Returns the current line"""
return ""
@@ -102,7 +106,7 @@
class AbstractBuilder(object):
"""Abstract base class for builder objects"""
- def __init__(self, rules=None, debug=0, symbols={} ):
+ def __init__(self, symbols, rules=None, debug=0 ):
# a dictionary of grammar rules for debug/reference
if rules is not None:
self.rules = rules
@@ -110,7 +114,9 @@
self.rules = {}
# This attribute is here for convenience
self.debug = debug
- self.symbols = symbols # mapping from codename to symbols
+ # mapping from codename to symbols
+ assert isinstance( symbols, SymbolMapper )
+ self.symbols = symbols
def context(self):
"""Return an opaque context object"""
@@ -143,6 +149,8 @@
class BaseGrammarBuilder(AbstractBuilder):
"""Base/default class for a builder"""
def __init__(self, rules=None, debug=0, symbols={} ):
+ if rules is None:
+ rules = SymbolMapper()
AbstractBuilder.__init__(self, rules, debug, symbols )
# stacks contain different objects depending on the builder class
# to be RPython they should not be defined in the base class
@@ -355,6 +363,23 @@
+class GrammarProxy(GrammarElement):
+ def __init__(self, rule_name ):
+ GrammarElement.__init__(self, -1)
+ self.rule_name = rule_name
+ self.object = None
+
+ def display(self, level=0, symbols={}):
+ """Helper function used to represent the grammar.
+ mostly used for debugging the grammar itself"""
+ name = symbols.get(self.rule_name,str(self.rule_name))
+ repr = "Proxy("+name
+ if self.object:
+ repr+=","+self.object.display(level=1,symbols=symbols)
+ repr += ")"
+ return repr
+
+
class Alternative(GrammarElement):
"""Represents an alternative in a grammar rule (as in S -> A | B | C)"""
@@ -553,7 +578,7 @@
"""
if DEBUG > 1:
print "try kle:", self.display(0,builder.symbols)
- ctx = 0
+ ctx = None
bctx = None
if self.min:
ctx = source.context()
@@ -625,8 +650,8 @@
class Token(GrammarElement):
"""Represents a Token in a grammar rule (a lexer token)"""
- def __init__( self, codename, value = None):
- GrammarElement.__init__( self, codename )
+ def __init__(self, codename, value=None):
+ GrammarElement.__init__(self, codename)
self.value = value
self.first_set = [self]
# self.first_set = {self: 1}
@@ -673,7 +698,7 @@
the comparison algorithm is similar to the one in match()
"""
if not isinstance(other, Token):
- raise RuntimeError("Unexpected token type %r" % other)
+ raise RuntimeError("Unexpected token type")
if other is EmptyToken:
return False
res = other.codename == self.codename and self.value in [None, other.value]
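GrammarProxy is the piece that lets EBNFBuilder mention a rule before its definition has been parsed: get_rule() hands out a proxy keyed by symbol code, and the real rule is meant to be patched into proxy.object later (the resolution pass is not part of this commit). A toy illustration with a made-up symbol code:

    from grammar import GrammarProxy, Token

    FACTOR = 12                            # hypothetical symbol code
    proxy = GrammarProxy(FACTOR)           # stand-in for an unseen rule
    print proxy.display(symbols={FACTOR: "factor"})   # -> Proxy(factor)

    proxy.object = Token(FACTOR, None)     # placeholder resolution step
    print proxy.display(symbols={FACTOR: "factor"})   # -> Proxy(factor,...)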
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pysymbol.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pysymbol.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pysymbol.py Mon Jan 23 16:42:50 2006
@@ -27,7 +27,7 @@
self._count = _count
def add_symbol( self, sym ):
- assert type(sym)==str
+ # assert isinstance(sym, str)
if not sym in self.sym_values:
self._count += 1
val = self._count
@@ -37,7 +37,7 @@
return self.sym_values[ sym ]
def add_anon_symbol( self, sym ):
- assert type(sym)==str
+ # assert isinstance(sym, str)
if not sym in self.sym_values:
self._anoncount -= 1
val = self._anoncount
@@ -48,7 +48,7 @@
def __getitem__(self, sym ):
"""NOT RPYTHON"""
- assert type(sym)==str
+ # assert isinstance(sym, str)
return self.sym_values[ sym ]
@@ -65,7 +65,7 @@
"""Update the symbol module according to rules
in PythonParser instance : parser"""
for rule in parser.rules:
- _cpython_symbols.add_symbol( rule )
+ _cpython_symbols.add_symbol(rule)
# There is no symbol in this module until the grammar is loaded
# once loaded the grammar parser will fill the mappings with the
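The disabled asserts are another RPython concession: type(sym)==str comparisons do not annotate, and the isinstance replacements are left commented out for now. For reference, the mapper's two counters give named and anonymous symbols disjoint code ranges; a behavioral sketch, assuming the named counter starts at zero and the anonymous one at or below zero:

    from pysymbol import SymbolMapper

    m = SymbolMapper()
    a = m.add_symbol("file_input")          # positive, stable on re-add
    assert m.add_symbol("file_input") == a
    b = m.add_anon_symbol(":expr_0")        # negative, counts downward
    assert b < 0 < a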
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonlexer.py Mon Jan 23 16:42:50 2006
@@ -5,7 +5,7 @@
import sys
from codeop import PyCF_DONT_IMPLY_DEDENT
-from pypy.interpreter.pyparser.grammar import TokenSource, Token
+from pypy.interpreter.pyparser.grammar import TokenSource, Token, AbstractContext
from pypy.interpreter.pyparser.error import SyntaxError
import pytoken
from pytoken import NEWLINE
@@ -293,6 +293,11 @@
#print '----------------------------------------- pyparser/pythonlexer.py'
return token_list
+
+class PythonSourceContext(AbstractContext):
+ def __init__(self, pos ):
+ self.pos = pos
+
class PythonSource(TokenSource):
"""This source uses Jonathan's tokenizer"""
def __init__(self, strings, flags=0):
@@ -317,7 +322,7 @@
self._offset = pos
return tok
- def current_line(self):
+ def current_linesource(self):
"""Returns the current line being parsed"""
return self._current_line
@@ -327,11 +332,12 @@
def context(self):
"""Returns an opaque context object for later restore"""
- return self.stack_pos
+ return PythonSourceContext(self.stack_pos)
def restore(self, ctx):
"""Restores a context"""
- self.stack_pos = ctx
+ assert isinstance(ctx, PythonSourceContext)
+ self.stack_pos = ctx.pos
def peek(self):
"""returns next token without consuming it"""
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pythonutil.py Mon Jan 23 16:42:50 2006
@@ -6,6 +6,7 @@
from tuplebuilder import TupleBuilder
from astbuilder import AstBuilder
from pypy.interpreter.pyparser import pysymbol
+import pysymbol
PYTHON_PARSER = pythonparse.PYTHON_PARSER
TARGET_DICT = {
@@ -60,7 +61,7 @@
tuples (StackElement is only a wrapper class around these tuples)
"""
- builder = TupleBuilder(PYTHON_PARSER.rules, lineno=False)
+ builder = TupleBuilder(pysymbol._cpython_symbols, PYTHON_PARSER.rules, lineno=False)
if space is not None:
builder.space = space
target_rule = TARGET_DICT[mode]
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/pytoken.py Mon Jan 23 16:42:50 2006
@@ -5,21 +5,21 @@
N_TOKENS = 0
-tok_name = {}
-tok_values = {}
+# This is used to replace None
+NULLTOKEN = -1
+
+tok_name = {-1 : 'NULLTOKEN'}
+tok_values = {'NULLTOKEN' : -1}
-def add_token(name, value=None):
+def add_token(name):
global N_TOKENS
- if value is None:
- value = N_TOKENS
- N_TOKENS += 1
+ value = N_TOKENS
+ N_TOKENS += 1
_g = globals()
_g[name] = value
tok_name[value] = name
tok_values[name] = value
-# This is used to replace None
-add_token( 'NULLTOKEN', -1 )
# For compatibility, this produces the same constant values as Python 2.4.
add_token( 'ENDMARKER' )
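With NULLTOKEN seeded directly into the tables as -1, every remaining add_token call auto-numbers from zero, which is what keeps the Python 2.4 parity promised in the comment. A quick hedged cross-check against CPython's own token module:

    import token as cpython_token
    import pytoken

    for name in ['ENDMARKER', 'NAME', 'NUMBER', 'STRING', 'NEWLINE']:
        assert pytoken.tok_values[name] == getattr(cpython_token, name)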
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astcompiler.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astcompiler.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/test/test_astcompiler.py Mon Jan 23 16:42:50 2006
@@ -3,6 +3,7 @@
from pypy.interpreter.pyparser.astbuilder import AstBuilder
from pypy.interpreter.pyparser.tuplebuilder import TupleBuilder
from pypy.interpreter.pycode import PyCode
+from pypy.interpreter.pyparser.pysymbol import _cpython_symbols
import py.test
def setup_module(mod):
@@ -82,7 +83,7 @@
def compile_with_testcompiler(expr, target='exec', space=FakeSpace()):
target2 = TARGET_DICT['exec'] # xxx exec: single not really tested
- builder = TupleBuilder()
+ builder = TupleBuilder(_cpython_symbols)
PYTHON_PARSER.parse_source(expr, target2, builder)
tuples = builder.stack[-1].as_tuple(True)
from pypy.interpreter.stablecompiler import transformer, pycodegen, misc
@@ -221,7 +222,7 @@
for snippet_name in LIBSTUFF:
filepath = os.path.join(os.path.dirname(__file__), '../../../lib', snippet_name)
source = file(filepath).read()
- yield check_compile, source, 'exec'
+ yield check_compile, source, 'exec'
def test_single_inputs():
Modified: pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py
==============================================================================
--- pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py (original)
+++ pypy/branch/ast-experiments/pypy/interpreter/pyparser/tuplebuilder.py Mon Jan 23 16:42:50 2006
@@ -1,6 +1,7 @@
from grammar import AbstractBuilder, AbstractContext
from pytoken import tok_name, tok_rpunct, NEWLINE, INDENT, DEDENT, ENDMARKER
+import pysymbol
class StackElement:
"""wraps TupleBuilder's tuples"""
@@ -57,8 +58,8 @@
class TupleBuilder(AbstractBuilder):
"""A builder that directly produce the AST"""
- def __init__(self, rules=None, debug=0, lineno=True):
- AbstractBuilder.__init__(self, rules, debug)
+ def __init__(self, symbols, rules=None, debug=0, lineno=True):
+ AbstractBuilder.__init__(self, symbols, rules, debug)
# This attribute is here for convenience
self.source_encoding = None
self.lineno = lineno
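The builder signature change ripples to every constructor call: the SymbolMapper moves to the front, as the pythonutil.py and test_astcompiler.py hunks above show. In caller form:

    from pypy.interpreter.pyparser import pysymbol
    from tuplebuilder import TupleBuilder

    builder = TupleBuilder(pysymbol._cpython_symbols, lineno=False)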
Added: pypy/branch/ast-experiments/pypy/translator/goal/targetebnfparser.py
==============================================================================
--- (empty file)
+++ pypy/branch/ast-experiments/pypy/translator/goal/targetebnfparser.py Mon Jan 23 16:42:50 2006
@@ -0,0 +1,36 @@
+from pypy.interpreter.pyparser.ebnfparse import target_parse_grammar_text
+from pypy.annotation import policy as annpolicy
+
+
+entry_point = target_parse_grammar_text
+
+# _____ Define and setup target ___
+
+def target(*args):
+ policy = annpolicy.AnnotatorPolicy()
+ policy.allow_someobjects = False
+ return entry_point, [str]# , policy
+
+def get_llinterp_args():
+ return [1]
+
+# _____ Run translated _____
+def run(c_entry_point):
+ import sys
+ NBC=100
+ import time
+ src = file("../../interpreter/pyparser/data/Grammar2.4").read()
+ print "Translated:"
+ t1 = time.time()
+ for i in range(NBC):
+ c_entry_point( src )
+ t2 = time.time()
+ print "%8.5f sec/loop" % (float(t2-t1)/NBC)
+ print "CPython:"
+ t1 = time.time()
+ for i in range(NBC):
+ entry_point( src )
+ t2 = time.time()
+ print "%8.5f sec/loop" % (float(t2-t1)/NBC)
+
+
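The new target module follows the usual translator/goal protocol: target() returns the entry point plus its argument types, and run() benchmarks the translated entry point against plain CPython. Reduced to the consuming side (a sketch of the protocol, not the real driver; the grammar path is illustrative):

    import targetebnfparser

    entry_point, argtypes = targetebnfparser.target()
    assert argtypes == [str]          # translated function takes one string
    entry_point(file("Grammar2.4").read())   # also runs untranslated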