[pypy-svn] r39609 - in pypy/dist/pypy: config doc/config interpreter interpreter/astcompiler interpreter/pyparser interpreter/pyparser/test interpreter/stablecompiler module/dyngram module/recparser module/recparser/hooksamples module/recparser/test
afayolle at codespeak.net
Wed Feb 28 18:30:52 CET 2007
Author: afayolle
Date: Wed Feb 28 18:30:48 2007
New Revision: 39609
Added:
pypy/dist/pypy/doc/config/objspace.usemodules.dyngram.txt (contents, props changed)
pypy/dist/pypy/interpreter/pyparser/asthelper.py (contents, props changed)
pypy/dist/pypy/interpreter/pyparser/test/expressions.py (contents, props changed)
pypy/dist/pypy/interpreter/pyparser/test/test_parser.py (contents, props changed)
pypy/dist/pypy/module/dyngram/ (props changed)
pypy/dist/pypy/module/dyngram/__init__.py (contents, props changed)
pypy/dist/pypy/module/recparser/hooksamples/ (props changed)
pypy/dist/pypy/module/recparser/hooksamples/constchanger.py (contents, props changed)
pypy/dist/pypy/module/recparser/hooksamples/tracer.py (contents, props changed)
pypy/dist/pypy/module/recparser/test/test_dyn_grammarrules.py (contents, props changed)
Modified:
pypy/dist/pypy/config/pypyoption.py
pypy/dist/pypy/interpreter/astcompiler/ast.py
pypy/dist/pypy/interpreter/astcompiler/ast.txt
pypy/dist/pypy/interpreter/astcompiler/astgen.py
pypy/dist/pypy/interpreter/pycompiler.py
pypy/dist/pypy/interpreter/pyparser/astbuilder.py
pypy/dist/pypy/interpreter/pyparser/ebnfgrammar.py
pypy/dist/pypy/interpreter/pyparser/ebnflexer.py
pypy/dist/pypy/interpreter/pyparser/ebnfparse.py
pypy/dist/pypy/interpreter/pyparser/grammar.py
pypy/dist/pypy/interpreter/pyparser/pysymbol.py
pypy/dist/pypy/interpreter/pyparser/pythonlexer.py
pypy/dist/pypy/interpreter/pyparser/pythonparse.py
pypy/dist/pypy/interpreter/pyparser/pythonutil.py
pypy/dist/pypy/interpreter/pyparser/pytoken.py
pypy/dist/pypy/interpreter/pyparser/syntaxtree.py
pypy/dist/pypy/interpreter/pyparser/test/test_astbuilder.py
pypy/dist/pypy/interpreter/pyparser/test/test_astcompiler.py
pypy/dist/pypy/interpreter/pyparser/test/test_lookahead.py
pypy/dist/pypy/interpreter/pyparser/test/test_pytokenizer.py
pypy/dist/pypy/interpreter/pyparser/test/test_samples.py
pypy/dist/pypy/interpreter/pyparser/tuplebuilder.py
pypy/dist/pypy/interpreter/stablecompiler/transformer.py
pypy/dist/pypy/module/recparser/__init__.py
pypy/dist/pypy/module/recparser/codegen.py
pypy/dist/pypy/module/recparser/compat.py
pypy/dist/pypy/module/recparser/pyparser.py
pypy/dist/pypy/module/recparser/test/test_parser.py
Log:
merge the ast-experiments branch into the trunk.
The ast branch being in a weird state, I'm merging this by hand (as suggested
on IRC by mwh and cfbolz). Here follow the commit messages from the
ast-experiments branch.
------------------------------------------------------------------------
r39399 | afayolle | 2007-02-26 08:45:45 +0100 (lun, 26 fév 2007) | 5 lines
merge trunk with branch
svn merge -r 38798:39398 svn+ssh://codespeak.net/svn/pypy/dist
------------------------------------------------------------------------
r39210 | mwh | 2007-02-19 15:49:22 +0100 (lun, 19 fév 2007) | 2 lines
fix shallow failures in test_dyn_grammarrules.py
------------------------------------------------------------------------
r38879 | adim | 2007-02-15 14:26:12 +0100 (jeu, 15 fév 2007) | 1 line
small doc for the --withmod-dyngram option
------------------------------------------------------------------------
r38877 | adim | 2007-02-15 14:13:49 +0100 (jeu, 15 fév 2007) | 4 lines
- made a special "dyngram" module that exports the ability to modify
the grammar at runtime
------------------------------------------------------------------------
r38854 | adim | 2007-02-14 17:33:38 +0100 (mer, 14 fév 2007) | 7 lines
merged revision 38798 into the branch
fixed pyparser's unit tests
(svn merge -r 35032:38798 http://codespeak.net/svn/pypy/dist)
------------------------------------------------------------------------
r38796 | adim | 2007-02-14 11:53:28 +0100 (mer, 14 fév 2007) | 1 line
small rpython fixes
------------------------------------------------------------------------
r36913 | syt | 2007-01-18 10:07:03 +0100 (jeu, 18 jan 2007) | 1 line
make abstract builder wrappable
------------------------------------------------------------------------
r36912 | syt | 2007-01-18 10:06:29 +0100 (jeu, 18 jan 2007) | 1 line
fix target
------------------------------------------------------------------------
r36911 | syt | 2007-01-18 10:05:45 +0100 (jeu, 18 jan 2007) | 5 lines
Leysin sprint work:
more tests and bug fixes on the parser package
------------------------------------------------------------------------
r36910 | syt | 2007-01-18 10:01:45 +0100 (jeu, 18 jan 2007) | 5 lines
Leysin sprint work (mwh around):
refactoring and fixes to make it translatable (not yet there though)
------------------------------------------------------------------------
r36451 | adim | 2007-01-11 10:36:52 +0100 (jeu, 11 jan 2007) | 1 line
try to avoid using global PYTHON_PARSER
------------------------------------------------------------------------
r36450 | adim | 2007-01-11 10:34:19 +0100 (jeu, 11 jan 2007) | 2 lines
use parser's tokens dict and get rid of setattr() usage in pytoken
------------------------------------------------------------------------
r35939 | adim | 2006-12-21 17:40:55 +0100 (jeu, 21 déc 2006) | 1 line
renamed get_pyparser_for_version to make_pyparser
------------------------------------------------------------------------
r35934 | adim | 2006-12-21 16:19:02 +0100 (jeu, 21 déc 2006) | 2 lines
added a hook to modify the grammar rules at applevel
------------------------------------------------------------------------
r35933 | adim | 2006-12-21 16:13:54 +0100 (jeu, 21 déc 2006) | 5 lines
- added a reference to the parser in AstBuilder, and tried to avoid using
PYTHON_PARSER as much as possible.
- split astbuilder.py in 2 modules
------------------------------------------------------------------------
r35754 | adim | 2006-12-14 17:28:05 +0100 (jeu, 14 déc 2006) | 6 lines
small pyparser refactorings
- removed some unused code
- eased a bit python parser creation (build_parser / get_pyparser)
------------------------------------------------------------------------
r35729 | adim | 2006-12-14 11:17:18 +0100 (jeu, 14 déc 2006) | 1 line
removing old / unused code
------------------------------------------------------------------------
r35066 | adim | 2006-11-28 10:56:19 +0100 (mar, 28 nov 2006) | 4 lines
for some reason, svn failed to completely commit the merge yesterday :-(
This is the second part of the merge
------------------------------------------------------------------------
r35047 | adim | 2006-11-27 18:15:58 +0100 (lun, 27 nov 2006) | 6 lines
merging revision 35032 into the branch
Note: still need to backport keyword management from trunk
(svn merge -r 22393:35032 http://codespeak.net/svn/pypy/dist)
------------------------------------------------------------------------
r22813 | ludal | 2006-01-29 02:21:18 +0100 (dim, 29 jan 2006) | 4 lines
refactoring III
most tests pass. a problem remains with TupleBuilder messing on import xxx
------------------------------------------------------------------------
r22790 | ludal | 2006-01-28 15:13:53 +0100 (sam, 28 jan 2006) | 4 lines
refactor, part II
forgot the parser class
make pythonlexer.py use it
------------------------------------------------------------------------
r22761 | ludal | 2006-01-28 02:10:44 +0100 (sam, 28 jan 2006) | 4 lines
big refactoring, of the parser -- part I
isolates management of symbols and grammar rules into a Parser class
------------------------------------------------------------------------
r22604 | adim | 2006-01-24 17:28:50 +0100 (mar, 24 jan 2006) | 1 line
oops, forgot to checkin tracer update
------------------------------------------------------------------------
r22583 | ludal | 2006-01-24 13:18:01 +0100 (mar, 24 jan 2006) | 2 lines
bugfixes
------------------------------------------------------------------------
r22570 | adim | 2006-01-24 10:55:56 +0100 (mar, 24 jan 2006) | 1 line
updated assign tracer example with insert_before / insert_after
------------------------------------------------------------------------
r22556 | adim | 2006-01-24 09:52:24 +0100 (mar, 24 jan 2006) | 3 lines
added insert_after and insert_before methods on appropriate Node classes
(exposed those methods at applevel)
------------------------------------------------------------------------
r22540 | ludal | 2006-01-23 18:14:22 +0100 (lun, 23 jan 2006) | 4 lines
(ludal,adim)
debugging and first attempt to replace old with new grammar parser
------------------------------------------------------------------------
r22533 | ludal | 2006-01-23 16:42:50 +0100 (lun, 23 jan 2006) | 5 lines
(adim,ludal)
a new (not yet annotatable) EBNFParser that will build grammar parsers at runtime
------------------------------------------------------------------------
r22399 | adim | 2006-01-18 17:27:36 +0100 (mer, 18 jan 2006) | 4 lines
- added a parent attribute to AST nodes
- added a few hook examples (they will probably be used as a basis for improvements)
------------------------------------------------------------------------
r22393 | adim | 2006-01-18 16:36:14 +0100 (mer, 18 jan 2006) | 3 lines
create a branch to explore ast manipulation without having to make sure PyPy compiles
before each checkin
------------------------------------------------------------------------
Modified: pypy/dist/pypy/config/pypyoption.py
==============================================================================
--- pypy/dist/pypy/config/pypyoption.py (original)
+++ pypy/dist/pypy/config/pypyoption.py Wed Feb 28 18:30:48 2007
@@ -24,7 +24,7 @@
working_modules = default_modules.copy()
working_modules.update(dict.fromkeys(
["rsocket", "unicodedata", "mmap", "fcntl", "rctime", "select",
- "crypt", "signal",
+ "crypt", "signal", "dyngram",
]
))
Added: pypy/dist/pypy/doc/config/objspace.usemodules.dyngram.txt
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/doc/config/objspace.usemodules.dyngram.txt Wed Feb 28 18:30:48 2007
@@ -0,0 +1,7 @@
+Use the 'dyngram' module.
+
+The 'dyngram' module exports the 'insert_grammar_rule' function to
+application-level code. This function allows the Python grammar to
+be modified dynamically.
+
+
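
For illustration, a minimal application-level sketch of the intended use
(the rule syntax and the build-function signature are assumptions modelled
on the interp-level build_* helpers changed below, not a confirmed API):

    import dyngram

    def build_unless_stmt(builder, nb):
        # hypothetical build function, called when the new rule matches
        pass

    dyngram.insert_grammar_rule("unless_stmt: 'unless' test ':' suite",
                                {'unless_stmt': build_unless_stmt})
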
Modified: pypy/dist/pypy/interpreter/astcompiler/ast.py
==============================================================================
--- pypy/dist/pypy/interpreter/astcompiler/ast.py (original)
+++ pypy/dist/pypy/interpreter/astcompiler/ast.py Wed Feb 28 18:30:48 2007
@@ -30,6 +30,7 @@
def __init__(self, lineno = -1):
self.lineno = lineno
self.filename = ""
+ self.parent = None
#self.scope = None
def getChildren(self):
@@ -61,6 +62,12 @@
def descr_repr( self, space ):
return space.wrap( self.__repr__() )
+ def fget_parent(space, self):
+ return space.wrap(self.parent)
+
+ def fset_parent(space, self, w_parent):
+ self.parent = space.interp_w(Node, w_parent, can_be_None=False)
+
def descr_getChildNodes( self, space ):
lst = self.getChildNodes()
return space.newlist( [ space.wrap( it ) for it in lst ] )
@@ -84,6 +91,7 @@
mutate = interp2app(descr_node_mutate, unwrap_spec=[ ObjSpace, W_Root, W_Root ] ),
lineno = interp_attrproperty('lineno', cls=Node),
filename = interp_attrproperty('filename', cls=Node),
+ parent=GetSetProperty(Node.fget_parent, Node.fset_parent),
)
Node.typedef.acceptable_as_base_class = False
@@ -363,6 +371,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_And_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(And, w_subtype)
@@ -391,6 +408,8 @@
accept=interp2app(descr_And_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_And_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(And.fget_nodes, And.fset_nodes ),
+ insert_after=interp2app(And.descr_insert_after.im_func, unwrap_spec=[ObjSpace, And, Node, W_Root]),
+ insert_before=interp2app(And.descr_insert_before.im_func, unwrap_spec=[ObjSpace, And, Node, W_Root]),
)
And.typedef.acceptable_as_base_class = False
@@ -536,6 +555,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_AssList_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(AssList, w_subtype)
@@ -564,6 +592,8 @@
accept=interp2app(descr_AssList_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_AssList_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(AssList.fget_nodes, AssList.fset_nodes ),
+ insert_after=interp2app(AssList.descr_insert_after.im_func, unwrap_spec=[ObjSpace, AssList, Node, W_Root]),
+ insert_before=interp2app(AssList.descr_insert_before.im_func, unwrap_spec=[ObjSpace, AssList, Node, W_Root]),
)
AssList.typedef.acceptable_as_base_class = False
@@ -671,6 +701,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_AssTuple_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(AssTuple, w_subtype)
@@ -699,6 +738,8 @@
accept=interp2app(descr_AssTuple_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_AssTuple_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(AssTuple.fget_nodes, AssTuple.fset_nodes ),
+ insert_after=interp2app(AssTuple.descr_insert_after.im_func, unwrap_spec=[ObjSpace, AssTuple, Node, W_Root]),
+ insert_before=interp2app(AssTuple.descr_insert_before.im_func, unwrap_spec=[ObjSpace, AssTuple, Node, W_Root]),
)
AssTuple.typedef.acceptable_as_base_class = False
@@ -820,6 +861,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def fget_expr( space, self):
return space.wrap(self.expr)
def fset_expr( space, self, w_arg):
@@ -858,6 +908,8 @@
accept=interp2app(descr_Assign_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Assign_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Assign.fget_nodes, Assign.fset_nodes ),
+ insert_after=interp2app(Assign.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Assign, Node, W_Root]),
+ insert_before=interp2app(Assign.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Assign, Node, W_Root]),
expr=GetSetProperty(Assign.fget_expr, Assign.fset_expr ),
)
Assign.typedef.acceptable_as_base_class = False
@@ -1100,6 +1152,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_Bitand_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(Bitand, w_subtype)
@@ -1128,6 +1189,8 @@
accept=interp2app(descr_Bitand_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Bitand_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Bitand.fget_nodes, Bitand.fset_nodes ),
+ insert_after=interp2app(Bitand.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Bitand, Node, W_Root]),
+ insert_before=interp2app(Bitand.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Bitand, Node, W_Root]),
)
Bitand.typedef.acceptable_as_base_class = False
@@ -1166,6 +1229,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_Bitor_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(Bitor, w_subtype)
@@ -1194,6 +1266,8 @@
accept=interp2app(descr_Bitor_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Bitor_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Bitor.fget_nodes, Bitor.fset_nodes ),
+ insert_after=interp2app(Bitor.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Bitor, Node, W_Root]),
+ insert_before=interp2app(Bitor.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Bitor, Node, W_Root]),
)
Bitor.typedef.acceptable_as_base_class = False
@@ -1232,6 +1306,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_Bitxor_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(Bitxor, w_subtype)
@@ -1260,6 +1343,8 @@
accept=interp2app(descr_Bitxor_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Bitxor_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Bitxor.fget_nodes, Bitxor.fset_nodes ),
+ insert_after=interp2app(Bitxor.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Bitxor, Node, W_Root]),
+ insert_before=interp2app(Bitxor.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Bitxor, Node, W_Root]),
)
Bitxor.typedef.acceptable_as_base_class = False
@@ -1834,6 +1919,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_Decorators_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(Decorators, w_subtype)
@@ -1862,6 +1956,8 @@
accept=interp2app(descr_Decorators_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Decorators_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Decorators.fget_nodes, Decorators.fset_nodes ),
+ insert_after=interp2app(Decorators.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Decorators, Node, W_Root]),
+ insert_before=interp2app(Decorators.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Decorators, Node, W_Root]),
)
Decorators.typedef.acceptable_as_base_class = False
@@ -3544,6 +3640,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_List_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(List, w_subtype)
@@ -3572,6 +3677,8 @@
accept=interp2app(descr_List_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_List_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(List.fget_nodes, List.fset_nodes ),
+ insert_after=interp2app(List.descr_insert_after.im_func, unwrap_spec=[ObjSpace, List, Node, W_Root]),
+ insert_before=interp2app(List.descr_insert_before.im_func, unwrap_spec=[ObjSpace, List, Node, W_Root]),
)
List.typedef.acceptable_as_base_class = False
@@ -4172,6 +4279,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_Or_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(Or, w_subtype)
@@ -4200,6 +4316,8 @@
accept=interp2app(descr_Or_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Or_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Or.fget_nodes, Or.fset_nodes ),
+ insert_after=interp2app(Or.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Or, Node, W_Root]),
+ insert_before=interp2app(Or.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Or, Node, W_Root]),
)
Or.typedef.acceptable_as_base_class = False
@@ -4350,6 +4468,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def fget_dest( space, self):
if self.dest is None:
return space.w_None
@@ -4392,6 +4519,8 @@
accept=interp2app(descr_Print_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Print_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Print.fget_nodes, Print.fset_nodes ),
+ insert_after=interp2app(Print.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Print, Node, W_Root]),
+ insert_before=interp2app(Print.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Print, Node, W_Root]),
dest=GetSetProperty(Print.fget_dest, Print.fset_dest ),
)
Print.typedef.acceptable_as_base_class = False
@@ -4439,6 +4568,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def fget_dest( space, self):
if self.dest is None:
return space.w_None
@@ -4481,6 +4619,8 @@
accept=interp2app(descr_Printnl_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Printnl_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Printnl.fget_nodes, Printnl.fset_nodes ),
+ insert_after=interp2app(Printnl.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Printnl, Node, W_Root]),
+ insert_before=interp2app(Printnl.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Printnl, Node, W_Root]),
dest=GetSetProperty(Printnl.fget_dest, Printnl.fset_dest ),
)
Printnl.typedef.acceptable_as_base_class = False
@@ -4856,6 +4996,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_Sliceobj_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(Sliceobj, w_subtype)
@@ -4884,6 +5033,8 @@
accept=interp2app(descr_Sliceobj_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Sliceobj_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Sliceobj.fget_nodes, Sliceobj.fset_nodes ),
+ insert_after=interp2app(Sliceobj.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Sliceobj, Node, W_Root]),
+ insert_before=interp2app(Sliceobj.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Sliceobj, Node, W_Root]),
)
Sliceobj.typedef.acceptable_as_base_class = False
@@ -4922,6 +5073,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_Stmt_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(Stmt, w_subtype)
@@ -4950,6 +5110,8 @@
accept=interp2app(descr_Stmt_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Stmt_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Stmt.fget_nodes, Stmt.fset_nodes ),
+ insert_after=interp2app(Stmt.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Stmt, Node, W_Root]),
+ insert_before=interp2app(Stmt.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Stmt, Node, W_Root]),
)
Stmt.typedef.acceptable_as_base_class = False
@@ -5352,6 +5514,15 @@
del self.nodes[:]
for w_itm in space.unpackiterable(w_arg):
self.nodes.append( space.interp_w(Node, w_itm))
+ def descr_insert_after(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node) + 1
+ self.nodes[index:index] = added_nodes
+
+ def descr_insert_before(space, self, node, w_added_nodes):
+ added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]
+ index = self.nodes.index(node)
+ self.nodes[index:index] = added_nodes
def descr_Tuple_new(space, w_subtype, w_nodes, lineno=-1):
self = space.allocate_instance(Tuple, w_subtype)
@@ -5380,6 +5551,8 @@
accept=interp2app(descr_Tuple_accept, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
mutate=interp2app(descr_Tuple_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] ),
nodes=GetSetProperty(Tuple.fget_nodes, Tuple.fset_nodes ),
+ insert_after=interp2app(Tuple.descr_insert_after.im_func, unwrap_spec=[ObjSpace, Tuple, Node, W_Root]),
+ insert_before=interp2app(Tuple.descr_insert_before.im_func, unwrap_spec=[ObjSpace, Tuple, Node, W_Root]),
)
Tuple.typedef.acceptable_as_base_class = False
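
A short application-level sketch of the new node API (the 'parent'
attribute plus insert_before/insert_after); the compile-hook signature and
the helper functions are hypothetical, only the node methods come from the
code above:

    def hook(ast_tree, encoding, filename):       # hook signature assumed
        for assign in find_assignments(ast_tree): # hypothetical helper
            stmt = assign.parent                  # new 'parent' attribute
            # splice extra statements into the parent's nodes list
            stmt.insert_after(assign, [make_trace_stmt(assign)])
        return ast_tree
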
Modified: pypy/dist/pypy/interpreter/astcompiler/ast.txt
==============================================================================
--- pypy/dist/pypy/interpreter/astcompiler/ast.txt (original)
+++ pypy/dist/pypy/interpreter/astcompiler/ast.txt Wed Feb 28 18:30:48 2007
@@ -71,7 +71,7 @@
CallFunc: node, args!, star_args& = None, dstar_args& = None
Keyword: name*str, expr
Subscript: expr, flags*int, sub
-Ellipsis:
+Ellipsis:
Sliceobj: nodes!
Slice: expr, flags*int, lower&, upper&
Assert: test, fail&
@@ -337,6 +337,31 @@
return space.call_method(w_visitor, "visitCompare", w_self)
+def descr_Compare_mutate(space, w_self, w_visitor):
+ w_expr = space.getattr(w_self, space.wrap("expr"))
+ w_mutate_expr = space.getattr(w_expr, space.wrap("mutate"))
+ w_mutate_expr_args = Arguments(space, [ w_visitor ])
+ w_new_expr = space.call_args(w_mutate_expr, w_mutate_expr_args)
+ space.setattr(w_self, space.wrap("expr"), w_new_expr)
+
+ w_list = space.getattr(w_self, space.wrap("ops"))
+ list_w = space.unpackiterable(w_list)
+ newlist_w = []
+ for w_item in list_w:
+ w_opname, w_node = space.unpackiterable(w_item, 2)
+
+ w_node_mutate = space.getattr(w_node, space.wrap("mutate"))
+ w_node_mutate_args = Arguments(space, [ w_visitor ])
+ w_newnode = space.call_args(w_node_mutate, w_node_mutate_args)
+
+ newlist_w.append(space.newtuple([w_opname, w_newnode]))
+ w_newlist = space.newlist(newlist_w)
+ space.setattr(w_self, space.wrap("ops"), w_newlist)
+ w_visitCompare = space.getattr(w_visitor, space.wrap("visitCompare"))
+ w_visitCompare_args = Arguments(space, [ w_self ])
+ return space.call_args(w_visitCompare, w_visitCompare_args)
+
+
def descr_Dict_new(space, w_subtype, w_items, lineno=-1):
self = space.allocate_instance(Dict, w_subtype)
items = []
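
The mutate protocol above calls each child's mutate(visitor), stores the
returned node back into the parent, and finally dispatches to the matching
visitXXX method on the visitor. A sketch of an application-level visitor in
the spirit of hooksamples/constchanger.py (the class and helper below are
hypothetical):

    class ConstChanger:
        def visitConst(self, node):
            # the returned node replaces the original one in its parent
            return make_new_const(node)   # hypothetical helper
        def __getattr__(self, name):
            # default: leave all other node types unchanged
            return lambda node: node

    tree.mutate(ConstChanger())
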
Modified: pypy/dist/pypy/interpreter/astcompiler/astgen.py
==============================================================================
--- pypy/dist/pypy/interpreter/astcompiler/astgen.py (original)
+++ pypy/dist/pypy/interpreter/astcompiler/astgen.py Wed Feb 28 18:30:48 2007
@@ -319,6 +319,40 @@
print >> buf, " self.%s[:] = newlist"%(argname)
print >> buf, " return visitor.visit%s(self)" % self.name
+ def _gen_insertnodes_func(self, buf):
+ print >> buf, " def descr_insert_after(space, self, node, w_added_nodes):"
+ print >> buf, " added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]"
+ print >> buf, " index = self.nodes.index(node) + 1"
+ print >> buf, " self.nodes[index:index] = added_nodes"
+ print >> buf
+ print >> buf, " def descr_insert_before(space, self, node, w_added_nodes):"
+ print >> buf, " added_nodes = [space.interp_w(Node, w_node) for w_node in space.unpackiterable(w_added_nodes)]"
+ print >> buf, " index = self.nodes.index(node)"
+ print >> buf, " self.nodes[index:index] = added_nodes"
+
+
+ def _gen_mutate(self, buf):
+ print >> buf, " def mutate(self, visitor):"
+ if len(self.argnames) != 0:
+ for argname in self.argnames:
+ if argname in self.mutate_nodes:
+ for line in self.mutate_nodes[argname]:
+ if line.strip():
+ print >> buf, ' ' + line
+ elif self.argprops[argname] == P_NODE:
+ print >> buf, " self.%s = self.%s.mutate(visitor)" % (argname,argname)
+ elif self.argprops[argname] == P_NONE:
+ print >> buf, " if self.%s is not None:" % (argname,)
+ print >> buf, " self.%s = self.%s.mutate(visitor)" % (argname,argname)
+ elif self.argprops[argname] == P_NESTED:
+ print >> buf, " newlist = []"
+ print >> buf, " for n in self.%s:"%(argname)
+ print >> buf, " item = n.mutate(visitor)"
+ print >> buf, " if item is not None:"
+ print >> buf, " newlist.append(item)"
+ print >> buf, " self.%s[:] = newlist"%(argname)
+ print >> buf, " return visitor.visit%s(self)" % self.name
+
def _gen_fget_func(self, buf, attr, prop ):
# FGET
print >> buf, " def fget_%s( space, self):" % attr
@@ -370,6 +404,8 @@
if "fset_%s" % attr not in self.additional_methods:
self._gen_fset_func( buf, attr, prop )
+ if prop[attr] == P_NESTED and attr == 'nodes':
+ self._gen_insertnodes_func(buf)
def _gen_descr_mutate(self, buf):
if self.applevel_mutate:
@@ -426,6 +462,9 @@
print >> buf, " mutate=interp2app(descr_%s_mutate, unwrap_spec=[ObjSpace, W_Root, W_Root] )," % self.name
for attr in self.argnames:
print >> buf, " %s=GetSetProperty(%s.fget_%s, %s.fset_%s )," % (attr,self.name,attr,self.name,attr)
+ if self.argprops[attr] == P_NESTED and attr == "nodes":
+ print >> buf, " insert_after=interp2app(%s.descr_insert_after.im_func, unwrap_spec=[ObjSpace, %s, Node, W_Root])," % (self.name, self.name)
+ print >> buf, " insert_before=interp2app(%s.descr_insert_before.im_func, unwrap_spec=[ObjSpace, %s, Node, W_Root])," % (self.name, self.name)
print >> buf, " )"
print >> buf, "%s.typedef.acceptable_as_base_class = False" % self.name
@@ -660,6 +699,7 @@
def __init__(self, lineno = -1):
self.lineno = lineno
self.filename = ""
+ self.parent = None
#self.scope = None
def getChildren(self):
@@ -691,6 +731,12 @@
def descr_repr( self, space ):
return space.wrap( self.__repr__() )
+ def fget_parent(space, self):
+ return space.wrap(self.parent)
+
+ def fset_parent(space, self, w_parent):
+ self.parent = space.interp_w(Node, w_parent, can_be_None=False)
+
def descr_getChildNodes( self, space ):
lst = self.getChildNodes()
return space.newlist( [ space.wrap( it ) for it in lst ] )
@@ -714,6 +760,7 @@
mutate = interp2app(descr_node_mutate, unwrap_spec=[ ObjSpace, W_Root, W_Root ] ),
lineno = interp_attrproperty('lineno', cls=Node),
filename = interp_attrproperty('filename', cls=Node),
+ parent=GetSetProperty(Node.fget_parent, Node.fset_parent),
)
Node.typedef.acceptable_as_base_class = False
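
For concreteness: given a node with a nested child list and a plain child,
such as Assign (nodes!, expr), the generator above emits roughly this:

    def mutate(self, visitor):
        newlist = []
        for n in self.nodes:                   # P_NESTED child list
            item = n.mutate(visitor)
            if item is not None:               # returning None drops a node
                newlist.append(item)
        self.nodes[:] = newlist
        self.expr = self.expr.mutate(visitor)  # P_NODE child
        return visitor.visitAssign(self)
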
Modified: pypy/dist/pypy/interpreter/pycompiler.py
==============================================================================
--- pypy/dist/pypy/interpreter/pycompiler.py (original)
+++ pypy/dist/pypy/interpreter/pycompiler.py Wed Feb 28 18:30:48 2007
@@ -189,6 +189,7 @@
warnings.warn_explicit = old_warn_explicit
+
########
class PythonAstCompiler(PyCodeCompiler):
"""Uses the stdlib's python implementation of compiler
@@ -198,6 +199,13 @@
of incomplete inputs (e.g. we shouldn't re-compile from scratch
the whole source after having only added a new '\n')
"""
+ def __init__(self, space):
+ from pyparser.pythonparse import PYTHON_PARSER
+ PyCodeCompiler.__init__(self, space)
+ self.parser = PYTHON_PARSER
+ self.additional_rules = {}
+
+
def compile(self, source, filename, mode, flags):
from pyparser.error import SyntaxError
from pypy.interpreter import astcompiler
@@ -205,15 +213,18 @@
from pypy.interpreter.astcompiler.pycodegen import InteractiveCodeGenerator
from pypy.interpreter.astcompiler.pycodegen import ExpressionCodeGenerator
from pypy.interpreter.astcompiler.ast import Node
- from pyparser.pythonutil import AstBuilder, PYTHON_PARSER, TARGET_DICT
+ from pyparser.astbuilder import AstBuilder
from pypy.interpreter.pycode import PyCode
+ from pypy.interpreter.function import Function
flags |= stdlib___future__.generators.compiler_flag # always on (2.2 compat)
space = self.space
try:
- builder = AstBuilder(space=space)
- target_rule = TARGET_DICT[mode]
- PYTHON_PARSER.parse_source(source, target_rule, builder, flags)
+ builder = AstBuilder(self.parser, space=space)
+ for rulename, buildfunc in self.additional_rules.iteritems():
+ assert isinstance(buildfunc, Function)
+ builder.user_build_rules[rulename] = buildfunc
+ self.parser.parse_source(source, mode, builder, flags)
ast_tree = builder.rule_stack[-1]
encoding = builder.source_encoding
except SyntaxError, e:
@@ -251,4 +262,29 @@
def install_compiler_hook(space, w_callable):
# if not space.get( w_callable ):
# raise OperationError( space.w_TypeError( space.wrap( "must have a callable" ) )
- space.default_compiler.w_compile_hook = w_callable
+ space.default_compiler.w_compile_hook = w_callable
+
+def insert_grammar_rule(space, w_rule, w_buildfuncs):
+ """inserts new grammar rules to the default compiler"""
+ from pypy.interpreter import function
+ rule = space.str_w(w_rule)
+ #buildfuncs_w = w_buildfuncs.content
+ buildfuncs = {}
+ #for w_name, w_func in buildfuncs_w.iteritems():
+ # buildfuncs[space.str_w(w_name)] = space.unwrap(w_func)
+ w_iter = space.iter(w_buildfuncs)
+ while 1:
+ try:
+ w_key = space.next(w_iter)
+ w_func = space.getitem(w_buildfuncs, w_key)
+ buildfuncs[space.str_w(w_key)] = space.interp_w(function.Function, w_func)
+ except OperationError, e:
+ if not e.match(space, space.w_StopIteration):
+ raise
+ break
+ space.default_compiler.additional_rules = buildfuncs
+ space.default_compiler.parser.insert_rule(rule)
+
+# XXX cyclic import
+#from pypy.interpreter.baseobjspace import ObjSpace
+#insert_grammar_rule.unwrap_spec = [ObjSpace, str, dict]
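
Since the unwrap_spec with a plain dict is commented out because of the
cyclic import, the wrapped dict is walked manually through object-space
operations; stripped of the space calls, the loop above amounts to:

    buildfuncs = {}
    for key in iter(w_buildfuncs):     # space.iter / space.next
        func = w_buildfuncs[key]       # space.getitem
        buildfuncs[key] = func         # keys unwrapped via space.str_w
    # StopIteration (an OperationError at interp-level) ends the loop
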
Modified: pypy/dist/pypy/interpreter/pyparser/astbuilder.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/astbuilder.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/astbuilder.py Wed Feb 28 18:30:48 2007
@@ -1,518 +1,18 @@
"""This module provides the astbuilder class which is to be used
-by GrammarElements to directly build the AST during parsing
+by GrammarElements to directly build the AST during parsing
without going through the nested tuples step
"""
from grammar import BaseGrammarBuilder, AbstractContext
+
+from pypy.interpreter.function import Function
from pypy.interpreter.astcompiler import ast, consts
-from pypy.interpreter.pyparser import pythonparse
-import pypy.interpreter.pyparser.pytoken as tok
+# from pypy.interpreter.pyparser import pythonparse
+#import pypy.interpreter.pyparser.pytoken as tok
from pypy.interpreter.pyparser.error import SyntaxError
from pypy.interpreter.pyparser.parsestring import parsestr
-
-sym = pythonparse.PYTHON_PARSER.symbols
-
-DEBUG_MODE = 0
-
-### Parsing utilites #################################################
-def parse_except_clause(tokens):
- """parses 'except' [test [',' test]] ':' suite
- and returns a 4-tuple : (tokens_read, expr1, expr2, except_body)
- """
- lineno = tokens[0].lineno
- clause_length = 1
- # Read until end of except clause (bound by following 'else',
- # or 'except' or end of tokens)
- while clause_length < len(tokens):
- token = tokens[clause_length]
- if isinstance(token, TokenObject) and \
- (token.get_value() == 'except' or token.get_value() == 'else'):
- break
- clause_length += 1
- if clause_length == 3:
- # case 'except: body'
- return (3, None, None, tokens[2])
- elif clause_length == 4:
- # case 'except Exception: body':
- return (4, tokens[1], None, tokens[3])
- else:
- # case 'except Exception, exc: body'
- return (6, tokens[1], to_lvalue(tokens[3], consts.OP_ASSIGN), tokens[5])
-
-
-def parse_dotted_names(tokens):
- """parses NAME('.' NAME)* and returns full dotted name
-
- this function doesn't assume that the <tokens> list ends after the
- last 'NAME' element
- """
- first = tokens[0]
- assert isinstance(first, TokenObject)
- name = first.get_value()
- l = len(tokens)
- index = 1
- for index in range(1, l, 2):
- token = tokens[index]
- assert isinstance(token, TokenObject)
- if token.name != tok.DOT:
- break
- token = tokens[index+1]
- assert isinstance(token, TokenObject)
- name += '.'
- value = token.get_value()
- name += value
- return (index, name)
-
-def parse_argument(tokens):
- """parses function call arguments"""
- l = len(tokens)
- index = 0
- arguments = []
- last_token = None
- building_kw = False
- kw_built = False
- stararg_token = None
- dstararg_token = None
- while index < l:
- cur_token = tokens[index]
- if not isinstance(cur_token, TokenObject):
- index += 1
- if not building_kw:
- arguments.append(cur_token)
- else:
- last_token = arguments.pop()
- assert isinstance(last_token, ast.Name) # used by rtyper
- arguments.append(ast.Keyword(last_token.varname, cur_token, last_token.lineno))
- building_kw = False
- kw_built = True
- continue
- elif cur_token.name == tok.COMMA:
- index += 1
- continue
- elif cur_token.name == tok.EQUAL:
- index += 1
- building_kw = True
- continue
- elif cur_token.name == tok.STAR or cur_token.name == tok.DOUBLESTAR:
- index += 1
- if cur_token.name == tok.STAR:
- stararg_token = tokens[index]
- index += 1
- if index >= l:
- break
- index += 2 # Skip COMMA and DOUBLESTAR
- dstararg_token = tokens[index]
- break
- elif cur_token.get_value() == 'for':
- if len(arguments) != 1:
- raise SyntaxError("invalid syntax", cur_token.lineno,
- cur_token.col)
- expr = arguments[0]
- genexpr_for = parse_genexpr_for(tokens[index:])
- genexpr_for[0].is_outmost = True
- gexp = ast.GenExpr(ast.GenExprInner(expr, genexpr_for, expr.lineno), expr.lineno)
- arguments[0] = gexp
- break
- return arguments, stararg_token, dstararg_token
-
-
-def parse_fpdef(tokens, index):
- """fpdef: fpdef: NAME | '(' fplist ')'
- fplist: fpdef (',' fpdef)* [',']
-
- This intend to be a RPYTHON compliant implementation of _parse_fpdef,
- but it can't work with the default compiler.
- We switched to use astcompiler module now
- """
- nodes = []
- comma = False
- while True:
- token = tokens[index]
- index += 1
- assert isinstance(token, TokenObject)
- if token.name == tok.LPAR: # nested item
- index, node = parse_fpdef(tokens, index)
- elif token.name == tok.RPAR: # end of current nesting
- break
- else: # name
- val = token.get_value()
- node = ast.AssName(val, consts.OP_ASSIGN, token.lineno)
- nodes.append(node)
-
- token = tokens[index]
- index += 1
- assert isinstance(token, TokenObject)
- if token.name == tok.COMMA:
- comma = True
- else:
- assert token.name == tok.RPAR
- break
- if len(nodes) == 1 and not comma:
- node = nodes[0]
- else:
- node = ast.AssTuple(nodes, token.lineno)
- return index, node
-
-def parse_arglist(tokens):
- """returns names, defaults, flags"""
- l = len(tokens)
- index = 0
- defaults = []
- names = []
- flags = 0
- first_with_default = -1
- while index < l:
- cur_token = tokens[index]
- index += 1
- if not isinstance(cur_token, TokenObject):
- # XXX: think of another way to write this test
- defaults.append(cur_token)
- if first_with_default == -1:
- first_with_default = len(names) - 1
- elif cur_token.name == tok.COMMA:
- # We could skip test COMMA by incrementing index cleverly
- # but we might do some experiment on the grammar at some point
- continue
- elif cur_token.name == tok.LPAR:
- index, node = parse_fpdef(tokens, index)
- names.append(node)
- elif cur_token.name == tok.STAR or cur_token.name == tok.DOUBLESTAR:
- if cur_token.name == tok.STAR:
- cur_token = tokens[index]
- assert isinstance(cur_token, TokenObject)
- index += 1
- if cur_token.name == tok.NAME:
- val = cur_token.get_value()
- names.append( ast.AssName( val, consts.OP_ASSIGN ) )
- flags |= consts.CO_VARARGS
- index += 1
- if index >= l:
- break
- else:
- # still more tokens to read
- cur_token = tokens[index]
- index += 1
- else:
- raise SyntaxError("incomplete varags", cur_token.lineno,
- cur_token.col)
- assert isinstance(cur_token, TokenObject)
- if cur_token.name != tok.DOUBLESTAR:
- raise SyntaxError("Unexpected token", cur_token.lineno,
- cur_token.col)
- cur_token = tokens[index]
- index += 1
- assert isinstance(cur_token, TokenObject)
- if cur_token.name == tok.NAME:
- val = cur_token.get_value()
- names.append( ast.AssName( val, consts.OP_ASSIGN ) )
- flags |= consts.CO_VARKEYWORDS
- index += 1
- else:
- raise SyntaxError("incomplete varags", cur_token.lineno,
- cur_token.col)
- if index < l:
- token = tokens[index]
- raise SyntaxError("unexpected token" , token.lineno,
- token.col)
- elif cur_token.name == tok.NAME:
- val = cur_token.get_value()
- names.append( ast.AssName( val, consts.OP_ASSIGN ) )
-
- if first_with_default != -1:
- num_expected_with_default = len(names) - first_with_default
- if flags & consts.CO_VARKEYWORDS:
- num_expected_with_default -= 1
- if flags & consts.CO_VARARGS:
- num_expected_with_default -= 1
- if len(defaults) != num_expected_with_default:
- raise SyntaxError('non-default argument follows default argument',
- tokens[0].lineno, tokens[0].col)
- return names, defaults, flags
-
-
-def parse_listcomp(tokens):
- """parses 'for j in k for i in j if i %2 == 0' and returns
- a GenExprFor instance
- XXX: refactor with listmaker ?
- """
- list_fors = []
- ifs = []
- index = 0
- if tokens:
- lineno = tokens[0].lineno
- else:
- lineno = -1
- while index < len(tokens):
- token = tokens[index]
- assert isinstance(token, TokenObject) # rtyper info + check
- if token.get_value() == 'for':
- index += 1 # skip 'for'
- ass_node = to_lvalue(tokens[index], consts.OP_ASSIGN)
- index += 2 # skip 'in'
- iterables = [tokens[index]]
- index += 1
- while index < len(tokens):
- tok2 = tokens[index]
- if not isinstance(tok2, TokenObject):
- break
- if tok2.name != tok.COMMA:
- break
- iterables.append(tokens[index+1])
- index += 2
- if len(iterables) == 1:
- iterable = iterables[0]
- else:
- iterable = ast.Tuple(iterables, token.lineno)
- while index < len(tokens):
- token = tokens[index]
- assert isinstance(token, TokenObject) # rtyper info
- if token.get_value() == 'if':
- ifs.append(ast.ListCompIf(tokens[index+1], token.lineno))
- index += 2
- else:
- break
- list_fors.append(ast.ListCompFor(ass_node, iterable, ifs, lineno))
- ifs = []
- else:
- assert False, 'Unexpected token: expecting for in listcomp'
- #
- # Original implementation:
- #
- # if tokens[index].get_value() == 'for':
- # index += 1 # skip 'for'
- # ass_node = to_lvalue(tokens[index], consts.OP_ASSIGN)
- # index += 2 # skip 'in'
- # iterable = tokens[index]
- # index += 1
- # while index < len(tokens) and tokens[index].get_value() == 'if':
- # ifs.append(ast.ListCompIf(tokens[index+1]))
- # index += 2
- # list_fors.append(ast.ListCompFor(ass_node, iterable, ifs))
- # ifs = []
- # else:
- # raise ValueError('Unexpected token: %s' % tokens[index])
- return list_fors
-
-
-def parse_genexpr_for(tokens):
- """parses 'for j in k for i in j if i %2 == 0' and returns
- a GenExprFor instance
- XXX: if RPYTHON supports to pass a class object to a function,
- we could refactor parse_listcomp and parse_genexpr_for,
- and call :
- - parse_listcomp(tokens, forclass=ast.GenExprFor, ifclass=...)
- or:
- - parse_listcomp(tokens, forclass=ast.ListCompFor, ifclass=...)
- """
- genexpr_fors = []
- ifs = []
- index = 0
- if tokens:
- lineno = tokens[0].lineno
- else:
- lineno = -1
- while index < len(tokens):
- token = tokens[index]
- assert isinstance(token, TokenObject) # rtyper info + check
- if token.get_value() == 'for':
- index += 1 # skip 'for'
- ass_node = to_lvalue(tokens[index], consts.OP_ASSIGN)
- index += 2 # skip 'in'
- iterable = tokens[index]
- index += 1
- while index < len(tokens):
- token = tokens[index]
- assert isinstance(token, TokenObject) # rtyper info
- if token.get_value() == 'if':
- ifs.append(ast.GenExprIf(tokens[index+1], token.lineno))
- index += 2
- else:
- break
- genexpr_fors.append(ast.GenExprFor(ass_node, iterable, ifs, lineno))
- ifs = []
- else:
- raise SyntaxError('invalid syntax',
- token.lineno, token.col)
- return genexpr_fors
-
-
-def get_docstring(builder,stmt):
- """parses a Stmt node.
-
- If a docstring if found, the Discard node is **removed**
- from <stmt> and the docstring is returned.
-
- If no docstring is found, <stmt> is left unchanged
- and None is returned
- """
- if not isinstance(stmt, ast.Stmt):
- return None
- doc = builder.wrap_none()
- if len(stmt.nodes):
- first_child = stmt.nodes[0]
- if isinstance(first_child, ast.Discard):
- expr = first_child.expr
- if builder.is_basestring_const(expr):
- # This *is* a docstring, remove it from stmt list
- assert isinstance(expr, ast.Const)
- del stmt.nodes[0]
- doc = expr.value
- return doc
-
-
-def to_lvalue(ast_node, flags):
- lineno = ast_node.lineno
- if isinstance( ast_node, ast.Name ):
- return ast.AssName(ast_node.varname, flags, lineno)
- # return ast.AssName(ast_node.name, flags)
- elif isinstance(ast_node, ast.Tuple):
- nodes = []
- # FIXME: should ast_node.getChildren() but it's not annotable
- # because of flatten()
- for node in ast_node.nodes:
- nodes.append(to_lvalue(node, flags))
- return ast.AssTuple(nodes, lineno)
- elif isinstance(ast_node, ast.List):
- nodes = []
- # FIXME: should ast_node.getChildren() but it's not annotable
- # because of flatten()
- for node in ast_node.nodes:
- nodes.append(to_lvalue(node, flags))
- return ast.AssList(nodes, lineno)
- elif isinstance(ast_node, ast.Getattr):
- expr = ast_node.expr
- assert isinstance(ast_node, ast.Getattr)
- attrname = ast_node.attrname
- return ast.AssAttr(expr, attrname, flags, lineno)
- elif isinstance(ast_node, ast.Subscript):
- ast_node.flags = flags
- return ast_node
- elif isinstance(ast_node, ast.Slice):
- ast_node.flags = flags
- return ast_node
- else:
- if isinstance(ast_node, ast.GenExpr):
- raise SyntaxError("assign to generator expression not possible",
- lineno, 0, '')
- elif isinstance(ast_node, ast.ListComp):
- raise SyntaxError("can't assign to list comprehension",
- lineno, 0, '')
- elif isinstance(ast_node, ast.CallFunc):
- if flags == consts.OP_DELETE:
- raise SyntaxError("can't delete function call",
- lineno, 0, '')
- else:
- raise SyntaxError("can't assign to function call",
- lineno, 0, '')
- else:
- raise SyntaxError("can't assign to non-lvalue",
- lineno, 0, '')
-
-def is_augassign( ast_node ):
- if ( isinstance( ast_node, ast.Name ) or
- isinstance( ast_node, ast.Slice ) or
- isinstance( ast_node, ast.Subscript ) or
- isinstance( ast_node, ast.Getattr ) ):
- return True
- return False
-
-def get_atoms(builder, nb):
- atoms = []
- i = nb
- while i>0:
- obj = builder.pop()
- if isinstance(obj, BaseRuleObject):
- i += obj.count
- else:
- atoms.append( obj )
- i -= 1
- atoms.reverse()
- return atoms
-
-#def eval_string(value):
-# """temporary implementation
-#
-# FIXME: need to be finished (check compile.c (parsestr) and
-# stringobject.c (PyString_DecodeEscape()) for complete implementation)
-# """
-# # return eval(value)
-# if len(value) == 2:
-# return ''
-# result = ''
-# length = len(value)
-# quotetype = value[0]
-# index = 1
-# while index < length and value[index] == quotetype:
-# index += 1
-# if index == 6:
-# # empty strings like """""" or ''''''
-# return ''
-# # XXX: is it RPYTHON to do this value[index:-index]
-# chars = [char for char in value[index:len(value)-index]]
-# result = ''.join(chars)
-# result = result.replace('\\\\', '\\')
-# d = {'\\b' : '\b', '\\f' : '\f', '\\t' : '\t', '\\n' : '\n',
-# '\\r' : '\r', '\\v' : '\v', '\\a' : '\a',
-# }
-# for escaped, value in d.items():
-# result = result.replace(escaped, value)
-# return result
-
-
-## misc utilities, especially for power: rule
-def reduce_callfunc(obj, arglist):
- """generic factory for CallFunc nodes"""
- assert isinstance(arglist, ArglistObject)
- return ast.CallFunc(obj, arglist.arguments,
- arglist.stararg, arglist.dstararg, arglist.lineno)
-
-def reduce_subscript(obj, subscript):
- """generic factory for Subscript nodes"""
- assert isinstance(subscript, SubscriptObject)
- return ast.Subscript(obj, consts.OP_APPLY, subscript.value, subscript.lineno)
-
-def reduce_slice(obj, sliceobj):
- """generic factory for Slice nodes"""
- assert isinstance(sliceobj, SlicelistObject)
- if sliceobj.fake_rulename == 'slice':
- start = sliceobj.value[0]
- end = sliceobj.value[1]
- return ast.Slice(obj, consts.OP_APPLY, start, end, sliceobj.lineno)
- else:
- return ast.Subscript(obj, consts.OP_APPLY, ast.Sliceobj(sliceobj.value,
- sliceobj.lineno), sliceobj.lineno)
-
-def parse_attraccess(tokens):
- """parses token list like ['a', '.', 'b', '.', 'c', ...]
-
- and returns an ast node : ast.Getattr(Getattr(Name('a'), 'b'), 'c' ...)
- """
- token = tokens[0]
- # XXX HACK for when parse_attraccess is called from build_decorator
- if isinstance(token, TokenObject):
- val = token.get_value()
- result = ast.Name(val, token.lineno)
- else:
- result = token
- index = 1
- while index < len(tokens):
- token = tokens[index]
- if isinstance(token, TokenObject) and token.name == tok.DOT:
- index += 1
- token = tokens[index]
- assert isinstance(token, TokenObject)
- result = ast.Getattr(result, token.get_value(), token.lineno)
- elif isinstance(token, ArglistObject):
- result = reduce_callfunc(result, token)
- elif isinstance(token, SubscriptObject):
- result = reduce_subscript(result, token)
- elif isinstance(token, SlicelistObject):
- result = reduce_slice(result, token)
- else:
- assert False, "Don't know how to handle index %s of %s" % (index, len(tokens))
- index += 1
- return result
-
+from pypy.interpreter.gateway import interp2app
+from asthelper import *
## building functions helpers
## --------------------------
@@ -546,31 +46,31 @@
top = atoms[0]
if isinstance(top, TokenObject):
# assert isinstance(top, TokenObject) # rtyper
- if top.name == tok.LPAR:
+ if top.name == builder.parser.tokens['LPAR']:
if len(atoms) == 2:
builder.push(ast.Tuple([], top.lineno))
else:
builder.push( atoms[1] )
- elif top.name == tok.LSQB:
+ elif top.name == builder.parser.tokens['LSQB']:
if len(atoms) == 2:
builder.push(ast.List([], top.lineno))
else:
list_node = atoms[1]
list_node.lineno = top.lineno
builder.push(list_node)
- elif top.name == tok.LBRACE:
+ elif top.name == builder.parser.tokens['LBRACE']:
items = []
for index in range(1, len(atoms)-1, 4):
# a : b , c : d
# ^ +1 +2 +3 +4
items.append((atoms[index], atoms[index+2]))
builder.push(ast.Dict(items, top.lineno))
- elif top.name == tok.NAME:
+ elif top.name == builder.parser.tokens['NAME']:
val = top.get_value()
builder.push( ast.Name(val, top.lineno) )
- elif top.name == tok.NUMBER:
+ elif top.name == builder.parser.tokens['NUMBER']:
builder.push(ast.Const(builder.eval_number(top.get_value()), top.lineno))
- elif top.name == tok.STRING:
+ elif top.name == builder.parser.tokens['STRING']:
# need to concatenate strings in atoms
s = ''
if len(atoms) == 1:
@@ -586,7 +86,7 @@
accum.append(parsestr(builder.space, builder.source_encoding, token.get_value()))
w_s = space.call_method(empty, 'join', space.newlist(accum))
builder.push(ast.Const(w_s, top.lineno))
- elif top.name == tok.BACKQUOTE:
+ elif top.name == builder.parser.tokens['BACKQUOTE']:
builder.push(ast.Backquote(atoms[1], atoms[1].lineno))
else:
raise SyntaxError("unexpected tokens", top.lineno, top.col)
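
The recurring change in this file is mechanical: the module-global token
constants give way to per-parser lookups, so token ids become owned by the
Parser instance (a sketch; the exact dict layout is an assumption based on
the pytoken/pysymbol changes in this merge):

    class Parser:
        def __init__(self):
            # symbolic token name -> parser-local integer id
            self.tokens = {}        # e.g. self.tokens['LPAR']
            # reverse map: id -> literal value, used in build_comparison
            self.tok_rvalues = {}
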
@@ -607,11 +107,11 @@
else:
lineno = atoms[0].lineno
token = atoms[-2]
- if isinstance(token, TokenObject) and token.name == tok.DOUBLESTAR:
- obj = parse_attraccess(slicecut(atoms, 0, -2))
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['DOUBLESTAR']:
+ obj = parse_attraccess(slicecut(atoms, 0, -2), builder)
builder.push(ast.Power( obj, atoms[-1], lineno))
else:
- obj = parse_attraccess(atoms)
+ obj = parse_attraccess(atoms, builder)
builder.push(obj)
def build_factor(builder, nb):
@@ -622,11 +122,11 @@
token = atoms[0]
lineno = token.lineno
if isinstance(token, TokenObject):
- if token.name == tok.PLUS:
+ if token.name == builder.parser.tokens['PLUS']:
builder.push( ast.UnaryAdd( atoms[1], lineno) )
- if token.name == tok.MINUS:
+ if token.name == builder.parser.tokens['MINUS']:
builder.push( ast.UnarySub( atoms[1], lineno) )
- if token.name == tok.TILDE:
+ if token.name == builder.parser.tokens['TILDE']:
builder.push( ast.Invert( atoms[1], lineno) )
def build_term(builder, nb):
@@ -637,13 +137,13 @@
right = atoms[i]
op_node = atoms[i-1]
assert isinstance(op_node, TokenObject)
- if op_node.name == tok.STAR:
+ if op_node.name == builder.parser.tokens['STAR']:
left = ast.Mul( left, right, left.lineno )
- elif op_node.name == tok.SLASH:
+ elif op_node.name == builder.parser.tokens['SLASH']:
left = ast.Div( left, right, left.lineno )
- elif op_node.name == tok.PERCENT:
+ elif op_node.name == builder.parser.tokens['PERCENT']:
left = ast.Mod( left, right, left.lineno )
- elif op_node.name == tok.DOUBLESLASH:
+ elif op_node.name == builder.parser.tokens['DOUBLESLASH']:
left = ast.FloorDiv( left, right, left.lineno )
else:
token = atoms[i-1]
@@ -658,9 +158,9 @@
right = atoms[i]
op_node = atoms[i-1]
assert isinstance(op_node, TokenObject)
- if op_node.name == tok.PLUS:
+ if op_node.name == builder.parser.tokens['PLUS']:
left = ast.Add( left, right, left.lineno)
- elif op_node.name == tok.MINUS:
+ elif op_node.name == builder.parser.tokens['MINUS']:
left = ast.Sub( left, right, left.lineno)
else:
token = atoms[i-1]
@@ -676,9 +176,9 @@
right = atoms[i]
op_node = atoms[i-1]
assert isinstance(op_node, TokenObject)
- if op_node.name == tok.LEFTSHIFT:
+ if op_node.name == builder.parser.tokens['LEFTSHIFT']:
left = ast.LeftShift( left, right, lineno )
- elif op_node.name == tok.RIGHTSHIFT:
+ elif op_node.name == builder.parser.tokens['RIGHTSHIFT']:
left = ast.RightShift( left, right, lineno )
else:
token = atoms[i-1]
@@ -727,7 +227,7 @@
# 'is', 'is not', 'not' or 'not in' => tok.get_value()
token = atoms[i]
assert isinstance(token, TokenObject)
- op_name = tok.tok_rpunct.get(token.name, token.get_value())
+ op_name = builder.parser.tok_rvalues.get(token.name, token.get_value())
ops.append((op_name, atoms[i+1]))
builder.push(ast.Compare(atoms[0], ops, atoms[0].lineno))
@@ -755,15 +255,18 @@
lineno = token.lineno
assert isinstance(token, TokenObject)
if token.get_value() == 'not':
- builder.push(TokenObject(tok.NAME, 'not in', lineno))
+ builder.push(TokenObject(builder.parser.tokens['NAME'], 'not in', lineno, builder.parser))
else:
- builder.push(TokenObject(tok.NAME, 'is not', lineno))
+ builder.push(TokenObject(builder.parser.tokens['NAME'], 'is not', lineno, builder.parser))
else:
assert False, "TODO" # uh ?
def build_or_test(builder, nb):
return build_binary_expr(builder, nb, ast.Or)
def build_and_test(builder, nb):
return build_binary_expr(builder, nb, ast.And)
@@ -811,7 +314,7 @@
return
op = atoms[1]
assert isinstance(op, TokenObject)
- if op.name == tok.EQUAL:
+ if op.name == builder.parser.tokens['EQUAL']:
nodes = []
for i in range(0,l-2,2):
lvalue = to_lvalue(atoms[i], consts.OP_ASSIGN)
@@ -845,7 +348,7 @@
lineno = -1
for n in range(0,l,2):
node = atoms[n]
- if isinstance(node, TokenObject) and node.name == tok.NEWLINE:
+ if isinstance(node, TokenObject) and node.name == builder.parser.tokens['NEWLINE']:
nodes.append(ast.Discard(ast.Const(builder.wrap_none()), node.lineno))
else:
nodes.append(node)
@@ -871,10 +374,10 @@
for node in atoms:
if isinstance(node, ast.Stmt):
stmts.extend(node.nodes)
- elif isinstance(node, TokenObject) and node.name == tok.ENDMARKER:
+ elif isinstance(node, TokenObject) and node.name == builder.parser.tokens['ENDMARKER']:
# XXX Can't we just remove the last element of the list ?
break
- elif isinstance(node, TokenObject) and node.name == tok.NEWLINE:
+ elif isinstance(node, TokenObject) and node.name == builder.parser.tokens['NEWLINE']:
continue
else:
stmts.append(node)
@@ -894,11 +397,12 @@
l = len(atoms)
if l == 1 or l==2:
atom0 = atoms[0]
- if isinstance(atom0, TokenObject) and atom0.name == tok.NEWLINE:
- atom0 = ast.Pass(atom0.lineno)
+ if isinstance(atom0, TokenObject) and atom0.name == builder.parser.tokens['NEWLINE']:
+ # atom0 = ast.Pass(atom0.lineno) # break test_astcompiler
+ atom0 = ast.Stmt([], atom0.lineno) # break test_astbuilder
elif not isinstance(atom0, ast.Stmt):
atom0 = ast.Stmt([atom0], atom0.lineno)
- builder.push(ast.Module(builder.wrap_none(), atom0, atom0.lineno))
+ builder.push(ast.Module(builder.space.w_None, atom0, atom0.lineno))
else:
assert False, "Forbidden path"
@@ -914,7 +418,7 @@
return
items = []
token = atoms[1]
- if isinstance(token, TokenObject) and token.name == tok.COMMA:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['COMMA']:
for i in range(0, l, 2): # items sit at even indices of atoms, with commas in between
items.append(atoms[i])
else:
@@ -944,22 +448,23 @@
atoms = get_atoms(builder, nb)
lineno = atoms[0].lineno
code = atoms[-1]
- names, defaults, flags = parse_arglist(slicecut(atoms, 1, -2))
+ names, defaults, flags = parse_arglist(slicecut(atoms, 1, -2), builder)
builder.push(ast.Lambda(names, defaults, flags, code, lineno))
+
def build_trailer(builder, nb):
"""trailer: '(' ')' | '(' arglist ')' | '[' subscriptlist ']' | '.' NAME
"""
atoms = get_atoms(builder, nb)
first_token = atoms[0]
# Case 1 : '(' ...
- if isinstance(first_token, TokenObject) and first_token.name == tok.LPAR:
- if len(atoms) == 2: # and atoms[1].token == tok.RPAR:
+ if isinstance(first_token, TokenObject) and first_token.name == builder.parser.tokens['LPAR']:
+ if len(atoms) == 2: # and atoms[1].token == builder.parser.tokens['RPAR']:
builder.push(ArglistObject([], None, None, first_token.lineno))
elif len(atoms) == 3: # '(' Arglist ')'
# push arglist on the stack
builder.push(atoms[1])
- elif isinstance(first_token, TokenObject) and first_token.name == tok.LSQB:
+ elif isinstance(first_token, TokenObject) and first_token.name == builder.parser.tokens['LSQB']:
if len(atoms) == 3 and isinstance(atoms[1], SlicelistObject):
builder.push(atoms[1])
else:
@@ -994,6 +499,7 @@
else:
assert False, "Trailer reducing implementation incomplete !"
+
def build_arglist(builder, nb):
"""
arglist: (argument ',')* ( '*' test [',' '**' test] |
@@ -1002,7 +508,7 @@
[argument ','] )
"""
atoms = get_atoms(builder, nb)
- arguments, stararg, dstararg = parse_argument(atoms)
+ arguments, stararg, dstararg = parse_argument(atoms, builder)
if atoms:
lineno = atoms[0].lineno
else:
@@ -1010,16 +516,17 @@
builder.push(ArglistObject(arguments, stararg, dstararg, lineno))
+
def build_subscript(builder, nb):
"""'.' '.' '.' | [test] ':' [test] [':' [test]] | test"""
atoms = get_atoms(builder, nb)
token = atoms[0]
lineno = token.lineno
- if isinstance(token, TokenObject) and token.name == tok.DOT:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['DOT']:
# Ellipsis:
builder.push(ast.Ellipsis(lineno))
elif len(atoms) == 1:
- if isinstance(token, TokenObject) and token.name == tok.COLON:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['COLON']:
sliceinfos = [None, None, None]
builder.push(SlicelistObject('slice', sliceinfos, lineno))
else:
@@ -1029,7 +536,7 @@
sliceinfos = [None, None, None]
infosindex = 0
for token in atoms:
- if isinstance(token, TokenObject) and token.name == tok.COLON:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['COLON']:
infosindex += 1
else:
sliceinfos[infosindex] = token
@@ -1044,7 +551,6 @@
else:
builder.push(SlicelistObject('slice', sliceinfos, lineno))
-
def build_listmaker(builder, nb):
"""listmaker: test ( list_for | (',' test)* [','] )"""
atoms = get_atoms(builder, nb)
@@ -1055,7 +561,7 @@
if token.get_value() == 'for':
# list comp
expr = atoms[0]
- list_for = parse_listcomp(atoms[1:])
+ list_for = parse_listcomp(atoms[1:], builder)
builder.push(ast.ListComp(expr, list_for, lineno))
return
# regular list building (like in [1, 2, 3,])
@@ -1077,13 +583,15 @@
nodes = []
# remove '@', '(' and ')' from atoms and use parse_attraccess
for token in atoms[1:]:
- if isinstance(token, TokenObject) and \
- token.name in (tok.LPAR, tok.RPAR, tok.NEWLINE):
+ if isinstance(token, TokenObject) and (
+ token.name == builder.parser.tokens['LPAR']
+ or token.name == builder.parser.tokens['RPAR']
+ or token.name == builder.parser.tokens['NEWLINE']):
# skip those ones
continue
else:
nodes.append(token)
- obj = parse_attraccess(nodes)
+ obj = parse_attraccess(nodes, builder)
builder.push(obj)
def build_funcdef(builder, nb):
@@ -1112,7 +620,7 @@
arglist = []
index = 3
arglist = slicecut(atoms, 3, -3)
- names, default, flags = parse_arglist(arglist)
+ names, default, flags = parse_arglist(arglist, builder)
funcname_token = atoms[1]
assert isinstance(funcname_token, TokenObject)
funcname = funcname_token.get_value()
@@ -1293,7 +801,7 @@
while index < l:
as_name = None
# dotted name (a.b.c)
- incr, name = parse_dotted_names(atoms[index:])
+ incr, name = parse_dotted_names(atoms[index:], builder)
index += incr
# 'as' value
if index < l:
@@ -1310,11 +818,11 @@
while index<l:
atom = atoms[index]
# for atom in atoms[index:]:
- if isinstance(atom, TokenObject) and atom.name == tok.COMMA:
+ if isinstance(atom, TokenObject) and atom.name == builder.parser.tokens['COMMA']:
break
index += 1
## while index < l and isinstance(atoms[index], TokenObject) and \
-## atoms[index].name != tok.COMMA:
+## atoms[index].name != builder.parser.tokens['COMMA']:
## index += 1
index += 1
builder.push(ast.Import(names, atoms[0].lineno))
@@ -1329,14 +837,14 @@
"""
atoms = get_atoms(builder, nb)
index = 1
- incr, from_name = parse_dotted_names(atoms[index:])
+ incr, from_name = parse_dotted_names(atoms[index:], builder)
index += (incr + 1) # skip 'import'
token = atoms[index]
assert isinstance(token, TokenObject) # XXX
- if token.name == tok.STAR:
+ if token.name == builder.parser.tokens['STAR']:
names = [('*', None)]
else:
- if token.name == tok.LPAR:
+ if token.name == builder.parser.tokens['LPAR']:
# multi-line imports
tokens = slicecut( atoms, index+1, -1 )
else:
@@ -1417,14 +925,14 @@
start = 1
if l > 1:
token = atoms[1]
- if isinstance(token, TokenObject) and token.name == tok.RIGHTSHIFT:
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['RIGHTSHIFT']:
dest = atoms[2]
# skip following comma
start = 4
for index in range(start, l, 2):
items.append(atoms[index])
last_token = atoms[-1]
- if isinstance(last_token, TokenObject) and last_token.name == tok.COMMA:
+ if isinstance(last_token, TokenObject) and last_token.name == builder.parser.tokens['COMMA']:
builder.push(ast.Print(items, dest, atoms[0].lineno))
else:
builder.push(ast.Printnl(items, dest, atoms[0].lineno))
@@ -1464,8 +972,8 @@
"""
atoms = get_atoms(builder, nb)
- l = len(atoms)
handlers = []
+ l = len(atoms)
else_ = None
body = atoms[2]
token = atoms[3]
@@ -1544,133 +1052,7 @@
'eval_input' : build_eval_input,
'with_stmt' : build_with_stmt,
}
-
-# Build two almost identical ASTRULES dictionaries
-ASTRULES = dict([(sym[key], value) for (key, value) in
- ASTRULES_Template.iteritems() if key in sym])
-del ASTRULES_Template
-
-## Stack elements definitions ###################################
-
-class BaseRuleObject(ast.Node):
- """Base class for unnamed rules"""
- def __init__(self, count, lineno):
- self.count = count
- self.lineno = lineno # src.getline()
- self.col = 0 # src.getcol()
-
-
-class RuleObject(BaseRuleObject):
- """A simple object used to wrap a rule or token"""
- def __init__(self, name, count, lineno):
- BaseRuleObject.__init__(self, count, lineno)
- self.rulename = name
-
- def __str__(self):
- return "<Rule: %s/%d>" % (sym.sym_name[self.rulename], self.count)
-
- def __repr__(self):
- return "<Rule: %s/%d>" % (sym.sym_name[self.rulename], self.count)
-
-
-class TempRuleObject(BaseRuleObject):
- """used to keep track of how many items get_atom() should pop"""
-
- def __init__(self, name, count, lineno):
- BaseRuleObject.__init__(self, count, lineno)
- self.temp_rulename = name
-
- def __str__(self):
- return "<Rule: %s/%d>" % (self.temp_rulename, self.count)
-
- def __repr__(self):
- return "<Rule: %s/%d>" % (self.temp_rulename, self.count)
-
-
-class TokenObject(ast.Node):
- """A simple object used to wrap a rule or token"""
- def __init__(self, name, value, lineno):
- self.name = name
- self.value = value
- self.count = 0
- # self.line = 0 # src.getline()
- self.col = 0 # src.getcol()
- self.lineno = lineno
-
- def get_name(self):
- return tok.tok_rpunct.get(self.name,
- tok.tok_name.get(self.name, str(self.name)))
-
- def get_value(self):
- value = self.value
- if value is None:
- value = ''
- return value
-
- def __str__(self):
- return "<Token: (%s,%s)>" % (self.get_name(), self.value)
-
- def __repr__(self):
- return "<Token: (%r,%s)>" % (self.get_name(), self.value)
-
-
-class ObjectAccessor(ast.Node):
- """base class for ArglistObject, SubscriptObject and SlicelistObject
-
- FIXME: think about a more appropriate name
- """
-
-class ArglistObject(ObjectAccessor):
- """helper class to build function's arg list
- """
- def __init__(self, arguments, stararg, dstararg, lineno):
- self.fake_rulename = 'arglist'
- self.arguments = arguments
- self.stararg = stararg
- self.dstararg = dstararg
- self.lineno = lineno
-
- def __str__(self):
- return "<ArgList: (%s, %s, %s)>" % self.value
-
- def __repr__(self):
- return "<ArgList: (%s, %s, %s)>" % self.value
-
-class SubscriptObject(ObjectAccessor):
- """helper class to build subscript list
-
- self.value represents the __getitem__ argument
- """
- def __init__(self, name, value, lineno):
- self.fake_rulename = name
- self.value = value
- self.lineno = lineno
-
- def __str__(self):
- return "<SubscriptList: (%s)>" % self.value
-
- def __repr__(self):
- return "<SubscriptList: (%s)>" % self.value
-
-class SlicelistObject(ObjectAccessor):
- """helper class to build slice objects
-
- self.value is a list [start, end, step]
- self.fake_rulename can either be 'slice' or 'sliceobj' depending
- on if a step is specfied or not (see Python's AST
- for more information on that)
- """
- def __init__(self, name, value, lineno):
- self.fake_rulename = name
- self.value = value
- self.lineno = lineno
-
- def __str__(self):
- return "<SliceList: (%s)>" % self.value
-
- def __repr__(self):
- return "<SliceList: (%s)>" % self.value
-
+
class AstBuilderContext(AbstractContext):
"""specific context management for AstBuidler"""
@@ -1681,97 +1063,87 @@
class AstBuilder(BaseGrammarBuilder):
"""A builder that directly produce the AST"""
- def __init__(self, rules=None, debug=0, space=None):
- BaseGrammarBuilder.__init__(self, rules, debug)
+ def __init__(self, parser, debug=0, space=None):
+ BaseGrammarBuilder.__init__(self, parser, debug)
self.rule_stack = []
self.space = space
self.source_encoding = None
self.with_enabled = False
+ self.build_rules = ASTRULES_Template
+ self.user_build_rules = {}
def enable_with(self):
if self.with_enabled:
return
self.with_enabled = True
- self.keywords.update({'with':None, 'as': None})
-
+ # XXX
+ # self.keywords.update({'with':None, 'as': None})
+
def context(self):
return AstBuilderContext(self.rule_stack)
def restore(self, ctx):
-## if DEBUG_MODE:
-## print "Restoring context (%s)" % (len(ctx.rule_stack))
assert isinstance(ctx, AstBuilderContext)
assert len(self.rule_stack) >= ctx.d
del self.rule_stack[ctx.d:]
- #self.rule_stack = ctx.rule_stack
def pop(self):
return self.rule_stack.pop(-1)
def push(self, obj):
self.rule_stack.append(obj)
- if not isinstance(obj, RuleObject) and not isinstance(obj, TokenObject):
-## if DEBUG_MODE:
-## print "Pushed:", str(obj), len(self.rule_stack)
- pass
- elif isinstance(obj, TempRuleObject):
-## if DEBUG_MODE:
-## print "Pushed:", str(obj), len(self.rule_stack)
- pass
- # print "\t", self.rule_stack
def push_tok(self, name, value, src ):
- self.push( TokenObject( name, value, src._token_lnum ) )
+ self.push( TokenObject( name, value, src._token_lnum, self.parser ) )
def push_rule(self, name, count, src ):
- self.push( RuleObject( name, count, src._token_lnum ) )
+ self.push( RuleObject( name, count, src._token_lnum, self.parser ) )
def alternative( self, rule, source ):
# Do nothing, keep rule on top of the stack
-## rule_stack = self.rule_stack[:]
if rule.is_root():
-## if DEBUG_MODE:
-## print "ALT:", sym.sym_name[rule.codename], self.rule_stack
- builder_func = ASTRULES.get(rule.codename, None)
- if builder_func:
- builder_func(self, 1)
+ rulename = self.parser.sym_name[rule.codename]
+ # builder_func = ASTRULES.get(rule.codename, None)
+ w_func = self.user_build_rules.get(rulename, None)
+ # user defined (applevel) function
+ if w_func:
+ w_items = self.space.newlist( [self.space.wrap( it ) for it in get_atoms(self, 1)] )
+ w_astnode = self.space.call_function(w_func, w_items)
+ astnode = self.space.interp_w(ast.Node, w_astnode, can_be_None=False)
+ self.push(astnode)
else:
-## if DEBUG_MODE:
-## print "No reducing implementation for %s, just push it on stack" % (
-## sym.sym_name[rule.codename])
- self.push_rule(rule.codename, 1, source)
+ builder_func = self.build_rules.get(rulename, None)
+ if builder_func:
+ builder_func(self, 1)
+ else:
+ self.push_rule(rule.codename, 1, source)
else:
self.push_rule(rule.codename, 1, source)
-## if DEBUG_MODE > 1:
-## show_stack(rule_stack, self.rule_stack)
-## x = raw_input("Continue ?")
return True
def sequence(self, rule, source, elts_number):
""" """
-## rule_stack = self.rule_stack[:]
if rule.is_root():
-## if DEBUG_MODE:
-## print "SEQ:", sym.sym_name[rule.codename]
- builder_func = ASTRULES.get(rule.codename, None)
- if builder_func:
- # print "REDUCING SEQUENCE %s" % sym.sym_name[rule.codename]
- builder_func(self, elts_number)
+ rulename = self.parser.sym_name[rule.codename]
+ # builder_func = ASTRULES.get(rule.codename, None)
+ w_func = self.user_build_rules.get(rulename, None)
+ # user defined (applevel) function
+ if w_func:
+ w_items = self.space.newlist( [self.space.wrap( it ) for it in get_atoms(self, elts_number)] )
+ w_astnode = self.space.call_function(w_func, w_items)
+ astnode = self.space.interp_w(ast.Node, w_astnode, can_be_None=False)
+ self.push(astnode)
else:
-## if DEBUG_MODE:
-## print "No reducing implementation for %s, just push it on stack" % (
-## sym.sym_name[rule.codename])
- self.push_rule(rule.codename, elts_number, source)
+ builder_func = self.build_rules.get(rulename, None)
+ if builder_func:
+ builder_func(self, elts_number)
+ else:
+ self.push_rule(rule.codename, elts_number, source)
else:
self.push_rule(rule.codename, elts_number, source)
-## if DEBUG_MODE > 1:
-## show_stack(rule_stack, self.rule_stack)
-## raw_input("Continue ?")
return True
def token(self, name, value, source):
-## if DEBUG_MODE:
-## print "TOK:", tok.tok_name[name], name, value
self.push_tok(name, value, source)
return True
@@ -1799,7 +1171,7 @@
return space.call_function(f, space.wrap(value))
def is_basestring_const(self, expr):
- if not isinstance(expr,ast.Const):
+ if not isinstance(expr, ast.Const):
return False
space = self.space
return space.is_true(space.isinstance(expr.value,space.w_basestring))
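
The dispatch in alternative() and sequence() above now resolves rules by
name: a user-registered applevel hook (user_build_rules) wins over the
builtin build_xxx table, and unknown rules fall back to push_rule(). A
minimal sketch of that lookup order (plain Python; the helper name
dispatch_rule is hypothetical and not part of the commit):

    def dispatch_rule(builder, rulename, count):
        # applevel hook first: hand it the popped atoms, expect an ast.Node back
        w_func = builder.user_build_rules.get(rulename, None)
        if w_func is not None:
            w_items = builder.space.newlist(
                [builder.space.wrap(it) for it in get_atoms(builder, count)])
            w_astnode = builder.space.call_function(w_func, w_items)
            builder.push(builder.space.interp_w(ast.Node, w_astnode,
                                                can_be_None=False))
            return True
        # then the builtin table copied from ASTRULES_Template
        builder_func = builder.build_rules.get(rulename, None)
        if builder_func is not None:
            builder_func(builder, count)
            return True
        return False    # caller falls back to push_rule()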
Added: pypy/dist/pypy/interpreter/pyparser/asthelper.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/interpreter/pyparser/asthelper.py Wed Feb 28 18:30:48 2007
@@ -0,0 +1,635 @@
+from pypy.interpreter.typedef import TypeDef, GetSetProperty, interp_attrproperty
+from pypy.interpreter.astcompiler import ast, consts
+from pypy.interpreter.pyparser.error import SyntaxError
+
+
+### Parsing utilities ################################################
+def parse_except_clause(tokens):
+ """parses 'except' [test [',' test]] ':' suite
+ and returns a 4-tuple : (tokens_read, expr1, expr2, except_body)
+ """
+ lineno = tokens[0].lineno
+ clause_length = 1
+ # Read until end of except clause (bound by following 'else',
+ # or 'except' or end of tokens)
+ while clause_length < len(tokens):
+ token = tokens[clause_length]
+ if isinstance(token, TokenObject) and \
+ (token.get_value() == 'except' or token.get_value() == 'else'):
+ break
+ clause_length += 1
+ if clause_length == 3:
+ # case 'except: body'
+ return (3, None, None, tokens[2])
+ elif clause_length == 4:
+ # case 'except Exception: body':
+ return (4, tokens[1], None, tokens[3])
+ else:
+ # case 'except Exception, exc: body'
+ return (6, tokens[1], to_lvalue(tokens[3], consts.OP_ASSIGN), tokens[5])
+
+
+def parse_dotted_names(tokens, builder):
+ """parses NAME('.' NAME)* and returns full dotted name
+
+ this function doesn't assume that the <tokens> list ends after the
+ last 'NAME' element
+ """
+ first = tokens[0]
+ assert isinstance(first, TokenObject)
+ name = first.get_value()
+ l = len(tokens)
+ index = 1
+ for index in range(1, l, 2):
+ token = tokens[index]
+ assert isinstance(token, TokenObject)
+ if token.name != builder.parser.tokens['DOT']:
+ break
+ token = tokens[index+1]
+ assert isinstance(token, TokenObject)
+ name += '.'
+ value = token.get_value()
+ name += value
+ return (index, name)
+
+def parse_argument(tokens, builder):
+ """parses function call arguments"""
+ l = len(tokens)
+ index = 0
+ arguments = []
+ last_token = None
+ building_kw = False
+ kw_built = False
+ stararg_token = None
+ dstararg_token = None
+ while index < l:
+ cur_token = tokens[index]
+ if not isinstance(cur_token, TokenObject):
+ index += 1
+ if not building_kw:
+ arguments.append(cur_token)
+ else:
+ last_token = arguments.pop()
+ assert isinstance(last_token, ast.Name) # used by rtyper
+ arguments.append(ast.Keyword(last_token.varname, cur_token, last_token.lineno))
+ building_kw = False
+ kw_built = True
+ continue
+ elif cur_token.name == builder.parser.tokens['COMMA']:
+ index += 1
+ continue
+ elif cur_token.name == builder.parser.tokens['EQUAL']:
+ index += 1
+ building_kw = True
+ continue
+ elif cur_token.name == builder.parser.tokens['STAR'] or cur_token.name == builder.parser.tokens['DOUBLESTAR']:
+ index += 1
+ if cur_token.name == builder.parser.tokens['STAR']:
+ stararg_token = tokens[index]
+ index += 1
+ if index >= l:
+ break
+ index += 2 # Skip COMMA and DOUBLESTAR
+ dstararg_token = tokens[index]
+ break
+ elif cur_token.get_value() == 'for':
+ if len(arguments) != 1:
+ raise SyntaxError("invalid syntax", cur_token.lineno,
+ cur_token.col)
+ expr = arguments[0]
+ genexpr_for = parse_genexpr_for(tokens[index:])
+ genexpr_for[0].is_outmost = True
+ gexp = ast.GenExpr(ast.GenExprInner(expr, genexpr_for, expr.lineno), expr.lineno)
+ arguments[0] = gexp
+ break
+ return arguments, stararg_token, dstararg_token
+
+
+def parse_fpdef(tokens, index, builder):
+ """fpdef: fpdef: NAME | '(' fplist ')'
+ fplist: fpdef (',' fpdef)* [',']
+
+ This intends to be an RPython-compliant implementation of _parse_fpdef,
+ but it can't work with the default compiler.
+ We have switched to the astcompiler module now.
+ """
+ nodes = []
+ comma = False
+ while True:
+ token = tokens[index]
+ index += 1
+ assert isinstance(token, TokenObject)
+ if token.name == builder.parser.tokens['LPAR']: # nested item
+ index, node = parse_fpdef(tokens, index, builder)
+ elif token.name == builder.parser.tokens['RPAR']: # end of current nesting
+ break
+ else: # name
+ val = token.get_value()
+ node = ast.AssName(val, consts.OP_ASSIGN, token.lineno)
+ nodes.append(node)
+
+ token = tokens[index]
+ index += 1
+ assert isinstance(token, TokenObject)
+ if token.name == builder.parser.tokens['COMMA']:
+ comma = True
+ else:
+ assert token.name == builder.parser.tokens['RPAR']
+ break
+ if len(nodes) == 1 and not comma:
+ node = nodes[0]
+ else:
+ node = ast.AssTuple(nodes, token.lineno)
+ return index, node
+
+def parse_arglist(tokens, builder):
+ """returns names, defaults, flags"""
+ l = len(tokens)
+ index = 0
+ defaults = []
+ names = []
+ flags = 0
+ first_with_default = -1
+ while index < l:
+ cur_token = tokens[index]
+ index += 1
+ if not isinstance(cur_token, TokenObject):
+ # XXX: think of another way to write this test
+ defaults.append(cur_token)
+ if first_with_default == -1:
+ first_with_default = len(names) - 1
+ elif cur_token.name == builder.parser.tokens['COMMA']:
+ # We could skip the COMMA test by incrementing index cleverly,
+ # but we might experiment with the grammar at some point
+ continue
+ elif cur_token.name == builder.parser.tokens['LPAR']:
+ index, node = parse_fpdef(tokens, index, builder)
+ names.append(node)
+ elif cur_token.name == builder.parser.tokens['STAR'] or cur_token.name == builder.parser.tokens['DOUBLESTAR']:
+ if cur_token.name == builder.parser.tokens['STAR']:
+ cur_token = tokens[index]
+ assert isinstance(cur_token, TokenObject)
+ index += 1
+ if cur_token.name == builder.parser.tokens['NAME']:
+ val = cur_token.get_value()
+ names.append( ast.AssName( val, consts.OP_ASSIGN ) )
+ flags |= consts.CO_VARARGS
+ index += 1
+ if index >= l:
+ break
+ else:
+ # still more tokens to read
+ cur_token = tokens[index]
+ index += 1
+ else:
+ raise SyntaxError("incomplete varags", cur_token.lineno,
+ cur_token.col)
+ assert isinstance(cur_token, TokenObject)
+ if cur_token.name != builder.parser.tokens['DOUBLESTAR']:
+ raise SyntaxError("Unexpected token", cur_token.lineno,
+ cur_token.col)
+ cur_token = tokens[index]
+ index += 1
+ assert isinstance(cur_token, TokenObject)
+ if cur_token.name == builder.parser.tokens['NAME']:
+ val = cur_token.get_value()
+ names.append( ast.AssName( val, consts.OP_ASSIGN ) )
+ flags |= consts.CO_VARKEYWORDS
+ index += 1
+ else:
+ raise SyntaxError("incomplete varags", cur_token.lineno,
+ cur_token.col)
+ if index < l:
+ token = tokens[index]
+ raise SyntaxError("unexpected token" , token.lineno,
+ token.col)
+ elif cur_token.name == builder.parser.tokens['NAME']:
+ val = cur_token.get_value()
+ names.append( ast.AssName( val, consts.OP_ASSIGN ) )
+
+ if first_with_default != -1:
+ num_expected_with_default = len(names) - first_with_default
+ if flags & consts.CO_VARKEYWORDS:
+ num_expected_with_default -= 1
+ if flags & consts.CO_VARARGS:
+ num_expected_with_default -= 1
+ if len(defaults) != num_expected_with_default:
+ raise SyntaxError('non-default argument follows default argument',
+ tokens[0].lineno, tokens[0].col)
+ return names, defaults, flags
+
+
+def parse_listcomp(tokens, builder):
+ """parses 'for j in k for i in j if i %2 == 0' and returns
+ a list of ListCompFor instances
+ XXX: refactor with listmaker ?
+ """
+ list_fors = []
+ ifs = []
+ index = 0
+ if tokens:
+ lineno = tokens[0].lineno
+ else:
+ lineno = -1
+ while index < len(tokens):
+ token = tokens[index]
+ assert isinstance(token, TokenObject) # rtyper info + check
+ if token.get_value() == 'for':
+ index += 1 # skip 'for'
+ ass_node = to_lvalue(tokens[index], consts.OP_ASSIGN)
+ index += 2 # skip 'in'
+ iterables = [tokens[index]]
+ index += 1
+ while index < len(tokens):
+ tok2 = tokens[index]
+ if not isinstance(tok2, TokenObject):
+ break
+ if tok2.name != builder.parser.tokens['COMMA']:
+ break
+ iterables.append(tokens[index+1])
+ index += 2
+ if len(iterables) == 1:
+ iterable = iterables[0]
+ else:
+ iterable = ast.Tuple(iterables, token.lineno)
+ while index < len(tokens):
+ token = tokens[index]
+ assert isinstance(token, TokenObject) # rtyper info
+ if token.get_value() == 'if':
+ ifs.append(ast.ListCompIf(tokens[index+1], token.lineno))
+ index += 2
+ else:
+ break
+ list_fors.append(ast.ListCompFor(ass_node, iterable, ifs, lineno))
+ ifs = []
+ else:
+ assert False, "Unexpected token: expected 'for' in list comprehension"
+ #
+ # Original implementation:
+ #
+ # if tokens[index].get_value() == 'for':
+ # index += 1 # skip 'for'
+ # ass_node = to_lvalue(tokens[index], consts.OP_ASSIGN)
+ # index += 2 # skip 'in'
+ # iterable = tokens[index]
+ # index += 1
+ # while index < len(tokens) and tokens[index].get_value() == 'if':
+ # ifs.append(ast.ListCompIf(tokens[index+1]))
+ # index += 2
+ # list_fors.append(ast.ListCompFor(ass_node, iterable, ifs))
+ # ifs = []
+ # else:
+ # raise ValueError('Unexpected token: %s' % tokens[index])
+ return list_fors
+
+
+def parse_genexpr_for(tokens):
+ """parses 'for j in k for i in j if i %2 == 0' and returns
+ a list of GenExprFor instances
+ XXX: if RPYTHON supports to pass a class object to a function,
+ we could refactor parse_listcomp and parse_genexpr_for,
+ and call :
+ - parse_listcomp(tokens, forclass=ast.GenExprFor, ifclass=...)
+ or:
+ - parse_listcomp(tokens, forclass=ast.ListCompFor, ifclass=...)
+ """
+ genexpr_fors = []
+ ifs = []
+ index = 0
+ if tokens:
+ lineno = tokens[0].lineno
+ else:
+ lineno = -1
+ while index < len(tokens):
+ token = tokens[index]
+ assert isinstance(token, TokenObject) # rtyper info + check
+ if token.get_value() == 'for':
+ index += 1 # skip 'for'
+ ass_node = to_lvalue(tokens[index], consts.OP_ASSIGN)
+ index += 2 # skip 'in'
+ iterable = tokens[index]
+ index += 1
+ while index < len(tokens):
+ token = tokens[index]
+ assert isinstance(token, TokenObject) # rtyper info
+ if token.get_value() == 'if':
+ ifs.append(ast.GenExprIf(tokens[index+1], token.lineno))
+ index += 2
+ else:
+ break
+ genexpr_fors.append(ast.GenExprFor(ass_node, iterable, ifs, lineno))
+ ifs = []
+ else:
+ raise SyntaxError('invalid syntax',
+ token.lineno, token.col)
+ return genexpr_fors
+
+def get_docstring(builder,stmt):
+ """parses a Stmt node.
+
+ If a docstring is found, the Discard node is **removed**
+ from <stmt> and the docstring is returned.
+
+ If no docstring is found, <stmt> is left unchanged
+ and None is returned
+ """
+ if not isinstance(stmt, ast.Stmt):
+ return None
+ doc = builder.wrap_none()
+ if len(stmt.nodes):
+ first_child = stmt.nodes[0]
+ if isinstance(first_child, ast.Discard):
+ expr = first_child.expr
+ if builder.is_basestring_const(expr):
+ # This *is* a docstring, remove it from stmt list
+ assert isinstance(expr, ast.Const)
+ del stmt.nodes[0]
+ doc = expr.value
+ return doc
+
+
+def to_lvalue(ast_node, flags):
+ lineno = ast_node.lineno
+ if isinstance( ast_node, ast.Name ):
+ return ast.AssName(ast_node.varname, flags, lineno)
+ # return ast.AssName(ast_node.name, flags)
+ elif isinstance(ast_node, ast.Tuple):
+ nodes = []
+ # FIXME: should ast_node.getChildren() but it's not annotable
+ # because of flatten()
+ for node in ast_node.nodes:
+ nodes.append(to_lvalue(node, flags))
+ return ast.AssTuple(nodes, lineno)
+ elif isinstance(ast_node, ast.List):
+ nodes = []
+ # FIXME: should ast_node.getChildren() but it's not annotable
+ # because of flatten()
+ for node in ast_node.nodes:
+ nodes.append(to_lvalue(node, flags))
+ return ast.AssList(nodes, lineno)
+ elif isinstance(ast_node, ast.Getattr):
+ expr = ast_node.expr
+ assert isinstance(ast_node, ast.Getattr)
+ attrname = ast_node.attrname
+ return ast.AssAttr(expr, attrname, flags, lineno)
+ elif isinstance(ast_node, ast.Subscript):
+ ast_node.flags = flags
+ return ast_node
+ elif isinstance(ast_node, ast.Slice):
+ ast_node.flags = flags
+ return ast_node
+ else:
+ if isinstance(ast_node, ast.GenExpr):
+ raise SyntaxError("assign to generator expression not possible",
+ lineno, 0, '')
+ elif isinstance(ast_node, ast.ListComp):
+ raise SyntaxError("can't assign to list comprehension",
+ lineno, 0, '')
+ elif isinstance(ast_node, ast.CallFunc):
+ if flags == consts.OP_DELETE:
+ raise SyntaxError("can't delete function call",
+ lineno, 0, '')
+ else:
+ raise SyntaxError("can't assign to function call",
+ lineno, 0, '')
+ else:
+ raise SyntaxError("can't assign to non-lvalue",
+ lineno, 0, '')
+
+def is_augassign( ast_node ):
+ if ( isinstance( ast_node, ast.Name ) or
+ isinstance( ast_node, ast.Slice ) or
+ isinstance( ast_node, ast.Subscript ) or
+ isinstance( ast_node, ast.Getattr ) ):
+ return True
+ return False
+
+def get_atoms(builder, nb):
+ atoms = []
+ i = nb
+ while i>0:
+ obj = builder.pop()
+ if isinstance(obj, BaseRuleObject):
+ i += obj.count
+ else:
+ atoms.append( obj )
+ i -= 1
+ atoms.reverse()
+ return atoms
+
+#def eval_string(value):
+# """temporary implementation
+#
+# FIXME: need to be finished (check compile.c (parsestr) and
+# stringobject.c (PyString_DecodeEscape()) for complete implementation)
+# """
+# # return eval(value)
+# if len(value) == 2:
+# return ''
+# result = ''
+# length = len(value)
+# quotetype = value[0]
+# index = 1
+# while index < length and value[index] == quotetype:
+# index += 1
+# if index == 6:
+# # empty strings like """""" or ''''''
+# return ''
+# # XXX: is it RPYTHON to do this value[index:-index]
+# chars = [char for char in value[index:len(value)-index]]
+# result = ''.join(chars)
+# result = result.replace('\\\\', '\\')
+# d = {'\\b' : '\b', '\\f' : '\f', '\\t' : '\t', '\\n' : '\n',
+# '\\r' : '\r', '\\v' : '\v', '\\a' : '\a',
+# }
+# for escaped, value in d.items():
+# result = result.replace(escaped, value)
+# return result
+
+
+## misc utilities, especially for the 'power' rule
+def reduce_callfunc(obj, arglist):
+ """generic factory for CallFunc nodes"""
+ assert isinstance(arglist, ArglistObject)
+ return ast.CallFunc(obj, arglist.arguments,
+ arglist.stararg, arglist.dstararg, arglist.lineno)
+
+def reduce_subscript(obj, subscript):
+ """generic factory for Subscript nodes"""
+ assert isinstance(subscript, SubscriptObject)
+ return ast.Subscript(obj, consts.OP_APPLY, subscript.value, subscript.lineno)
+
+def reduce_slice(obj, sliceobj):
+ """generic factory for Slice nodes"""
+ assert isinstance(sliceobj, SlicelistObject)
+ if sliceobj.fake_rulename == 'slice':
+ start = sliceobj.value[0]
+ end = sliceobj.value[1]
+ return ast.Slice(obj, consts.OP_APPLY, start, end, sliceobj.lineno)
+ else:
+ return ast.Subscript(obj, consts.OP_APPLY, ast.Sliceobj(sliceobj.value,
+ sliceobj.lineno), sliceobj.lineno)
+
+def parse_attraccess(tokens, builder):
+ """parses token list like ['a', '.', 'b', '.', 'c', ...]
+
+ and returns an ast node : ast.Getattr(Getattr(Name('a'), 'b'), 'c' ...)
+ """
+ token = tokens[0]
+ # XXX HACK for when parse_attraccess is called from build_decorator
+ if isinstance(token, TokenObject):
+ val = token.get_value()
+ result = ast.Name(val, token.lineno)
+ else:
+ result = token
+ index = 1
+ while index < len(tokens):
+ token = tokens[index]
+ if isinstance(token, TokenObject) and token.name == builder.parser.tokens['DOT']:
+ index += 1
+ token = tokens[index]
+ assert isinstance(token, TokenObject)
+ result = ast.Getattr(result, token.get_value(), token.lineno)
+ elif isinstance(token, ArglistObject):
+ result = reduce_callfunc(result, token)
+ elif isinstance(token, SubscriptObject):
+ result = reduce_subscript(result, token)
+ elif isinstance(token, SlicelistObject):
+ result = reduce_slice(result, token)
+ else:
+ assert False, "Don't know how to handle index %s of %s" % (index, len(tokens))
+ index += 1
+ return result
+
+
+
+## Stack element definitions ####################################
+
+class BaseRuleObject(ast.Node):
+ """Base class for unnamed rules"""
+ def __init__(self, count, lineno):
+ self.count = count
+ self.lineno = lineno # src.getline()
+ self.col = 0 # src.getcol()
+
+
+class RuleObject(BaseRuleObject):
+ """A simple object used to wrap a rule or token"""
+ def __init__(self, name, count, lineno, parser):
+ BaseRuleObject.__init__(self, count, lineno)
+ self.rulename = name
+ self.parser = parser
+
+ def __str__(self):
+ return "<Rule: %s/%d>" % ( self.parser.symbol_repr(self.rulename), self.count)
+
+ def __repr__(self):
+ return "<Rule: %s/%d>" % ( self.parser.symbol_repr(self.rulename), self.count)
+
+
+class TempRuleObject(BaseRuleObject):
+ """used to keep track of how many items get_atom() should pop"""
+ def __init__(self, name, count, lineno):
+ BaseRuleObject.__init__(self, count, lineno)
+ self.temp_rulename = name
+
+ def __str__(self):
+ return "<Rule: %s/%d>" % (self.temp_rulename, self.count)
+
+ def __repr__(self):
+ return "<Rule: %s/%d>" % (self.temp_rulename, self.count)
+
+
+class TokenObject(ast.Node):
+ """A simple object used to wrap a rule or token"""
+ def __init__(self, name, value, lineno, parser):
+ self.name = name
+ self.value = value
+ self.count = 0
+ # self.line = 0 # src.getline()
+ self.col = 0 # src.getcol()
+ self.lineno = lineno
+ self.parser = parser
+
+ def get_name(self):
+ tokname = self.parser.tok_name.get(self.name, str(self.name))
+ return self.parser.tok_rvalues.get(self.name, tokname)
+
+ def get_value(self):
+ value = self.value
+ if value is None:
+ value = ''
+ return value
+
+ def descr_fget_value(space, self):
+ value = self.get_value()
+ return space.wrap(value)
+
+ def __str__(self):
+ return "<Token: (%s,%s)>" % (self.get_name(), self.value)
+
+ def __repr__(self):
+ return "<Token: (%r,%s)>" % (self.get_name(), self.value)
+
+TokenObject.typedef = TypeDef('BuildToken',
+ name=interp_attrproperty('name', cls=TokenObject),
+ lineno=interp_attrproperty('lineno', cls=TokenObject),
+ value=GetSetProperty(TokenObject.descr_fget_value))
+
+class ObjectAccessor(ast.Node):
+ """base class for ArglistObject, SubscriptObject and SlicelistObject
+
+ FIXME: think about a more appropriate name
+ """
+
+class ArglistObject(ObjectAccessor):
+ """helper class to build function's arg list
+ """
+ def __init__(self, arguments, stararg, dstararg, lineno):
+ self.fake_rulename = 'arglist'
+ self.arguments = arguments
+ self.stararg = stararg
+ self.dstararg = dstararg
+ self.lineno = lineno
+
+ def __str__(self):
+ return "<ArgList: (%s, %s, %s)>" % (self.arguments, self.stararg, self.dstararg)
+
+ def __repr__(self):
+ return "<ArgList: (%s, %s, %s)>" % (self.arguments, self.stararg, self.dstararg)
+
+class SubscriptObject(ObjectAccessor):
+ """helper class to build subscript list
+
+ self.value represents the __getitem__ argument
+ """
+ def __init__(self, name, value, lineno):
+ self.fake_rulename = name
+ self.value = value
+ self.lineno = lineno
+
+ def __str__(self):
+ return "<SubscriptList: (%s)>" % self.value
+
+ def __repr__(self):
+ return "<SubscriptList: (%s)>" % self.value
+
+class SlicelistObject(ObjectAccessor):
+ """helper class to build slice objects
+
+ self.value is a list [start, end, step]
+ self.fake_rulename can either be 'slice' or 'sliceobj' depending
+ on whether a step is specified (see Python's AST
+ for more information on that)
+ """
+ def __init__(self, name, value, lineno):
+ self.fake_rulename = name
+ self.value = value
+ self.lineno = lineno
+
+ def __str__(self):
+ return "<SliceList: (%s)>" % self.value
+
+ def __repr__(self):
+ return "<SliceList: (%s)>" % self.value
+
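
For illustration, the left fold performed by parse_attraccess() can be
summarised by a self-contained sketch that uses plain tuples in place of
ast nodes (hypothetical helper, not part of the commit):

    def fold_attraccess(names):
        # 'a.b.c' reduces left to right: Getattr(Getattr(Name('a'), 'b'), 'c')
        result = ('Name', names[0])
        for attr in names[1:]:
            result = ('Getattr', result, attr)
        return result

    assert fold_attraccess(['a', 'b', 'c']) == \
           ('Getattr', ('Getattr', ('Name', 'a'), 'b'), 'c')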
Modified: pypy/dist/pypy/interpreter/pyparser/ebnfgrammar.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/ebnfgrammar.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/ebnfgrammar.py Wed Feb 28 18:30:48 2007
@@ -1,47 +1,18 @@
# This module contains the grammar parser
# and the symbol mappings
-from grammar import BaseGrammarBuilder, Alternative, Sequence, Token, \
- KleeneStar, GrammarElement, build_first_sets, EmptyToken
+from grammar import Alternative, Sequence, Token, KleeneStar, \
+ GrammarElement, Parser
+class GrammarParser(Parser):
+ pass
-sym_map = {}
-sym_rmap = {}
-_count = 0
-
-def g_add_symbol( name ):
- global _count
- if name in sym_rmap:
- return sym_rmap[name]
- val = _count
- _count += 1
- sym_map[val] = name
- sym_rmap[name] = val
- return val
-
-
-tok_map = {}
-tok_rmap = {}
-
-def g_add_token( **kwargs ):
- global _count
- assert len(kwargs) == 1
- sym, name = kwargs.popitem()
- if name in tok_rmap:
- return tok_rmap[name]
- val = _count
- _count += 1
- tok_map[val] = name
- tok_rmap[name] = val
- sym_map[val] = sym
- sym_rmap[sym] = val
- return val
-
-g_add_token( EOF='EOF' )
+GRAMMAR_GRAMMAR = GrammarParser()
def grammar_grammar():
- """NOT RPYTHON (mostly because of g_add_token I suppose)
+ """
+ (mostly because of g_add_token I suppose)
Builds the grammar for the grammar file
Here's the description of the grammar's grammar ::
@@ -51,59 +22,56 @@
alternative: sequence ( '|' sequence )+
star: '*' | '+'
- sequence: (SYMBOL star? | STRING | option | group star? )+
+ sequence: (SYMBOL star? | STRING | option | group )+
option: '[' alternative ']'
group: '(' alternative ')' star?
"""
- global sym_map
- S = g_add_symbol
- T = g_add_token
+ p = GRAMMAR_GRAMMAR
+ p.add_token('EOF','EOF')
+
# star: '*' | '+'
- star = Alternative( S("star"), [Token(T(TOK_STAR='*')), Token(T(TOK_ADD='+'))] )
- star_opt = KleeneStar ( S("star_opt"), 0, 1, rule=star )
+ star = p.Alternative_n( "star", [p.Token_n('TOK_STAR', '*'), p.Token_n('TOK_ADD', '+')] )
+ star_opt = p.KleeneStar_n( "star_opt", 0, 1, rule=star )
# rule: SYMBOL ':' alternative
- symbol = Sequence( S("symbol"), [Token(T(TOK_SYMBOL='SYMBOL')), star_opt] )
- symboldef = Token( T(TOK_SYMDEF="SYMDEF") )
- alternative = Sequence( S("alternative"), [])
- rule = Sequence( S("rule"), [symboldef, alternative] )
+ symbol = p.Sequence_n( "symbol", [p.Token_n('TOK_SYMBOL'), star_opt] )
+ symboldef = p.Token_n( 'TOK_SYMDEF' )
+ alternative = p.Sequence_n( "alternative", [])
+ rule = p.Sequence_n( "rule", [symboldef, alternative] )
# grammar: rule+
- grammar = KleeneStar( S("grammar"), _min=1, rule=rule )
+ grammar = p.KleeneStar_n( "grammar", _min=1, rule=rule )
# alternative: sequence ( '|' sequence )*
- sequence = KleeneStar( S("sequence"), 1 )
- seq_cont_list = Sequence( S("seq_cont_list"), [Token(T(TOK_BAR='|')), sequence] )
- sequence_cont = KleeneStar( S("sequence_cont"),0, rule=seq_cont_list )
-
+ sequence = p.KleeneStar_n( "sequence", 1 )
+ seq_cont_list = p.Sequence_n( "seq_cont_list", [p.Token_n('TOK_BAR', '|'), sequence] )
+ sequence_cont = p.KleeneStar_n( "sequence_cont",0, rule=seq_cont_list )
+
alternative.args = [ sequence, sequence_cont ]
# option: '[' alternative ']'
- option = Sequence( S("option"), [Token(T(TOK_LBRACKET='[')), alternative, Token(T(TOK_RBRACKET=']'))] )
+ option = p.Sequence_n( "option", [p.Token_n('TOK_LBRACKET', '['), alternative, p.Token_n('TOK_RBRACKET', ']')] )
# group: '(' alternative ')'
- group = Sequence( S("group"), [Token(T(TOK_LPAR='(')), alternative, Token(T(TOK_RPAR=')')), star_opt] )
+ group = p.Sequence_n( "group", [p.Token_n('TOK_LPAR', '('), alternative, p.Token_n('TOK_RPAR', ')'), star_opt] )
# sequence: (SYMBOL | STRING | option | group )+
- string = Token(T(TOK_STRING='STRING'))
- alt = Alternative( S("sequence_alt"), [symbol, string, option, group] )
+ string = p.Token_n('TOK_STRING')
+ alt = p.Alternative_n( "sequence_alt", [symbol, string, option, group] )
sequence.args = [ alt ]
+ p.root_rules['grammar'] = grammar
+ p.build_first_sets()
+ return p
- rules = [ star, star_opt, symbol, alternative, rule, grammar, sequence,
- seq_cont_list, sequence_cont, option, group, alt ]
- build_first_sets( rules )
- return grammar
-
-
-GRAMMAR_GRAMMAR = grammar_grammar()
-for _sym, _value in sym_rmap.items():
- globals()[_sym] = _value
+grammar_grammar()
+for _sym, _value in GRAMMAR_GRAMMAR.symbols.items():
+ assert not hasattr( GRAMMAR_GRAMMAR, _sym ), _sym
+ setattr(GRAMMAR_GRAMMAR, _sym, _value )
+
+for _sym, _value in GRAMMAR_GRAMMAR.tokens.items():
+ assert not hasattr( GRAMMAR_GRAMMAR, _sym )
+ setattr(GRAMMAR_GRAMMAR, _sym, _value )
-# cleanup
-del _sym
-del _value
del grammar_grammar
-del g_add_symbol
-del g_add_token
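
For reference, the meta-grammar built above accepts rule definitions in
the usual EBNF notation; an illustrative input snippet (not taken from
the commit) exercising symbols, string literals, options and
alternatives:

    funcdef: [decorators] 'def' NAME parameters ':' suite
    parameters: '(' [varargslist] ')'
    star: '*' | '+'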
Modified: pypy/dist/pypy/interpreter/pyparser/ebnflexer.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/ebnflexer.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/ebnflexer.py Wed Feb 28 18:30:48 2007
@@ -3,8 +3,8 @@
analyser in grammar.py
"""
-from grammar import TokenSource, Token
-from ebnfgrammar import *
+from grammar import TokenSource, Token, AbstractContext
+from ebnfgrammar import GRAMMAR_GRAMMAR as G
def match_symbol( input, start, stop ):
@@ -15,6 +15,12 @@
idx+=1
return idx
+
+class GrammarSourceContext(AbstractContext):
+ def __init__(self, pos, peek):
+ self.pos = pos
+ self.peek = peek
+
class GrammarSource(TokenSource):
"""Fully RPython - see targetebnflexer.py
The grammar tokenizer
@@ -25,8 +31,9 @@
SYMBOL: a rule symbol usually appearing right of a SYMDEF
tokens: '[', ']', '(', ')', '*', '+', '|'
"""
- def __init__(self, inpstring ):
- TokenSource.__init__(self)
+ def __init__(self, parser, inpstring):
+ # TokenSource.__init__(self)
+ self.parser = parser
self.input = inpstring
self.pos = 0
self.begin = 0
@@ -36,7 +43,7 @@
def context(self):
"""returns an opaque context object, used to backtrack
to a well known position in the parser"""
- return self.pos, self._peeked
+ return GrammarSourceContext( self.pos, self._peeked )
def offset(self, ctx=None):
"""Returns the current parsing position from the start
@@ -44,14 +51,16 @@
if ctx is None:
return self.pos
else:
- assert type(ctx)==int
- return ctx
+ assert isinstance(ctx, GrammarSourceContext)
+ return ctx.pos
def restore(self, ctx):
"""restore the context provided by context()"""
- self.pos, self._peeked = ctx
+ assert isinstance( ctx, GrammarSourceContext )
+ self.pos = ctx.pos
+ self._peeked = ctx.peek
- def current_line(self):
+ def current_linesource(self):
pos = idx = self.begin
inp = self.input
end = len(inp)
@@ -65,7 +74,6 @@
def current_lineno(self):
return self.current_line
-
def skip_empty_lines(self, input, start, end ):
idx = start
# assume beginning of a line
@@ -117,17 +125,18 @@
# means backtracking more than one token
# will re-tokenize the stream (but this is the
# grammar lexer so we don't care really!)
+ _p = self.parser
if self._peeked is not None:
peeked = self._peeked
self._peeked = None
return peeked
-
+
pos = self.pos
inp = self.input
end = len(self.input)
pos = self.skip_empty_lines(inp,pos,end)
if pos==end:
- return Token(EOF, None)
+ return _p.build_token( _p.EOF, None)
# at this point nextchar is not a white space nor \n
nextchr = inp[pos]
@@ -139,22 +148,22 @@
self.pos = npos
_endpos = npos - 1
assert _endpos>=0
- return Token(TOK_STRING,inp[pos+1:_endpos])
+ return _p.build_token( _p.TOK_STRING, inp[pos+1:_endpos])
else:
npos = match_symbol( inp, pos, end)
if npos!=pos:
self.pos = npos
if npos!=end and inp[npos]==":":
self.pos += 1
- return Token(TOK_SYMDEF,inp[pos:npos])
+ return _p.build_token( _p.TOK_SYMDEF, inp[pos:npos])
else:
- return Token(TOK_SYMBOL,inp[pos:npos])
-
+ return _p.build_token( _p.TOK_SYMBOL, inp[pos:npos])
+
# we still have pos!=end here
chr = inp[pos]
if chr in "[]()*+|":
self.pos = pos+1
- return Token(tok_rmap[chr], chr)
+ return _p.build_token( _p.tok_values[chr], chr)
self.RaiseError( "Unknown token" )
def peek(self):
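
Concretely, the tokenizer above turns a grammar line such as
star: '*' | '+' into the following stream (illustrative trace, values
shown unquoted):

    TOK_SYMDEF  'star'   # SYMBOL directly followed by ':'
    TOK_STRING  '*'      # quoted literal, quotes stripped
    TOK_BAR     '|'      # single-char punctuation from "[]()*+|"
    TOK_STRING  '+'
    EOF         None     # end of input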
Modified: pypy/dist/pypy/interpreter/pyparser/ebnfparse.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/ebnfparse.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/ebnfparse.py Wed Feb 28 18:30:48 2007
@@ -1,15 +1,31 @@
-#!/usr/bin/env python
-from grammar import BaseGrammarBuilder, Alternative, Sequence, Token, \
- KleeneStar, GrammarElement, build_first_sets, EmptyToken
-from ebnflexer import GrammarSource
-import ebnfgrammar
-from ebnfgrammar import GRAMMAR_GRAMMAR, sym_map
-from syntaxtree import AbstractSyntaxVisitor
-import pytoken
-import pysymbol
+from grammar import Token, GrammarProxy
+from grammar import AbstractBuilder, AbstractContext
+
+
+ORDA = ord("A")
+ORDZ = ord("Z")
+ORDa = ord("a")
+ORDz = ord("z")
+ORD0 = ord("0")
+ORD9 = ord("9")
+ORD_ = ord("_")
+
+def is_py_name( name ):
+ if len(name)<1:
+ return False
+ v = ord(name[0])
+ if not (ORDA <= v <= ORDZ or
+ ORDa <= v <= ORDz or v == ORD_):
+ return False
+ for c in name:
+ v = ord(c)
+ if not (ORDA <= v <= ORDZ or
+ ORDa <= v <= ORDz or
+ ORD0 <= v <= ORD9 or
+ v == ORD_):
+ return False
+ return True
-import re
-py_name = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*", re.M)
punct=['>=', '<>', '!=', '<', '>', '<=', '==', '\\*=',
'//=', '%=', '^=', '<<=', '\\*\\*=', '\\', '=',
@@ -18,19 +34,14 @@
'%', '<<', '//', '\\', '', '\n\\)', '\\(', ';', ':',
'@', '\\[', '\\]', '`', '\\{', '\\}']
+TERMINALS = ['NAME', 'NUMBER', 'STRING', 'NEWLINE', 'ENDMARKER',
+ 'INDENT', 'DEDENT' ]
-TERMINALS = [
- 'NAME', 'NUMBER', 'STRING', 'NEWLINE', 'ENDMARKER',
- 'INDENT', 'DEDENT' ]
-
-
-## Grammar Visitors ##################################################
-# FIXME: parsertools.py ? parser/__init__.py ?
class NameToken(Token):
"""A token that is not a keyword"""
- def __init__(self, keywords=None ):
- Token.__init__(self, pytoken.NAME)
+ def __init__(self, parser, keywords=None):
+ Token.__init__(self, parser, parser.tokens['NAME'])
self.keywords = keywords
def match(self, source, builder, level=0):
@@ -44,227 +55,228 @@
else:
# error unknown or negative integer
"""
-
ctx = source.context()
tk = source.next()
- if tk.codename==self.codename:
- if tk.value not in builder.keywords:
+ if tk.codename == self.codename:
+ # XXX (adim): this is trunk's keyword management
+ # if tk.value not in builder.keywords:
+ if tk.value not in self.keywords:
ret = builder.token( tk.codename, tk.value, source )
- return self.debug_return( ret, tk.codename, tk.value )
+ return ret
source.restore( ctx )
return 0
-
+
+
def match_token(self, builder, other):
"""special case of match token for tokens which are really keywords
"""
if not isinstance(other, Token):
- raise RuntimeError("Unexpected token type %r" % other)
- if other is EmptyToken:
+ raise RuntimeError("Unexpected token type")
+ if other is self.parser.EmptyToken:
return False
if other.codename != self.codename:
return False
- if other.value in builder.keywords:
+ # XXX (adim): this is trunk's keyword management
+ # if other.value in builder.keywords:
+ if other.value in self.keywords:
return False
return True
-
-def ebnf_handle_grammar(self, node):
- for rule in node.nodes:
- rule.visit(self)
- # the rules are registered already
- # we do a pass through the variables to detect
- # terminal symbols from non terminals
- for r in self.items:
- for i,a in enumerate(r.args):
- if a.codename in self.rules:
- assert isinstance(a,Token)
- r.args[i] = self.rules[a.codename]
- if a.codename in self.terminals:
- del self.terminals[a.codename]
- # XXX .keywords also contains punctuations
- self.terminals['NAME'].keywords = self.keywords
-
-def ebnf_handle_rule(self, node):
- symdef = node.nodes[0].value
- self.current_rule = symdef
- self.current_subrule = 0
- alt = node.nodes[1]
- rule = alt.visit(self)
- if not isinstance(rule, Token):
- rule.codename = self.symbols.add_symbol( symdef )
- self.rules[rule.codename] = rule
-
-def ebnf_handle_alternative(self, node):
- items = [node.nodes[0].visit(self)]
- items += node.nodes[1].visit(self)
- if len(items) == 1 and not items[0].is_root():
- return items[0]
- alt = Alternative(self.new_symbol(), items)
- return self.new_item(alt)
-
-def ebnf_handle_sequence( self, node ):
- """ """
- items = []
- for n in node.nodes:
- items.append( n.visit(self) )
- if len(items)==1:
- return items[0]
- elif len(items)>1:
- return self.new_item( Sequence( self.new_symbol(), items) )
- raise RuntimeError("Found empty sequence")
-
-def ebnf_handle_sequence_cont( self, node ):
- """Returns a list of sequences (possibly empty)"""
- return [n.visit(self) for n in node.nodes]
-
-def ebnf_handle_seq_cont_list(self, node):
- return node.nodes[1].visit(self)
-
-
-def ebnf_handle_symbol(self, node):
- star_opt = node.nodes[1]
- sym = node.nodes[0].value
- terminal = self.terminals.get( sym, None )
- if not terminal:
- tokencode = pytoken.tok_values.get( sym, None )
- if tokencode is None:
- tokencode = self.symbols.add_symbol( sym )
- terminal = Token( tokencode )
- else:
- terminal = Token( tokencode )
- self.terminals[sym] = terminal
-
- return self.repeat( star_opt, terminal )
-
-def ebnf_handle_option( self, node ):
- rule = node.nodes[1].visit(self)
- return self.new_item( KleeneStar( self.new_symbol(), 0, 1, rule ) )
-
-def ebnf_handle_group( self, node ):
- rule = node.nodes[1].visit(self)
- return self.repeat( node.nodes[3], rule )
-
-def ebnf_handle_TOK_STRING( self, node ):
- value = node.value
- tokencode = pytoken.tok_punct.get( value, None )
- if tokencode is None:
- if not py_name.match( value ):
- raise RuntimeError("Unknown STRING value ('%s')" % value )
- # assume a keyword
- tok = Token( pytoken.NAME, value )
- if value not in self.keywords:
- self.keywords.append( value )
- else:
- # punctuation
- tok = Token( tokencode )
- return tok
-
-def ebnf_handle_sequence_alt( self, node ):
- res = node.nodes[0].visit(self)
- assert isinstance( res, GrammarElement )
- return res
-
-# This will setup a mapping between
-# ebnf_handle_xxx functions and ebnfgrammar.xxx
-ebnf_handles = {}
-for name, value in globals().items():
- if name.startswith("ebnf_handle_"):
- name = name[12:]
- key = getattr(ebnfgrammar, name )
- ebnf_handles[key] = value
-
-def handle_unknown( self, node ):
- raise RuntimeError("Unknown Visitor for %r" % node.name)
-
-
-class EBNFVisitor(AbstractSyntaxVisitor):
-
- def __init__(self):
- self.rules = {}
- self.terminals = {}
- self.current_rule = None
+class EBNFBuilderContext(AbstractContext):
+ def __init__(self, stackpos, seqcounts, altcounts):
+ self.stackpos = stackpos
+ self.seqcounts = seqcounts
+ self.altcounts = altcounts
+
+
+class EBNFBuilder(AbstractBuilder):
+ """Build a grammar tree"""
+ def __init__(self, gram_parser, dest_parser):
+ AbstractBuilder.__init__(self, dest_parser)
+ self.gram = gram_parser
+ self.rule_stack = []
+ self.seqcounts = [] # number of items in the current sequence
+ self.altcounts = [] # number of sequence in the current alternative
+ self.curaltcount = 0
+ self.curseqcount = 0
self.current_subrule = 0
+ self.current_rule = -1
+ self.current_rule_name = ""
+ self.tokens = {}
self.keywords = []
- self.items = []
- self.terminals['NAME'] = NameToken()
- self.symbols = pysymbol.SymbolMapper( pysymbol._cpython_symbols.sym_name )
+ NAME = dest_parser.add_token('NAME')
+ # NAME = dest_parser.tokens['NAME']
+ self.tokens[NAME] = NameToken(dest_parser, keywords=self.keywords)
+
+ def context(self):
+ return EBNFBuilderContext(len(self.rule_stack), self.seqcounts, self.altcounts)
+
+ def restore(self, ctx):
+ del self.rule_stack[ctx.stackpos:]
+ self.seqcounts = ctx.seqcounts
+ self.altcounts = ctx.altcounts
def new_symbol(self):
- rule_name = ":%s_%s" % (self.current_rule, self.current_subrule)
+ """Allocate and return a new (anonymous) grammar symbol whose
+ name is based on the current grammar rule being parsed"""
+ rule_name = ":" + self.current_rule_name + "_%d" % self.current_subrule
self.current_subrule += 1
- symval = self.symbols.add_anon_symbol( rule_name )
- return symval
+ name_id = self.parser.add_anon_symbol( rule_name )
+ return name_id
- def new_item(self, itm):
- self.items.append(itm)
- return itm
-
- def visit_syntaxnode( self, node ):
- visit_func = ebnf_handles.get( node.name, handle_unknown )
- return visit_func( self, node )
-
- def visit_tokennode( self, node ):
- return self.visit_syntaxnode( node )
-
- def visit_tempsyntaxnode( self, node ):
- return self.visit_syntaxnode( node )
-
-
- def repeat( self, star_opt, myrule ):
- assert isinstance( myrule, GrammarElement )
- if star_opt.nodes:
- rule_name = self.new_symbol()
- tok = star_opt.nodes[0].nodes[0]
- if tok.value == '+':
- item = KleeneStar(rule_name, _min=1, rule=myrule)
- return self.new_item(item)
- elif tok.value == '*':
- item = KleeneStar(rule_name, _min=0, rule=myrule)
- return self.new_item(item)
- else:
- raise RuntimeError("Got symbol star_opt with value='%s'"
- % tok.value)
- return myrule
+ def new_rule(self, rule):
+ """A simple helper method that registers a new rule as 'known'"""
+ self.parser.all_rules.append(rule)
+ return rule
+
+ def resolve_rules(self):
+ """Remove GrammarProxy objects"""
+ to_be_deleted = {}
+ for rule in self.parser.all_rules:
+ # for i, arg in enumerate(rule.args):
+ for i in range(len(rule.args)):
+ arg = rule.args[i]
+ if isinstance(arg, GrammarProxy):
+ real_rule = self.parser.root_rules[arg.codename]
+ if isinstance(real_rule, GrammarProxy):
+ # If we still have a GrammarProxy associated with this codename,
+ # it means we have encountered a terminal symbol
+ to_be_deleted[ arg.codename ] = True
+ rule.args[i] = self.get_token( arg.codename )
+ #print arg, "-> Token(",arg.rule_name,")"
+ else:
+ #print arg, "->", real_rule
+ rule.args[i] = real_rule
+ for codename in to_be_deleted.keys():
+ del self.parser.root_rules[codename]
+
+ def get_token(self, codename ):
+ """Returns a new or existing Token"""
+ if codename in self.tokens:
+ return self.tokens[codename]
+ token = self.tokens[codename] = self.parser.build_token(codename)
+ return token
+
+ def get_symbolcode(self, name):
+ return self.parser.add_symbol( name )
+
+ def get_rule( self, name ):
+ if name in self.parser.tokens:
+ codename = self.parser.tokens[name]
+ return self.get_token( codename )
+ codename = self.get_symbolcode( name )
+ if codename in self.parser.root_rules:
+ return self.parser.root_rules[codename]
+ proxy = GrammarProxy( self.parser, name, codename )
+ self.parser.root_rules[codename] = proxy
+ return proxy
+
+ def alternative(self, rule, source):
+ return True
+
+ def pop_rules( self, count ):
+ offset = len(self.rule_stack)-count
+ assert offset>=0
+ rules = self.rule_stack[offset:]
+ del self.rule_stack[offset:]
+ return rules
+
+ def sequence(self, rule, source, elts_number):
+ _rule = rule.codename
+ if _rule == self.gram.sequence:
+ if self.curseqcount==1:
+ self.curseqcount = 0
+ self.curaltcount += 1
+ return True
+ rules = self.pop_rules(self.curseqcount)
+ new_rule = self.parser.build_sequence( self.new_symbol(), rules )
+ self.rule_stack.append( new_rule )
+ self.curseqcount = 0
+ self.curaltcount += 1
+ elif _rule == self.gram.alternative:
+ if self.curaltcount == 1:
+ self.curaltcount = 0
+ return True
+ rules = self.pop_rules(self.curaltcount)
+ new_rule = self.parser.build_alternative( self.new_symbol(), rules )
+ self.rule_stack.append( new_rule )
+ self.curaltcount = 0
+ elif _rule == self.gram.group:
+ self.curseqcount += 1
+ elif _rule == self.gram.option:
+ # pops the last alternative
+ rules = self.pop_rules( 1 )
+ new_rule = self.parser.build_kleenestar( self.new_symbol(), _min=0, _max=1, rule=rules[0] )
+ self.rule_stack.append( new_rule )
+ self.curseqcount += 1
+ elif _rule == self.gram.rule:
+ assert len(self.rule_stack)==1
+ old_rule = self.rule_stack[0]
+ del self.rule_stack[0]
+ if isinstance(old_rule,Token):
+ # Wrap a token into an alternative
+ old_rule = self.parser.build_alternative( self.current_rule, [old_rule] )
+ else:
+ # Make sure we use the codename from the named rule
+ old_rule.codename = self.current_rule
+ self.parser.root_rules[self.current_rule] = old_rule
+ self.current_subrule = 0
+ return True
+
+ def token(self, name, value, source):
+ if name == self.gram.TOK_STRING:
+ self.handle_TOK_STRING( name, value )
+ self.curseqcount += 1
+ elif name == self.gram.TOK_SYMDEF:
+ self.current_rule = self.get_symbolcode( value )
+ self.current_rule_name = value
+ elif name == self.gram.TOK_SYMBOL:
+ rule = self.get_rule( value )
+ self.rule_stack.append( rule )
+ self.curseqcount += 1
+ elif name == self.gram.TOK_STAR:
+ top = self.rule_stack[-1]
+ rule = self.parser.build_kleenestar( self.new_symbol(), _min=0, rule=top)
+ self.rule_stack[-1] = rule
+ elif name == self.gram.TOK_ADD:
+ top = self.rule_stack[-1]
+ rule = self.parser.build_kleenestar( self.new_symbol(), _min=1, rule=top)
+ self.rule_stack[-1] = rule
+ elif name == self.gram.TOK_BAR:
+ assert self.curseqcount == 0
+ elif name == self.gram.TOK_LPAR:
+ self.altcounts.append( self.curaltcount )
+ self.seqcounts.append( self.curseqcount )
+ self.curseqcount = 0
+ self.curaltcount = 0
+ elif name == self.gram.TOK_RPAR:
+ assert self.curaltcount == 0
+ self.curaltcount = self.altcounts.pop()
+ self.curseqcount = self.seqcounts.pop()
+ elif name == self.gram.TOK_LBRACKET:
+ self.altcounts.append( self.curaltcount )
+ self.seqcounts.append( self.curseqcount )
+ self.curseqcount = 0
+ self.curaltcount = 0
+ elif name == self.gram.TOK_RBRACKET:
+ assert self.curaltcount == 0
+ assert self.curseqcount == 0
+ self.curaltcount = self.altcounts.pop()
+ self.curseqcount = self.seqcounts.pop()
+ return True
-def parse_grammar(stream):
- """parses the grammar file
-
- stream : file-like object representing the grammar to parse
- """
- source = GrammarSource(stream.read())
- builder = BaseGrammarBuilder()
- result = GRAMMAR_GRAMMAR.match(source, builder)
- node = builder.stack[-1]
- vis = EBNFVisitor()
- node.visit(vis)
- return vis
-
-def parse_grammar_text(txt):
- """parses a grammar input
-
- stream : file-like object representing the grammar to parse
- """
- source = GrammarSource(txt)
- builder = BaseGrammarBuilder()
- result = GRAMMAR_GRAMMAR.match(source, builder)
- node = builder.stack[-1]
- vis = EBNFVisitor()
- node.visit(vis)
- return vis
-
-def target_parse_grammar_text(txt):
- vis = parse_grammar_text(txt)
- # do nothing
-
-from pprint import pprint
-if __name__ == "__main__":
- grambuild = parse_grammar(file('data/Grammar2.4'))
- for i,r in enumerate(grambuild.items):
- print "% 3d : %s" % (i, r)
- pprint(grambuild.terminals.keys())
- pprint(grambuild.tokens)
- print "|".join(grambuild.tokens.keys() )
+ def handle_TOK_STRING( self, name, value ):
+ if value in self.parser.tok_values:
+ # punctuation
+ tokencode = self.parser.tok_values[value]
+ tok = self.parser.build_token( tokencode, None )
+ else:
+ if not is_py_name(value):
+ raise RuntimeError("Unknown STRING value ('%s')" % value)
+ # assume a keyword
+ tok = self.parser.build_token( self.parser.tokens['NAME'], value)
+ if value not in self.keywords:
+ self.keywords.append(value)
+ self.rule_stack.append(tok)
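
The EBNFBuilder above is driven against GRAMMAR_GRAMMAR the same way
build_parser() does it in pythonutil.py further down. A minimal sketch of
the whole pipeline; the one-line ruledef is invented for illustration, the
rest are names from this diff:

    from pypy.interpreter.pyparser.grammar import Parser
    from pypy.interpreter.pyparser.pytoken import setup_tokens
    from pypy.interpreter.pyparser.ebnfgrammar import GRAMMAR_GRAMMAR
    from pypy.interpreter.pyparser.ebnflexer import GrammarSource
    from pypy.interpreter.pyparser.ebnfparse import EBNFBuilder

    parser = Parser()
    setup_tokens(parser)
    # hypothetical one-rule grammar definition
    source = GrammarSource(GRAMMAR_GRAMMAR, "namelist: NAME (',' NAME)*\n")
    builder = EBNFBuilder(GRAMMAR_GRAMMAR, dest_parser=parser)
    GRAMMAR_GRAMMAR.root_rules['grammar'].match(source, builder)
    builder.resolve_rules()      # drop the GrammarProxy placeholders
    parser.build_first_sets()    # compute the lookahead sets
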
Modified: pypy/dist/pypy/interpreter/pyparser/grammar.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/grammar.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/grammar.py Wed Feb 28 18:30:48 2007
@@ -13,7 +13,8 @@
except ImportError:
# allows standalone testing
Wrappable = object
- NULLTOKEN = None
+ NULLTOKEN = -1 # None
+
from syntaxtree import SyntaxNode, TempSyntaxNode, TokenNode
@@ -28,6 +29,7 @@
else:
return "["+str(codename)+"]"
+
#### Abstract interface for a lexer/tokenizer
class TokenSource(object):
"""Abstract base class for a source tokenizer"""
@@ -50,7 +52,7 @@
of the context"""
return -1
- def current_line(self):
+ def current_linesource(self):
"""Returns the current line"""
return ""
@@ -71,7 +73,8 @@
def build_first_sets(rules):
- """builds the real first tokens set for each rule in <rules>
+ """XXX : dead
+ builds the real first tokens set for each rule in <rules>
Because a rule can be recursive (directly or indirectly), the
*simplest* algorithm to build each first set is to recompute them
@@ -100,17 +103,17 @@
restore states"""
pass
-class AbstractBuilder(object):
+from pypy.interpreter.baseobjspace import Wrappable
+
+
+class AbstractBuilder(Wrappable):
"""Abstract base class for builder objects"""
- def __init__(self, rules=None, debug=0, symbols={} ):
- # a dictionary of grammar rules for debug/reference
- if rules is not None:
- self.rules = rules
- else:
- self.rules = {}
+ def __init__(self, parser, debug=0 ):
# This attribute is here for convenience
self.debug = debug
- self.symbols = symbols # mapping from codename to symbols
+ # the parser that represent the grammar used
+ assert isinstance( parser, Parser )
+ self.parser = parser
def context(self):
"""Return an opaque context object"""
@@ -142,23 +145,22 @@
class BaseGrammarBuilder(AbstractBuilder):
"""Base/default class for a builder"""
+ # XXX (adim): this is trunk's keyword management
keywords = None
- def __init__(self, rules=None, debug=0, symbols={} ):
- AbstractBuilder.__init__(self, rules, debug, symbols )
+ def __init__(self, parser, debug=0 ):
+ AbstractBuilder.__init__(self, parser, debug )
# stacks contain different objects depending on the builder class
# to be RPython they should not be defined in the base class
self.stack = []
def context(self):
"""Returns the state of the builder to be restored later"""
- #print "Save Stack:", self.stack
return BaseGrammarBuilderContext(len(self.stack))
def restore(self, ctx):
assert isinstance(ctx, BaseGrammarBuilderContext)
del self.stack[ctx.stackpos:]
- #print "Restore Stack:", self.stack
-
+
def alternative(self, rule, source):
# Do nothing, keep rule on top of the stack
if rule.is_root():
@@ -208,10 +210,12 @@
"""Base parser class"""
symbols = {} # dirty trick to provide a symbols mapping while printing (and not putting it in every object)
-
- def __init__(self, codename):
+
+ def __init__(self, parser, codename):
# the rule name
#assert type(codename)==int
+ assert isinstance(parser, Parser)
+ self.parser = parser
self.codename = codename # integer mapping to either a token value or rule symbol value
self.args = []
self.first_set = []
@@ -226,7 +230,6 @@
if self.codename >=0:
return True
return False
-
def match(self, source, builder, level=0):
"""Try to match a grammar rule
@@ -249,17 +252,17 @@
pos1 = source.get_pos()
in_first_set = self.match_first_set(builder, token)
if not in_first_set: # and not EmptyToken in self.first_set:
- if EmptyToken in self.first_set:
+ if self.parser.EmptyToken in self.first_set:
ret = builder.sequence(self, source, 0 )
if self._trace:
- self._debug_display(token, level, 'eee', builder.symbols)
+ self._debug_display(token, level, 'eee' )
return ret
if self._trace:
- self._debug_display(token, level, 'rrr', builder.symbols)
+ self._debug_display(token, level, 'rrr' )
return 0
elif self._trace:
- self._debug_display(token, level, '>>>', builder.symbols)
-
+ self._debug_display(token, level, '>>>')
+
res = self._match(source, builder, level)
if self._trace:
pos2 = source.get_pos()
@@ -267,21 +270,20 @@
prefix = '+++'
else:
prefix = '---'
- self._debug_display(token, level, prefix, builder.symbols)
+ self._debug_display(token, level, prefix)
print ' '*level, prefix, " TEXT ='%s'" % (
source.get_source_text(pos1,pos2))
if res:
print "*" * 50
return res
- def _debug_display(self, token, level, prefix, symbols):
+ def _debug_display(self, token, level, prefix):
"""prints context debug informations"""
prefix = '%s%s' % (' ' * level, prefix)
print prefix, " RULE =", self
print prefix, " TOKEN =", token
print prefix, " FIRST SET =", self.first_set
-
-
+
def _match(self, source, builder, level=0):
"""Try to match a grammar rule
@@ -295,7 +297,7 @@
returns None if no match or an object build by builder
"""
return 0
-
+
def parse(self, source):
"""Returns a simplified grammar if the rule matched at the source
current context or None"""
@@ -304,27 +306,35 @@
pass
def __str__(self):
- return self.display(0, GrammarElement.symbols )
+ try:
+ return self.display(0)
+ except Exception:
+ import traceback
+ traceback.print_exc()
+ return "<error in display()>"
def __repr__(self):
- return self.display(0, GrammarElement.symbols )
+ try:
+ return self.display(0)
+ except Exception:
+ import traceback
+ traceback.print_exc()
+ return "<error in display()>"
- def display(self, level=0, symbols={}):
+ def display(self, level=0):
"""Helper function used to represent the grammar.
mostly used for debugging the grammar itself"""
return "GrammarElement"
- def debug_return(self, ret, symbols, arg="" ):
+ def debug_return(self, ret, arg="" ):
# FIXME: use a wrapper of match() methods instead of debug_return()
# to prevent additional indirection even better a derived
# Debugging builder class
if ret and DEBUG > 0:
print "matched %s (%s): %s" % (self.__class__.__name__,
- arg, self.display(0, symbols=symbols) )
+ arg, self.display(0) )
return ret
-
+
def calc_first_set(self):
"""returns the list of possible next tokens
*must* be implemented in subclasses
@@ -337,7 +347,7 @@
token('NAME','x') matches token('NAME',None)
"""
for tk in self.first_set:
- if tk.match_token( builder, other ):
+ if tk.match_token(builder, other):
return True
return False
@@ -355,12 +365,28 @@
pass
+class GrammarProxy(GrammarElement):
+ def __init__(self, parser, rule_name, codename=-1 ):
+ GrammarElement.__init__(self, parser, codename )
+ self.rule_name = rule_name
+ self.object = None
+
+ def display(self, level=0):
+ """Helper function used to represent the grammar.
+ mostly used for debugging the grammar itself"""
+ name = self.parser.symbol_repr(self.codename)
+ repr = "Proxy("+name
+ if self.object:
+ repr+=","+self.object.display(1)
+ repr += ")"
+ return repr
+
class Alternative(GrammarElement):
"""Represents an alternative in a grammar rule (as in S -> A | B | C)"""
- def __init__(self, name, args):
- GrammarElement.__init__(self, name )
+ def __init__(self, parser, name, args):
+ GrammarElement.__init__(self, parser, name )
self.args = args
self._reordered = False
for i in self.args:
@@ -371,14 +397,14 @@
returns the object built from the first rules that matches
"""
if DEBUG > 1:
- print "try alt:", self.display(level, builder.symbols )
+ print "try alt:", self.display(level)
tok = source.peek()
# Here we stop at the first match we should
# try instead to get the longest alternative
# to see if this solve our problems with infinite recursion
for rule in self.args:
if USE_LOOKAHEAD:
- if not rule.match_first_set(builder, tok) and EmptyToken not in rule.first_set:
+ if not rule.match_first_set(builder, tok) and self.parser.EmptyToken not in rule.first_set:
if self._trace:
print "Skipping impossible rule: %s" % (rule,)
continue
@@ -388,15 +414,15 @@
return ret
return 0
- def display(self, level=0, symbols={}):
- name = get_symbol( self.codename, symbols )
+ def display(self, level=0):
+ name = self.parser.symbol_repr( self.codename )
if level == 0:
name = name + " -> "
elif self.is_root():
return name
else:
name = ""
- items = [ a.display(1,symbols) for a in self.args ]
+ items = [ a.display(1) for a in self.args ]
return name+"(" + "|".join( items ) + ")"
def calc_first_set(self):
@@ -420,7 +446,7 @@
# <tokens> is only needed for warning / debugging purposes
tokens_set = []
for rule in self.args:
- if EmptyToken in rule.first_set:
+ if self.parser.EmptyToken in rule.first_set:
empty_set.append(rule)
else:
not_empty_set.append(rule)
@@ -429,7 +455,7 @@
# It will check if a token is part of several first sets of
# a same alternative
for token in rule.first_set:
- if token is not EmptyToken and token in tokens_set:
+ if token is not self.parser.EmptyToken and token in tokens_set:
print "Warning, token %s in\n\t%s's first set is " \
" part of a previous rule's first set in " \
" alternative\n\t%s" % (token, rule, self)
@@ -438,7 +464,11 @@
print "Warning: alternative %s has more than one rule " \
"matching Empty" % self
self._reordered = True
- self.args[:] = not_empty_set
+ # self.args[:] = not_empty_set
+ for elt in self.args[:]:
+ self.args.remove(elt)
+ for elt in not_empty_set:
+ self.args.append(elt)
self.args.extend( empty_set )
def validate( self, syntax_node ):
@@ -457,16 +487,17 @@
class Sequence(GrammarElement):
"""Reprensents a Sequence in a grammar rule (as in S -> A B C)"""
- def __init__(self, name, args):
- GrammarElement.__init__(self, name )
+ def __init__(self, parser, name, args):
+ GrammarElement.__init__(self, parser, name )
self.args = args
for i in self.args:
assert isinstance( i, GrammarElement )
+
def _match(self, source, builder, level=0):
"""matches all of the symbols in order"""
if DEBUG > 1:
- print "try seq:", self.display(0, builder.symbols )
+ print "try seq:", self.display(0)
ctx = source.context()
bctx = builder.context()
for rule in self.args:
@@ -480,15 +511,15 @@
ret = builder.sequence(self, source, len(self.args))
return ret
- def display(self, level=0, symbols={}):
- name = get_symbol( self.codename, symbols )
+ def display(self, level=0):
+ name = self.parser.symbol_repr( self.codename )
if level == 0:
name = name + " -> "
elif self.is_root():
return name
else:
name = ""
- items = [a.display(1,symbols) for a in self.args]
+ items = [a.display(1) for a in self.args]
return name + "(" + " ".join( items ) + ")"
def calc_first_set(self):
@@ -503,18 +534,18 @@
for rule in self.args:
if not rule.first_set:
break
- if EmptyToken in self.first_set:
- self.first_set.remove( EmptyToken )
+ if self.parser.EmptyToken in self.first_set:
+ self.first_set.remove( self.parser.EmptyToken )
- # del self.first_set[EmptyToken]
+ # del self.first_set[self.parser.EmptyToken]
# while we're in this loop, keep aggregating possible tokens
for t in rule.first_set:
if t not in self.first_set:
self.first_set.append(t)
# self.first_set[t] = 1
- if EmptyToken not in rule.first_set:
+ if self.parser.EmptyToken not in rule.first_set:
break
-
+
def validate( self, syntax_node ):
"""validate a syntax tree/subtree from this grammar node"""
if self.codename != syntax_node.name:
@@ -530,13 +561,10 @@
-
-
-
class KleeneStar(GrammarElement):
"""Represents a KleeneStar in a grammar rule as in (S -> A+) or (S -> A*)"""
- def __init__(self, name, _min = 0, _max = -1, rule=None):
- GrammarElement.__init__( self, name )
+ def __init__(self, parser, name, _min = 0, _max = -1, rule=None):
+ GrammarElement.__init__( self, parser, name )
self.args = [rule]
self.min = _min
if _max == 0:
@@ -544,8 +572,8 @@
self.max = _max
self.star = "x"
if self.min == 0:
- self.first_set.append( EmptyToken )
- # self.first_set[EmptyToken] = 1
+ self.first_set.append( self.parser.EmptyToken )
+ # self.first_set[self.parser.EmptyToken] = 1
def _match(self, source, builder, level=0):
"""matches a number of times self.args[0]. the number must be
@@ -553,8 +581,8 @@
represent infinity
"""
if DEBUG > 1:
- print "try kle:", self.display(0,builder.symbols)
- ctx = 0
+ print "try kle:", self.display(0)
+ ctx = None
bctx = None
if self.min:
ctx = source.context()
@@ -576,14 +604,19 @@
ret = builder.sequence(self, source, rules)
return ret
- def display(self, level=0, symbols={}):
- name = get_symbol( self.codename, symbols )
+ def display(self, level=0):
+ name = self.parser.symbol_repr( self.codename )
if level==0:
name = name + " -> "
elif self.is_root():
return name
else:
name = ""
+ star = self.get_star()
+ s = self.args[0].display(1)
+ return name + "%s%s" % (s, star)
+
+ def get_star(self):
star = "{%d,%d}" % (self.min,self.max)
if self.min==0 and self.max==1:
star = "?"
@@ -591,23 +624,21 @@
star = "*"
elif self.min==1 and self.max==-1:
star = "+"
- s = self.args[0].display(1, symbols)
- return name + "%s%s" % (s, star)
-
+ return star
def calc_first_set(self):
"""returns the list of possible next tokens
if S -> A*:
- LAH(S) = Union( LAH(A), EmptyToken )
+ LAH(S) = Union( LAH(A), self.parser.EmptyToken )
if S -> A+:
LAH(S) = LAH(A)
"""
rule = self.args[0]
self.first_set = rule.first_set[:]
# self.first_set = dict(rule.first_set)
- if self.min == 0 and EmptyToken not in self.first_set:
- self.first_set.append(EmptyToken)
- # self.first_set[EmptyToken] = 1
+ if self.min == 0 and self.parser.EmptyToken not in self.first_set:
+ self.first_set.append(self.parser.EmptyToken)
+ # self.first_set[self.parser.EmptyToken] = 1
def validate( self, syntax_node ):
"""validate a syntax tree/subtree from this grammar node"""
@@ -626,8 +657,8 @@
class Token(GrammarElement):
"""Represents a Token in a grammar rule (a lexer token)"""
- def __init__( self, codename, value = None):
- GrammarElement.__init__( self, codename )
+ def __init__(self, parser, codename, value=None):
+ GrammarElement.__init__(self, parser, codename)
self.value = value
self.first_set = [self]
# self.first_set = {self: 1}
@@ -643,9 +674,10 @@
else:
# error unknown or negative integer
"""
- if (self.value is not None and builder.keywords is not None
- and self.value not in builder.keywords):
- return 0
+ # XXX (adim): this is trunk's keyword management
+ # if (self.value is not None and builder.keywords is not None
+ # and self.value not in builder.keywords):
+ # return 0
ctx = source.context()
tk = source.next()
@@ -661,13 +693,12 @@
source.restore( ctx )
return 0
- def display(self, level=0, symbols={}):
- name = get_symbol( self.codename, symbols )
+ def display(self, level=0):
+ name = self.parser.symbol_repr( self.codename )
if self.value is None:
return "<%s>" % name
else:
return "<%s>=='%s'" % (name, self.value)
-
def match_token(self, builder, other):
"""convenience '==' implementation, this is *not* a *real* equality test
@@ -678,16 +709,17 @@
the comparison algorithm is similar to the one in match()
"""
if not isinstance(other, Token):
- raise RuntimeError("Unexpected token type %r" % other)
- if other is EmptyToken:
- return False
- if (self.value is not None and builder.keywords is not None
- and self.value not in builder.keywords):
+ raise RuntimeError("Unexpected token type")
+ if other is self.parser.EmptyToken:
return False
- res = other.codename == self.codename and self.value in [None, other.value]
+ # XXX (adim): this is trunk's keyword management
+ # if (self.value is not None and builder.keywords is not None
+ # and self.value not in builder.keywords):
+ # return False
+ res = other.codename == self.codename and self.value in [None, other.value]
#print "matching", self, other, res
return res
-
+
def __eq__(self, other):
return self.codename == other.codename and self.value == other.value
@@ -707,8 +739,154 @@
return False
-EmptyToken = Token(NULLTOKEN, None)
-
-
+class Parser(object):
+ def __init__(self):
+ self._anoncount = -10
+ self._count = 0
+ self.sym_name = {} # mapping symbol code -> symbol name
+ self.symbols = {} # mapping symbol name -> symbol code
+ self.tokens = { 'NULLTOKEN' : -1 }
+ self.EmptyToken = Token( self, -1, None )
+ self.tok_name = {}
+ self.tok_values = {}
+ self.tok_rvalues = {}
+ self._ann_sym_count = -10
+ self._sym_count = 0
+ self.all_rules = []
+ self.root_rules = {}
+
+ def symbol_repr( self, codename ):
+ if codename in self.tok_name:
+ return self.tok_name[codename]
+ elif codename in self.sym_name:
+ return self.sym_name[codename]
+ return "%d" % codename
+
+ def add_symbol( self, sym ):
+ # assert isinstance( sym, str )
+ if not sym in self.symbols:
+ val = self._sym_count
+ self._sym_count += 1
+ self.symbols[sym] = val
+ self.sym_name[val] = sym
+ return val
+ return self.symbols[ sym ]
+
+ def add_anon_symbol( self, sym ):
+ # assert isinstance( sym, str )
+ if not sym in self.symbols:
+ val = self._ann_sym_count
+ self._ann_sym_count -= 1
+ self.symbols[sym] = val
+ self.sym_name[val] = sym
+ return val
+ return self.symbols[ sym ]
+
+ def add_token( self, tok, value = None ):
+ # assert isinstance( tok, str )
+ if not tok in self.tokens:
+ val = self._sym_count
+ self._sym_count += 1
+ self.tokens[tok] = val
+ self.tok_name[val] = tok
+ if value is not None:
+ self.tok_values[value] = val
+ # XXX : this reverse mapping seemed only to be used
+ # because of pycodegen visitAugAssign
+ self.tok_rvalues[val] = value
+ return val
+ return self.tokens[ tok ]
+
+ def load_symbols( self, symbols ):
+ for _value, _name in symbols.items():
+ if _value < self._ann_sym_count:
+ self._ann_sym_count = _value - 1
+ if _value > self._sym_count:
+ self._sym_count = _value + 1
+ self.symbols[_name] = _value
+ self.sym_name[_value] = _name
+
+ def build_first_sets(self):
+ """builds the real first tokens set for each rule in <rules>
+
+ Because a rule can be recursive (directly or indirectly), the
+ *simplest* algorithm to build each first set is to recompute them
+ until Computation(N) = Computation(N-1), N being the number of rounds.
+ As an example, on Python2.3's grammar, we need 19 cycles to compute
+ full first sets.
+ """
+ rules = self.all_rules
+ changed = True
+ while changed:
+ # loop while one first set is changed
+ changed = False
+ for rule in rules:
+ # For each rule, recompute first set
+ size = len(rule.first_set)
+ rule.calc_first_set()
+ new_size = len(rule.first_set)
+ if new_size != size:
+ changed = True
+ for r in rules:
+ assert len(r.first_set) > 0, "Error: ot Empty firstset for %s" % r
+ r.reorder_rule()
+
+
+ def build_alternative( self, name_id, args ):
+ # assert isinstance( name_id, int )
+ assert isinstance(args, list)
+ alt = Alternative( self, name_id, args )
+ self.all_rules.append( alt )
+ return alt
+
+ def Alternative_n(self, name, args ):
+ # assert isinstance(name, str)
+ name_id = self.add_symbol( name )
+ return self.build_alternative( name_id, args )
+
+ def build_sequence( self, name_id, args ):
+ # assert isinstance( name_id, int )
+ alt = Sequence( self, name_id, args )
+ self.all_rules.append( alt )
+ return alt
+
+ def Sequence_n(self, name, args ):
+ # assert isinstance(name, str)
+ name_id = self.add_symbol( name )
+ return self.build_sequence( name_id, args )
+
+ def build_kleenestar( self, name_id, _min = 0, _max = -1, rule = None ):
+ # assert isinstance( name_id, int )
+ alt = KleeneStar( self, name_id, _min, _max, rule )
+ self.all_rules.append( alt )
+ return alt
+
+ def KleeneStar_n(self, name, _min = 0, _max = -1, rule = None ):
+ # assert isinstance(name, str)
+ name_id = self.add_symbol( name )
+ return self.build_kleenestar( name_id, _min, _max, rule )
+
+ def Token_n(self, name, value = None ):
+ # assert isinstance( name, str)
+ # assert value is None or isinstance( value, str)
+ name_id = self.add_token( name, value )
+ return self.build_token( name_id, value )
+
+ def build_token(self, name_id, value = None ):
+ # assert isinstance( name_id, int )
+ # assert value is None or isinstance( value, str)
+ tok = Token( self, name_id, value )
+ return tok
+
+
+ # Debugging functions
+ def show_rules(self, name):
+ import re
+ rex = re.compile(name)
+ rules = []
+ for _name, _val in self.symbols.items():
+ if rex.search(_name) and _val >= 0:
+ rules.append(self.root_rules[_val])
+ return rules
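
The new Parser object owns all symbol/token numbering and rule
construction. A small sketch of building a rule by hand with the methods
above (the rule itself is invented for illustration):

    from pypy.interpreter.pyparser.grammar import Parser

    p = Parser()
    NAME = p.add_token('NAME')            # token codes allocated on demand
    COMMA = p.add_token('COMMA', ',')     # punctuation also fills tok_values
    namelist = p.add_symbol('namelist')   # rule symbols get their own codes
    seq = p.build_sequence(namelist,
                           [p.build_token(NAME), p.build_token(COMMA),
                            p.build_token(NAME)])
    p.root_rules[namelist] = seq          # register as a named root rule
    p.build_first_sets()                  # fixpoint over p.all_rules
    print p.symbol_repr(namelist)         # 'namelist'
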
Modified: pypy/dist/pypy/interpreter/pyparser/pysymbol.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/pysymbol.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/pysymbol.py Wed Feb 28 18:30:48 2007
@@ -5,6 +5,7 @@
# important here
class SymbolMapper(object):
+ """XXX dead"""
def __init__(self, sym_name=None ):
_anoncount = self._anoncount = -10
_count = self._count = 0
@@ -22,7 +23,7 @@
self._count = _count
def add_symbol( self, sym ):
- assert type(sym)==str
+ # assert isinstance(sym, str)
if not sym in self.sym_values:
self._count += 1
val = self._count
@@ -32,7 +33,7 @@
return self.sym_values[ sym ]
def add_anon_symbol( self, sym ):
- assert type(sym)==str
+ # assert isinstance(sym, str)
if not sym in self.sym_values:
self._anoncount -= 1
val = self._anoncount
@@ -43,7 +44,7 @@
def __getitem__(self, sym ):
"""NOT RPYTHON"""
- assert type(sym)==str
+ # assert isinstance(sym, str)
return self.sym_values[ sym ]
def __contains__(self, sym):
@@ -57,6 +58,12 @@
# once loaded the grammar parser will fill the mappings with the
# grammar symbols
+# XXX: is this completely dead?
+## # prepopulate symbol table from symbols used by CPython
+## for _value, _name in _cpython_symbols.sym_name.items():
+## globals()[_name] = _value
+
+
def gen_symbol_file(fname):
"""
Generate a compatible symbol file for symbol.py, using the grammar that has
Modified: pypy/dist/pypy/interpreter/pyparser/pythonlexer.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/pythonlexer.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/pythonlexer.py Wed Feb 28 18:30:48 2007
@@ -5,10 +5,10 @@
import sys
from codeop import PyCF_DONT_IMPLY_DEDENT
-from pypy.interpreter.pyparser.grammar import TokenSource, Token
+from pypy.interpreter.pyparser.grammar import TokenSource, Token, AbstractContext, Parser
from pypy.interpreter.pyparser.error import SyntaxError
+
import pytoken
-from pytoken import NEWLINE
# Don't import string for that ...
NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
@@ -51,7 +51,7 @@
################################################################################
from pypy.interpreter.pyparser import pytoken
from pytokenize import tabsize, whiteSpaceDFA, triple_quoted, endDFAs, \
- single_quoted, pseudoDFA
+ single_quoted, pseudoDFA
import automata
@@ -62,7 +62,7 @@
SyntaxError.__init__(self, msg, lineno, offset, line)
self.token_stack = token_stack
-def generate_tokens(lines, flags):
+def generate_tokens( parser, lines, flags):
"""
This is a rewrite of pypy.module.parser.pytokenize.generate_tokens since
the original function is not RPYTHON (uses yield)
@@ -91,6 +91,7 @@
#for line in lines:
# print repr(line)
#print '------------------- flags=%s ---->' % flags
+ assert isinstance( parser, Parser )
token_list = []
lnum = parenlev = continued = 0
namechars = NAMECHARS
@@ -120,7 +121,7 @@
endmatch = endDFA.recognize(line)
if endmatch >= 0:
pos = end = endmatch
- tok = Token(pytoken.STRING, contstr + line[:end])
+ tok = parser.build_token(parser.tokens['STRING'], contstr + line[:end])
token_list.append((tok, line, lnum, pos))
last_comment = ''
# token_list.append((STRING, contstr + line[:end],
@@ -129,7 +130,7 @@
contline = None
elif (needcont and not line.endswith('\\\n') and
not line.endswith('\\\r\n')):
- tok = Token(pytoken.ERRORTOKEN, contstr + line)
+ tok = parser.build_token(parser.tokens['ERRORTOKEN'], contstr + line)
token_list.append((tok, line, lnum, pos))
last_comment = ''
# token_list.append((ERRORTOKEN, contstr + line,
@@ -155,10 +156,10 @@
if line[pos] in '#\r\n': # skip comments or blank lines
if line[pos] == '#':
- tok = Token(pytoken.COMMENT, line[pos:])
+ tok = parser.build_token(parser.tokens['COMMENT'], line[pos:])
last_comment = line[pos:]
else:
- tok = Token(pytoken.NL, line[pos:])
+ tok = parser.build_token(parser.tokens['NL'], line[pos:])
last_comment = ''
# XXX Skip NL and COMMENT Tokens
# token_list.append((tok, line, lnum, pos))
@@ -166,12 +167,12 @@
if column > indents[-1]: # count indents or dedents
indents.append(column)
- tok = Token(pytoken.INDENT, line[:pos])
+ tok = parser.build_token(parser.tokens['INDENT'], line[:pos])
token_list.append((tok, line, lnum, pos))
last_comment = ''
while column < indents[-1]:
indents = indents[:-1]
- tok = Token(pytoken.DEDENT, '')
+ tok = parser.build_token(parser.tokens['DEDENT'], '')
token_list.append((tok, line, lnum, pos))
last_comment = ''
else: # continued statement
@@ -198,22 +199,22 @@
token, initial = line[start:end], line[start]
if initial in numchars or \
(initial == '.' and token != '.'): # ordinary number
- tok = Token(pytoken.NUMBER, token)
+ tok = parser.build_token(parser.tokens['NUMBER'], token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
elif initial in '\r\n':
if parenlev > 0:
- tok = Token(pytoken.NL, token)
+ tok = parser.build_token(parser.tokens['NL'], token)
last_comment = ''
# XXX Skip NL
else:
- tok = Token(pytoken.NEWLINE, token)
+ tok = parser.build_token(parser.tokens['NEWLINE'], token)
# XXX YUCK !
tok.value = last_comment
token_list.append((tok, line, lnum, pos))
last_comment = ''
elif initial == '#':
- tok = Token(pytoken.COMMENT, token)
+ tok = parser.build_token(parser.tokens['COMMENT'], token)
last_comment = token
# XXX Skip # token_list.append((tok, line, lnum, pos))
# token_list.append((COMMENT, token, spos, epos, line))
@@ -223,7 +224,7 @@
if endmatch >= 0: # all on one line
pos = endmatch
token = line[start:pos]
- tok = Token(pytoken.STRING, token)
+ tok = parser.build_token(parser.tokens['STRING'], token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
else:
@@ -240,11 +241,11 @@
contline = line
break
else: # ordinary string
- tok = Token(pytoken.STRING, token)
+ tok = parser.build_token(parser.tokens['STRING'], token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
elif initial in namechars: # ordinary name
- tok = Token(pytoken.NAME, token)
+ tok = parser.build_token(parser.tokens['NAME'], token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
elif initial == '\\': # continued stmt
@@ -258,10 +259,11 @@
if parenlev < 0:
raise TokenError("unmatched '%s'" % initial, line,
(lnum-1, 0), token_list)
- if token in pytoken.tok_punct:
- tok = Token(pytoken.tok_punct[token])
+ if token in parser.tok_values:
+ punct = parser.tok_values[token]
+ tok = parser.build_token(punct)
else:
- tok = Token(pytoken.OP, token)
+ tok = parser.build_token(parser.tokens['OP'], token)
token_list.append((tok, line, lnum, pos))
last_comment = ''
else:
@@ -271,33 +273,39 @@
if start<max and line[start] in single_quoted:
raise TokenError("EOL while scanning single-quoted string", line,
(lnum, start), token_list)
- tok = Token(pytoken.ERRORTOKEN, line[pos])
+ tok = parser.build_token(parser.tokens['ERRORTOKEN'], line[pos])
token_list.append((tok, line, lnum, pos))
last_comment = ''
pos = pos + 1
lnum -= 1
if not (flags & PyCF_DONT_IMPLY_DEDENT):
- if token_list and token_list[-1][0].codename != pytoken.NEWLINE:
- token_list.append((Token(pytoken.NEWLINE, ''), '\n', lnum, 0))
+ if token_list and token_list[-1][0].codename != parser.tokens['NEWLINE']:
+ token_list.append((parser.build_token(parser.tokens['NEWLINE'], ''), '\n', lnum, 0))
for indent in indents[1:]: # pop remaining indent levels
- tok = Token(pytoken.DEDENT, '')
+ tok = parser.build_token(parser.tokens['DEDENT'], '')
token_list.append((tok, line, lnum, pos))
#if token_list and token_list[-1][0].codename != pytoken.NEWLINE:
- token_list.append((Token(pytoken.NEWLINE, ''), '\n', lnum, 0))
+ token_list.append((parser.build_token(parser.tokens['NEWLINE'], ''), '\n', lnum, 0))
- tok = Token(pytoken.ENDMARKER, '',)
+ tok = parser.build_token(parser.tokens['ENDMARKER'], '',)
token_list.append((tok, line, lnum, pos))
#for t in token_list:
# print '%20s %-25s %d' % (pytoken.tok_name.get(t[0].codename, '?'), t[0], t[-2])
#print '----------------------------------------- pyparser/pythonlexer.py'
return token_list
+
+class PythonSourceContext(AbstractContext):
+ def __init__(self, pos ):
+ self.pos = pos
+
class PythonSource(TokenSource):
"""This source uses Jonathan's tokenizer"""
- def __init__(self, strings, flags=0):
+ def __init__(self, parser, strings, flags=0):
# TokenSource.__init__(self)
- tokens = generate_tokens(strings, flags)
+ #self.parser = parser
+ tokens = generate_tokens( parser, strings, flags)
self.token_stack = tokens
self._current_line = '' # the current line (as a string)
self._lineno = -1
@@ -317,7 +325,7 @@
self._offset = pos
return tok
- def current_line(self):
+ def current_linesource(self):
"""Returns the current line being parsed"""
return self._current_line
@@ -327,11 +335,12 @@
def context(self):
"""Returns an opaque context object for later restore"""
- return self.stack_pos
+ return PythonSourceContext(self.stack_pos)
def restore(self, ctx):
"""Restores a context"""
- self.stack_pos = ctx
+ assert isinstance(ctx, PythonSourceContext)
+ self.stack_pos = ctx.pos
def peek(self):
"""returns next token without consuming it"""
@@ -363,8 +372,8 @@
return (self._current_line, self._lineno)
# return 'line %s : %s' % ('XXX', self._current_line)
-NONE_LIST = [pytoken.ENDMARKER, pytoken.INDENT, pytoken.DEDENT]
-NAMED_LIST = [pytoken.OP]
+#NONE_LIST = [pytoken.ENDMARKER, pytoken.INDENT, pytoken.DEDENT]
+#NAMED_LIST = [pytoken.OP]
Source = PythonSource
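
generate_tokens() now needs the parser to mint Token objects instead of
using module-global token codes. A quick sketch of tokenizing one line
through the new interface (names from this diff; the printed output is an
approximation):

    from pypy.interpreter.pyparser.pythonparse import make_pyparser
    from pypy.interpreter.pyparser.pythonlexer import generate_tokens

    parser = make_pyparser()
    tokens = generate_tokens(parser, ["x = 1\n"], 0)
    for tok, line, lnum, pos in tokens:
        print parser.symbol_repr(tok.codename), repr(tok.value)
    # roughly: NAME 'x', EQUAL None, NUMBER '1', NEWLINE '', ...
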
Modified: pypy/dist/pypy/interpreter/pyparser/pythonparse.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/pythonparse.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/pythonparse.py Wed Feb 28 18:30:48 2007
@@ -5,70 +5,31 @@
helper functions are provided that use the grammar to parse
using file_input, single_input and eval_input targets
"""
+import sys
+import os
from pypy.interpreter.error import OperationError, debug_print
from pypy.interpreter import gateway
from pypy.interpreter.pyparser.error import SyntaxError
-from pythonlexer import Source, match_encoding_declaration
+from pypy.interpreter.pyparser.pythonlexer import Source, match_encoding_declaration
from pypy.interpreter.astcompiler.consts import CO_FUTURE_WITH_STATEMENT
-import pysymbol
-import ebnfparse
-import sys
-import os
-import grammar
+import pypy.interpreter.pyparser.pysymbol as pysymbol
+import pypy.interpreter.pyparser.pytoken as pytoken
+import pypy.interpreter.pyparser.ebnfparse as ebnfparse
+from pypy.interpreter.pyparser.ebnflexer import GrammarSource
+from pypy.interpreter.pyparser.ebnfgrammar import GRAMMAR_GRAMMAR
+import pypy.interpreter.pyparser.grammar as grammar
+from pypy.interpreter.pyparser.pythonutil import build_parser_for_version, build_parser
+
+# try:
+from pypy.interpreter.pyparser import symbol
+# except ImportError:
+# # for standalone testing
+# import symbol
from codeop import PyCF_DONT_IMPLY_DEDENT
-class AlternateGrammarException(Exception):
- pass
-
-class PythonParser(object):
- """Wrapper class for python grammar"""
- def __init__(self, grammar_builder):
- self.items = grammar_builder.items
- self.rules = grammar_builder.rules
- # Build first sets for each rule (including anonymous ones)
- grammar.build_first_sets(self.items)
- self.symbols = grammar_builder.symbols
- self.with_grammar = None
- self.keywords = dict.fromkeys(grammar_builder.keywords)
- # Only when with_statement is enabled
- self.keywords.pop('with', None)
- self.keywords.pop('as', None)
-
- def parse_source(self, textsrc, goal, builder, flags=0):
- """Parse a python source according to goal"""
- # Detect source encoding.
- if textsrc[:3] == '\xEF\xBB\xBF':
- textsrc = textsrc[3:]
- enc = 'utf-8'
- else:
- enc = _normalize_encoding(_check_for_encoding(textsrc))
- if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
- textsrc = recode_to_utf8(builder.space, textsrc, enc)
-
- lines = [line + '\n' for line in textsrc.split('\n')]
- builder.source_encoding = enc
- if len(textsrc) and textsrc[-1] == '\n':
- lines.pop()
- flags &= ~PyCF_DONT_IMPLY_DEDENT
- return self.parse_lines(lines, goal, builder, flags)
-
-
- def parse_lines(self, lines, goal, builder, flags=0):
- builder.keywords = self.keywords.copy()
- if flags & CO_FUTURE_WITH_STATEMENT:
- builder.enable_with()
- goalnumber = self.symbols.sym_values[goal]
- target = self.rules[goalnumber]
- src = Source(lines, flags)
-
- if not target.match(src, builder):
- line, lineno = src.debug()
- # XXX needs better error messages
- raise SyntaxError("invalid syntax", lineno, -1, line)
- # return None
- return builder
+## files encoding management ############################################
_recode_to_utf8 = gateway.applevel(r'''
def _recode_to_utf8(text, encoding):
return unicode(text, encoding).encode("utf-8")
@@ -109,6 +70,7 @@
return _check_line_for_encoding(s[eol + 1:])
return _check_line_for_encoding(s[eol + 1:eol2])
+
def _check_line_for_encoding(line):
"""returns the declared encoding or None"""
i = 0
@@ -119,61 +81,122 @@
return None
return match_encoding_declaration(line[i:])
-PYTHON_VERSION = ".".join([str(i) for i in sys.version_info[:2]])
-def get_grammar_file( version ):
- """returns the python grammar corresponding to our CPython version"""
- if version == "native":
- _ver = PYTHON_VERSION
- elif version == "stable":
- _ver = "_stablecompiler"
- elif version in ("2.3","2.4","2.5a"):
- _ver = version
- return os.path.join( os.path.dirname(__file__), "data", "Grammar" + _ver ), _ver
-
-# unfortunately the command line options are not parsed yet, so it cannot
-# be made configurable yet
-PYTHON_GRAMMAR, PYPY_VERSION = get_grammar_file("2.4")
-
-def python_grammar(fname):
- """returns a PythonParser build from the specified grammar file"""
- level = grammar.DEBUG
- grammar.DEBUG = 0
- gram = ebnfparse.parse_grammar( file(fname) )
- grammar.DEBUG = level
- parser = PythonParser( gram )
- return parser
-
-debug_print( "Loading grammar %s" % PYTHON_GRAMMAR )
-PYTHON_PARSER = python_grammar( PYTHON_GRAMMAR)
-#PYTHON_PARSER.with_grammar = python_grammar( PYTHON_GRAMMAR + '_with' )
-
-def reload_grammar(version):
- """helper function to test with pypy different grammars"""
- global PYTHON_GRAMMAR, PYTHON_PARSER, PYPY_VERSION
- PYTHON_GRAMMAR, PYPY_VERSION = get_grammar_file( version )
- debug_print( "Reloading grammar %s" % PYTHON_GRAMMAR )
- PYTHON_PARSER = python_grammar( PYTHON_GRAMMAR )
-
-def parse_file_input(pyf, gram, builder ):
- """Parse a python file"""
- return gram.parse_source( pyf.read(), "file_input", builder )
-
-def parse_single_input(textsrc, gram, builder ):
- """Parse a python single statement"""
- return gram.parse_source( textsrc, "single_input", builder )
-
-def parse_eval_input(textsrc, gram, builder):
- """Parse a python expression"""
- return gram.parse_source( textsrc, "eval_input", builder )
+## Python Source Parser ###################################################
+class PythonParser(grammar.Parser):
+ """Wrapper class for python grammar"""
+ targets = {
+ 'eval' : "eval_input",
+ 'single' : "single_input",
+ 'exec' : "file_input",
+ }
+
+ def __init__(self): # , predefined_symbols=None):
+ grammar.Parser.__init__(self)
+ pytoken.setup_tokens(self)
+ # if predefined_symbols:
+ # self.load_symbols(predefined_symbols)
+ self.keywords = []
+
+ # XXX (adim): this is trunk's keyword management
+ # self.with_grammar = None
+ # self.keywords = dict.fromkeys(grammar_builder.keywords)
+ # # Only when with_statement is enabled
+ # self.keywords.pop('with', None)
+ # self.keywords.pop('as', None)
+
+ def parse_source(self, textsrc, mode, builder, flags=0):
+ """Parse a python source according to goal"""
+ goal = self.targets[mode]
+ # Detect source encoding.
+ if textsrc[:3] == '\xEF\xBB\xBF':
+ textsrc = textsrc[3:]
+ enc = 'utf-8'
+ else:
+ enc = _normalize_encoding(_check_for_encoding(textsrc))
+ if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
+ textsrc = recode_to_utf8(builder.space, textsrc, enc)
+
+ lines = [line + '\n' for line in textsrc.split('\n')]
+ builder.source_encoding = enc
+ if len(textsrc) and textsrc[-1] == '\n':
+ lines.pop()
+ flags &= ~PyCF_DONT_IMPLY_DEDENT
+ return self.parse_lines(lines, goal, builder, flags)
+
+
+ def parse_lines(self, lines, goal, builder, flags=0):
+ # XXX (adim): this is trunk's keyword management
+ # builder.keywords = self.keywords.copy()
+ # if flags & CO_FUTURE_WITH_STATEMENT:
+ # builder.enable_with()
+ goalnumber = self.symbols[goal]
+ target = self.root_rules[goalnumber]
+ src = Source(self, lines, flags)
+ if not target.match(src, builder):
+ line, lineno = src.debug()
+ # XXX needs better error messages
+ raise SyntaxError("invalid syntax", lineno, -1, line)
+ # return None
+ return builder
+
+ def update_rules_references(self):
+ """update references to old rules"""
+ # brute force algorithm
+ for rule in self.all_rules:
+ for i in range(len(rule.args)):
+ arg = rule.args[i]
+ if arg.codename in self.root_rules:
+ real_rule = self.root_rules[arg.codename]
+ # This rule has been updated
+ if real_rule is not rule.args[i]:
+ rule.args[i] = real_rule
+
+
+ def insert_rule(self, ruledef):
+ """parses <ruledef> and inserts corresponding rules in the parser"""
+ # parse the ruledef(s)
+ source = GrammarSource(GRAMMAR_GRAMMAR, ruledef)
+ builder = ebnfparse.EBNFBuilder(GRAMMAR_GRAMMAR, dest_parser=self)
+ GRAMMAR_GRAMMAR.root_rules['grammar'].match(source, builder)
+ # remove proxy objects if any
+ builder.resolve_rules()
+ # update keywords
+ self.keywords.extend(builder.keywords)
+ # update old references in case an existing rule was modified
+ self.update_rules_references()
+ # recompute first sets
+ self.build_first_sets()
+
+def make_pyparser(version="2.4"):
+ parser = PythonParser()
+ return build_parser_for_version(version, parser=parser)
+
+PYTHON_PARSER = make_pyparser()
+
+def translation_target(grammardef):
+ parser = PythonParser() # predefined_symbols=symbol.sym_name)
+ source = GrammarSource(GRAMMAR_GRAMMAR, grammardef)
+ builder = ebnfparse.EBNFBuilder(GRAMMAR_GRAMMAR, dest_parser=parser)
+ GRAMMAR_GRAMMAR.root_rules['grammar'].match(source, builder)
+ builder.resolve_rules()
+ parser.build_first_sets()
+ parser.keywords = builder.keywords
+ return 0
+
+
+## XXX BROKEN
+## def parse_grammar(space, w_src):
+## """Loads the grammar using the 'dynamic' rpython parser"""
+## src = space.str_w( w_src )
+## ebnfbuilder = ebnfparse.parse_grammar_text( src )
+## ebnfbuilder.resolve_rules()
+## grammar.build_first_sets(ebnfbuilder.all_rules)
+## return space.wrap( ebnfbuilder.root_rules )
def grammar_rules( space ):
w_rules = space.newdict()
- for key, value in PYTHON_PARSER.rules.iteritems():
+ parser = make_pyparser()
+ for key, value in parser.root_rules.iteritems():
space.setitem(w_rules, space.wrap(key), space.wrap(value))
return w_rules
-
-
-def make_rule( space, w_rule ):
- rule = space.str_w( w_rule )
-
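
insert_rule() is the core of the runtime grammar modification support: it
parses extra ruledefs into the live parser, resolves proxies, merges
keywords and recomputes first sets. A sketch; the ruledef text itself is
invented:

    from pypy.interpreter.pyparser.pythonparse import make_pyparser

    parser = make_pyparser()
    # add or redefine a rule at runtime; references to existing rule
    # names are patched up by update_rules_references()
    parser.insert_rule("unless_stmt: 'unless' test ':' suite\n")
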
Modified: pypy/dist/pypy/interpreter/pyparser/pythonutil.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/pythonutil.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/pythonutil.py Wed Feb 28 18:30:48 2007
@@ -1,17 +1,109 @@
-__all__ = ["python_parse", "pypy_parse"]
+"""miscelanneous utility functions
+XXX: svn mv pythonutil.py gramtools.py / parsertools.py
+"""
+
+import sys
+import os
import parser
-import pythonparse
-from tuplebuilder import TupleBuilder
-from astbuilder import AstBuilder
-
-PYTHON_PARSER = pythonparse.PYTHON_PARSER
-TARGET_DICT = {
- 'exec' : "file_input",
- 'eval' : "eval_input",
- 'single' : "single_input",
- }
+from pypy.interpreter.pyparser.grammar import Parser
+from pypy.interpreter.pyparser.pytoken import setup_tokens
+from pypy.interpreter.pyparser.ebnfgrammar import GRAMMAR_GRAMMAR
+from pypy.interpreter.pyparser.ebnflexer import GrammarSource
+from pypy.interpreter.pyparser.ebnfparse import EBNFBuilder
+
+from pypy.interpreter.pyparser.tuplebuilder import TupleBuilder
+
+PYTHON_VERSION = ".".join([str(i) for i in sys.version_info[:2]])
+
+def dirname(filename):
+ """redefine dirname to avoid the need of os.path.split being rpython
+ """
+ i = filename.rfind(os.sep) + 1
+ assert i >= 0
+ return filename[:i]
+
+
+def get_grammar_file(version):
+ """returns the python grammar corresponding to our CPython version"""
+ if version == "native":
+ _ver = PYTHON_VERSION
+ elif version == "stable":
+ _ver = "_stablecompiler"
+ elif version in ("2.3","2.4","2.5a"):
+ _ver = version
+ else:
+ raise ValueError('no such grammar version: %s' % version)
+ # two os.path.join calls to avoid the TyperError ("can only iterate over
+ # tuples of length 1 for now") generated by a call to os.path.join(a, *args)
+ return os.path.join( dirname(__file__),
+ os.path.join("data", "Grammar" + _ver) ), _ver
+
+
+def build_parser(gramfile, parser=None):
+ """reads a (EBNF) grammar definition and builds a parser for it"""
+ if parser is None:
+ parser = Parser()
+ setup_tokens(parser)
+ # XXX: clean up object dependencies
+ from pypy.rlib.streamio import open_file_as_stream
+ stream = open_file_as_stream(gramfile)
+ grammardef = stream.readall()
+ stream.close()
+ assert isinstance(grammardef, str)
+ source = GrammarSource(GRAMMAR_GRAMMAR, grammardef)
+ builder = EBNFBuilder(GRAMMAR_GRAMMAR, dest_parser=parser)
+ GRAMMAR_GRAMMAR.root_rules['grammar'].match(source, builder)
+ builder.resolve_rules()
+ parser.build_first_sets()
+ parser.keywords = builder.keywords
+ return parser
+
+
+def build_parser_for_version(version, parser=None):
+ gramfile, _ = get_grammar_file(version)
+ return build_parser(gramfile, parser)
+
+
+## XXX: the below code should probably go elsewhere
+
+## convenience functions for computing AST objects using recparser
+def ast_from_input(input, mode, transformer, parser):
+ """converts a source input into an AST
+
+ - input : the source to be converted
+ - mode : 'exec', 'eval' or 'single'
+ - transformer : the transformer instance to use to convert
+ the nested tuples into the AST
+ XXX: transformer could be instantiated here, but we don't want
+ to explicitly import compiler or stablecompiler here.
+ This is to be fixed in a clean way
+ """
+ builder = TupleBuilder(parser, lineno=True)
+ parser.parse_source(input, mode, builder)
+ tuples = builder.stack[-1].as_tuple(True)
+ return transformer.compile_node(tuples)
+
+
+def pypy_parse(source, mode='exec', lineno=False):
+ from pypy.interpreter.pyparser.pythonparse import PythonParser, make_pyparser
+ from pypy.interpreter.pyparser.astbuilder import AstBuilder
+ # parser = build_parser_for_version("2.4", PythonParser())
+ parser = make_pyparser('stable')
+ builder = TupleBuilder(parser)
+ parser.parse_source(source, mode, builder)
+ return builder.stack[-1].as_tuple(lineno)
+
+
+def source2ast(source, mode='exec', version='2.4', space=None):
+ from pypy.interpreter.pyparser.pythonparse import PythonParser, make_pyparser
+ from pypy.interpreter.pyparser.astbuilder import AstBuilder
+ parser = make_pyparser(version)
+ builder = AstBuilder(parser, space=space)
+ parser.parse_source(source, mode, builder)
+ return builder.rule_stack[-1]
+
## convenience functions around CPython's parser functions
def python_parsefile(filename, lineno=False):
@@ -32,7 +124,6 @@
tp = parser.suite(source)
return parser.ast2tuple(tp, line_info=lineno)
-## convenience functions around recparser functions
def pypy_parsefile(filename, lineno=False):
"""parse <filename> using PyPy's parser module and return
a tuple of three elements :
@@ -48,105 +139,3 @@
source = pyf.read()
pyf.close()
return pypy_parse(source, 'exec', lineno)
-
-def internal_pypy_parse(source, mode='exec', lineno=False, flags=0, space=None,
- parser = PYTHON_PARSER):
- """This function has no other role than testing the parser's annotation
-
- annotateme() is basically the same code that pypy_parse(), but with the
- following differences :
-
- - returns a tuplebuilder.StackElement instead of the *real* nested
- tuples (StackElement is only a wrapper class around these tuples)
-
- """
- builder = TupleBuilder(parser.rules, lineno=False)
- if space is not None:
- builder.space = space
- target_rule = TARGET_DICT[mode]
- parser.parse_source(source, target_rule, builder, flags)
- stack_element = builder.stack[-1]
- return (builder.source_encoding, stack_element)
-
-def parse_result_to_nested_tuples(parse_result, lineno=False):
- """NOT_RPYTHON"""
- source_encoding, stack_element = parse_result
- nested_tuples = stack_element.as_tuple(lineno)
- return nested_tuples
-
-def pypy_parse(source, mode='exec', lineno=False, flags=0, parser = PYTHON_PARSER):
- """
- NOT_RPYTHON !
- parse <source> using PyPy's parser module and return
- a tuple of three elements :
- - The encoding declaration symbol or None if there were no encoding
- statement
- - The TupleBuilder's stack top element (instance of
- tuplebuilder.StackElement which is a wrapper of some nested tuples
- like those returned by the CPython's parser)
- - The encoding string or None if there were no encoding statement
- nested tuples
- """
- source_encoding, stack_element = internal_pypy_parse(source, mode, lineno=lineno,
- flags=lineno, parser = parser)
- # convert the stack element into nested tuples (caution, the annotator
- # can't follow this call)
- return parse_result_to_nested_tuples((source_encoding, stack_element), lineno=lineno)
-
-## convenience functions for computing AST objects using recparser
-def ast_from_input(input, mode, transformer, parser = PYTHON_PARSER):
- """converts a source input into an AST
-
- - input : the source to be converted
- - mode : 'exec', 'eval' or 'single'
- - transformer : the transfomer instance to use to convert
- the nested tuples into the AST
- XXX: transformer could be instantiated here but we don't want
- here to explicitly import compiler or stablecompiler or
- etc. This is to be fixed in a clean way
- """
- tuples = pypy_parse(input, mode, True, parser)
- ast = transformer.compile_node(tuples)
- return ast
-
-def target_ast_compile(space, input, mode):
- from pypy.interpreter.astcompiler import ast, misc, pycodegen
- builder = AstBuilder(rules=None, debug=0, space=space)
- target = TARGET_DICT[mode]
- PYTHON_PARSER.parse_source(input, target, builder)
- ast_tree = builder.rule_stack[-1]
- misc.set_filename("<?>", ast_tree)
- if mode=="single":
- codegenerator = pycodegen.InteractiveCodeGenerator(space,ast_tree)
- elif mode=="eval":
- codegenerator = pycodegen.ExpressionCodeGenerator(space,ast_tree)
- elif mode=="exec":
- codegenerator = pycodegen.ModuleCodeGenerator(space,ast_tree)
- else:
- raise ValueError("incorrect mode")
- code1 = codegenerator.getCode()
- return code1
-
-
-def internal_pypy_parse_to_ast(source, mode='exec', lineno=False, flags=0):
- builder = AstBuilder()
- target_rule = TARGET_DICT[mode]
- PYTHON_PARSER.parse_source(source, target_rule, builder, flags)
- ast_tree = builder.rule_stack[-1]
- return (builder.source_encoding, ast_tree)
-
-
-if __name__ == "__main__":
- import sys
- if len(sys.argv) < 2:
- print "python parse.py [-d N] test_file.py"
- sys.exit(1)
- if sys.argv[1] == "-d":
- debug_level = int(sys.argv[2])
- test_file = sys.argv[3]
- else:
- test_file = sys.argv[1]
- print "-"*20
- print
- print "pyparse \n", pypy_parsefile(test_file)
- print "parser \n", python_parsefile(test_file)
Modified: pypy/dist/pypy/interpreter/pyparser/pytoken.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/pytoken.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/pytoken.py Wed Feb 28 18:30:48 2007
@@ -5,131 +5,77 @@
N_TOKENS = 0
-tok_name = {}
-tok_values = {}
+# This is used to replace None
+NULLTOKEN = -1
-def add_token(name, value=None):
- global N_TOKENS
- if value is None:
- value = N_TOKENS
- N_TOKENS += 1
- _g = globals()
- _g[name] = value
- tok_name[value] = name
- tok_values[name] = value
+tok_name = {-1 : 'NULLTOKEN'}
+tok_values = {'NULLTOKEN' : -1}
-# This is used to replace None
-add_token( 'NULLTOKEN', -1 )
+# tok_rpunct = {}
+def setup_tokens( parser ):
+ # global tok_rpunct
# For compatibility, this produces the same constant values as Python 2.4.
-add_token( 'ENDMARKER' )
-add_token( 'NAME' )
-add_token( 'NUMBER' )
-add_token( 'STRING' )
-add_token( 'NEWLINE' )
-add_token( 'INDENT' )
-add_token( 'DEDENT' )
-add_token( 'LPAR' )
-add_token( 'RPAR' )
-add_token( 'LSQB' )
-add_token( 'RSQB' )
-add_token( 'COLON' )
-add_token( 'COMMA' )
-add_token( 'SEMI' )
-add_token( 'PLUS' )
-add_token( 'MINUS' )
-add_token( 'STAR' )
-add_token( 'SLASH' )
-add_token( 'VBAR' )
-add_token( 'AMPER' )
-add_token( 'LESS' )
-add_token( 'GREATER' )
-add_token( 'EQUAL' )
-add_token( 'DOT' )
-add_token( 'PERCENT' )
-add_token( 'BACKQUOTE' )
-add_token( 'LBRACE' )
-add_token( 'RBRACE' )
-add_token( 'EQEQUAL' )
-add_token( 'NOTEQUAL' )
-add_token( 'LESSEQUAL' )
-add_token( 'GREATEREQUAL' )
-add_token( 'TILDE' )
-add_token( 'CIRCUMFLEX' )
-add_token( 'LEFTSHIFT' )
-add_token( 'RIGHTSHIFT' )
-add_token( 'DOUBLESTAR' )
-add_token( 'PLUSEQUAL' )
-add_token( 'MINEQUAL' )
-add_token( 'STAREQUAL' )
-add_token( 'SLASHEQUAL' )
-add_token( 'PERCENTEQUAL' )
-add_token( 'AMPEREQUAL' )
-add_token( 'VBAREQUAL' )
-add_token( 'CIRCUMFLEXEQUAL' )
-add_token( 'LEFTSHIFTEQUAL' )
-add_token( 'RIGHTSHIFTEQUAL' )
-add_token( 'DOUBLESTAREQUAL' )
-add_token( 'DOUBLESLASH' )
-add_token( 'DOUBLESLASHEQUAL' )
-add_token( 'AT' )
-add_token( 'OP' )
-add_token( 'ERRORTOKEN' )
+ parser.add_token( 'ENDMARKER' )
+ parser.add_token( 'NAME' )
+ parser.add_token( 'NUMBER' )
+ parser.add_token( 'STRING' )
+ parser.add_token( 'NEWLINE' )
+ parser.add_token( 'INDENT' )
+ parser.add_token( 'DEDENT' )
+ parser.add_token( 'LPAR', "(" )
+ parser.add_token( 'RPAR', ")" )
+ parser.add_token( 'LSQB', "[" )
+ parser.add_token( 'RSQB', "]" )
+ parser.add_token( 'COLON', ":" )
+ parser.add_token( 'COMMA', "," )
+ parser.add_token( 'SEMI', ";" )
+ parser.add_token( 'PLUS', "+" )
+ parser.add_token( 'MINUS', "-" )
+ parser.add_token( 'STAR', "*" )
+ parser.add_token( 'SLASH', "/" )
+ parser.add_token( 'VBAR', "|" )
+ parser.add_token( 'AMPER', "&" )
+ parser.add_token( 'LESS', "<" )
+ parser.add_token( 'GREATER', ">" )
+ parser.add_token( 'EQUAL', "=" )
+ parser.add_token( 'DOT', "." )
+ parser.add_token( 'PERCENT', "%" )
+ parser.add_token( 'BACKQUOTE', "`" )
+ parser.add_token( 'LBRACE', "{" )
+ parser.add_token( 'RBRACE', "}" )
+ parser.add_token( 'EQEQUAL', "==" )
+ ne = parser.add_token( 'NOTEQUAL', "!=" )
+ parser.tok_values["<>"] = ne
+ parser.add_token( 'LESSEQUAL', "<=" )
+ parser.add_token( 'GREATEREQUAL', ">=" )
+ parser.add_token( 'TILDE', "~" )
+ parser.add_token( 'CIRCUMFLEX', "^" )
+ parser.add_token( 'LEFTSHIFT', "<<" )
+ parser.add_token( 'RIGHTSHIFT', ">>" )
+ parser.add_token( 'DOUBLESTAR', "**" )
+ parser.add_token( 'PLUSEQUAL', "+=" )
+ parser.add_token( 'MINEQUAL', "-=" )
+ parser.add_token( 'STAREQUAL', "*=" )
+ parser.add_token( 'SLASHEQUAL', "/=" )
+ parser.add_token( 'PERCENTEQUAL', "%=" )
+ parser.add_token( 'AMPEREQUAL', "&=" )
+ parser.add_token( 'VBAREQUAL', "|=" )
+ parser.add_token( 'CIRCUMFLEXEQUAL', "^=" )
+ parser.add_token( 'LEFTSHIFTEQUAL', "<<=" )
+ parser.add_token( 'RIGHTSHIFTEQUAL', ">>=" )
+ parser.add_token( 'DOUBLESTAREQUAL', "**=" )
+ parser.add_token( 'DOUBLESLASH', "//" )
+ parser.add_token( 'DOUBLESLASHEQUAL',"//=" )
+ parser.add_token( 'AT', "@" )
+ parser.add_token( 'OP' )
+ parser.add_token( 'ERRORTOKEN' )
# extra PyPy-specific tokens
-add_token( "COMMENT" )
-add_token( "NL" )
-
-# a reverse mapping from internal tokens def to more pythonic tokens
-tok_punct = {
- "&" : AMPER,
- "&=" : AMPEREQUAL,
- "`" : BACKQUOTE,
- "^" : CIRCUMFLEX,
- "^=" : CIRCUMFLEXEQUAL,
- ":" : COLON,
- "," : COMMA,
- "." : DOT,
- "//" : DOUBLESLASH,
- "//=" : DOUBLESLASHEQUAL,
- "**" : DOUBLESTAR,
- "**=" : DOUBLESTAREQUAL,
- "==" : EQEQUAL,
- "=" : EQUAL,
- ">" : GREATER,
- ">=" : GREATEREQUAL,
- "{" : LBRACE,
- "}" : RBRACE,
- "<<" : LEFTSHIFT,
- "<<=" : LEFTSHIFTEQUAL,
- "<" : LESS,
- "<=" : LESSEQUAL,
- "(" : LPAR,
- "[" : LSQB,
- "-=" : MINEQUAL,
- "-" : MINUS,
- "!=" : NOTEQUAL,
- "<>" : NOTEQUAL,
- "%" : PERCENT,
- "%=" : PERCENTEQUAL,
- "+" : PLUS,
- "+=" : PLUSEQUAL,
- ")" : RBRACE,
- ">>" : RIGHTSHIFT,
- ">>=" : RIGHTSHIFTEQUAL,
- ")" : RPAR,
- "]" : RSQB,
- ";" : SEMI,
- "/" : SLASH,
- "/=" : SLASHEQUAL,
- "*" : STAR,
- "*=" : STAREQUAL,
- "~" : TILDE,
- "|" : VBAR,
- "|=" : VBAREQUAL,
- "@": AT,
- }
-tok_rpunct = {}
-for string, value in tok_punct.items():
- tok_rpunct[value] = string
+ parser.add_token( "COMMENT" )
+ parser.add_token( "NL" )
+ # tok_rpunct = parser.tok_values.copy()
+ # for _name, _value in parser.tokens.items():
+ # globals()[_name] = _value
+ # setattr(parser, _name, _value)
Modified: pypy/dist/pypy/interpreter/pyparser/syntaxtree.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/syntaxtree.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/syntaxtree.py Wed Feb 28 18:30:48 2007
@@ -8,6 +8,8 @@
from pypy.tool.uid import uid
+
class AbstractSyntaxVisitor(object):
def visit_syntaxnode( self, node ):
pass
Added: pypy/dist/pypy/interpreter/pyparser/test/expressions.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/interpreter/pyparser/test/expressions.py Wed Feb 28 18:30:48 2007
@@ -0,0 +1,504 @@
+"""
+list of tested expressions / suites (used by test_parser and test_astbuilder)
+"""
+
+constants = [
+ "0",
+ "7",
+ "-3",
+ "053",
+ "0x18",
+ "14L",
+ "1.0",
+ "3.9",
+ "-3.6",
+ "1.8e19",
+ "90000000000000",
+ "90000000000000.",
+ "3j"
+ ]
+
+expressions = [
+ "x = a + 1",
+ "x = 1 - a",
+ "x = a * b",
+ "x = a ** 2",
+ "x = a / b",
+ "x = a & b",
+ "x = a | b",
+ "x = a ^ b",
+ "x = a // b",
+ "x = a * b + 1",
+ "x = a + 1 * b",
+ "x = a * b / c",
+ "x = a * (1 + c)",
+ "x, y, z = 1, 2, 3",
+ "x = 'a' 'b' 'c'",
+ "del foo",
+ "del foo[bar]",
+ "del foo.bar",
+ "l[0]",
+ "k[v,]",
+ "m[a,b]",
+ "a.b.c[d]",
+ "file('some.txt').read()",
+ "a[0].read()",
+ "a[1:1].read()",
+ "f('foo')('bar')('spam')",
+ "f('foo')('bar')('spam').read()[0]",
+ "a.b[0][0]",
+ "a.b[0][:]",
+ "a.b[0][::]",
+ "a.b[0][0].pop()[0].push('bar')('baz').spam",
+ "a.b[0].read()[1][2].foo().spam()[0].bar",
+ "a**2",
+ "a**2**2",
+ "a.b[0]**2",
+ "a.b[0].read()[1][2].foo().spam()[0].bar ** 2",
+ "l[start:end] = l2",
+ "l[::] = l2",
+ "a = `s`",
+ "a = `1 + 2 + f(3, 4)`",
+ "[a, b] = c",
+ "(a, b) = c",
+ "[a, (b,c), d] = e",
+ "a, (b, c), d = e",
+ ]
+
+# We do not export the following tests because we would have to implement 2.5
+# features in the stable compiler (other than just building the AST).
+expressions_inbetweenversions = expressions + [
+ "1 if True else 2",
+ "1 if False else 2",
+ ]
+
+funccalls = [
+ "l = func()",
+ "l = func(10)",
+ "l = func(10, 12, a, b=c, *args)",
+ "l = func(10, 12, a, b=c, **kwargs)",
+ "l = func(10, 12, a, b=c, *args, **kwargs)",
+ "l = func(10, 12, a, b=c)",
+ "e = l.pop(3)",
+ "e = k.l.pop(3)",
+ "simplefilter('ignore', category=PendingDeprecationWarning, append=1)",
+ """methodmap = dict(subdirs=phase4,
+ same_files=phase3, diff_files=phase3, funny_files=phase3,
+ common_dirs = phase2, common_files=phase2, common_funny=phase2,
+ common=phase1, left_only=phase1, right_only=phase1,
+ left_list=phase0, right_list=phase0)""",
+ "odata = b2a_qp(data, quotetabs = quotetabs, header = header)",
+ ]
+
+listmakers = [
+ "l = []",
+ "l = [1, 2, 3]",
+ "l = [i for i in range(10)]",
+ "l = [i for i in range(10) if i%2 == 0]",
+ "l = [i for i in range(10) if i%2 == 0 or i%2 == 1]", # <--
+ "l = [i for i in range(10) if i%2 == 0 and i%2 == 1]",
+ "l = [i for j in range(10) for i in range(j)]",
+ "l = [i for j in range(10) for i in range(j) if j%2 == 0]",
+ "l = [i for j in range(10) for i in range(j) if j%2 == 0 and i%2 == 0]",
+ "l = [(a, b) for (a,b,c) in l2]",
+ "l = [{a:b} for (a,b,c) in l2]",
+ "l = [i for j in k if j%2 == 0 if j*2 < 20 for i in j if i%2==0]",
+ ]
+
+genexps = [
+ "l = (i for i in j)",
+ "l = (i for i in j if i%2 == 0)",
+ "l = (i for j in k for i in j)",
+ "l = (i for j in k for i in j if j%2==0)",
+ "l = (i for j in k if j%2 == 0 if j*2 < 20 for i in j if i%2==0)",
+ "l = (i for i in [ j*2 for j in range(10) ] )",
+ "l = [i for i in ( j*2 for j in range(10) ) ]",
+ "l = (i for i in [ j*2 for j in ( k*3 for k in range(10) ) ] )",
+ "l = [i for j in ( j*2 for j in [ k*3 for k in range(10) ] ) ]",
+ "l = f(i for i in j)",
+ ]
+
+
+dictmakers = [
+ "l = {a : b, 'c' : 0}",
+ "l = {}",
+ ]
+
+backtrackings = [
+ "f = lambda x: x+1",
+ "f = lambda x,y: x+y",
+ "f = lambda x,y=1,z=t: x+y",
+ "f = lambda x,y=1,z=t,*args,**kwargs: x+y",
+ "f = lambda x,y=1,z=t,*args: x+y",
+ "f = lambda x,y=1,z=t,**kwargs: x+y",
+ "f = lambda: 1",
+ "f = lambda *args: 1",
+ "f = lambda **kwargs: 1",
+ ]
+
+comparisons = [
+ "a < b",
+ "a > b",
+ "a not in b",
+ "a is not b",
+ "a in b",
+ "a is b",
+ "3 < x < 5",
+ "(3 < x) < 5",
+ "a < b < c < d",
+ "(a < b) < (c < d)",
+ "a < (b < c) < d",
+ ]
+
+multiexpr = [
+ 'a = b; c = d;',
+ 'a = b = c = d',
+ ]
+
+attraccess = [
+ 'a.b = 2',
+ 'x = a.b',
+ ]
+
+slices = [
+ "l[:]",
+ "l[::]",
+ "l[1:2]",
+ "l[1:]",
+ "l[:2]",
+ "l[1::]",
+ "l[:1:]",
+ "l[::1]",
+ "l[1:2:]",
+ "l[:1:2]",
+ "l[1::2]",
+ "l[0:1:2]",
+ "a.b.l[:]",
+ "a.b.l[1:2]",
+ "a.b.l[1:]",
+ "a.b.l[:2]",
+ "a.b.l[0:1:2]",
+ "a[1:2:3, 100]",
+ "a[:2:3, 100]",
+ "a[1::3, 100,]",
+ "a[1:2:, 100]",
+ "a[1:2, 100]",
+ "a[1:, 100,]",
+ "a[:2, 100]",
+ "a[:, 100]",
+ "a[100, 1:2:3,]",
+ "a[100, :2:3]",
+ "a[100, 1::3]",
+ "a[100, 1:2:,]",
+ "a[100, 1:2]",
+ "a[100, 1:]",
+ "a[100, :2,]",
+ "a[100, :]",
+ ]
+
+imports = [
+ 'import os',
+ 'import sys, os',
+ 'import os.path',
+ 'import os.path, sys',
+ 'import sys, os.path as osp',
+ 'import os.path as osp',
+ 'import os.path as osp, sys as _sys',
+ 'import a.b.c.d',
+ 'import a.b.c.d as abcd',
+ 'from os import path',
+ 'from os import path, system',
+ ]
+
+imports_newstyle = [
+ 'from os import path, system',
+ 'from os import path as P, system as S',
+ 'from os import (path as P, system as S,)',
+ 'from os import *',
+ ]
+
+if_stmts = [
+ "if a == 1: a+= 2",
+ """if a == 1:
+ a += 2
+elif a == 2:
+ a += 3
+else:
+ a += 4
+""",
+ "if a and not b == c: pass",
+ "if a and not not not b == c: pass",
+ "if 0: print 'foo'"
+ ]
+
+asserts = [
+ 'assert False',
+ 'assert a == 1',
+ 'assert a == 1 and b == 2',
+ 'assert a == 1 and b == 2, "assertion failed"',
+ ]
+
+execs = [
+ 'exec a',
+ 'exec "a=b+3"',
+ 'exec a in f()',
+ 'exec a in f(), g()',
+ ]
+
+prints = [
+ 'print',
+ 'print a',
+ 'print a,',
+ 'print a, b',
+ 'print a, "b", c',
+ 'print >> err',
+ 'print >> err, "error"',
+ 'print >> err, "error",',
+ 'print >> err, "error", a',
+ ]
+
+globs = [
+ 'global a',
+ 'global a,b,c',
+ ]
+
+raises_ = [ # NB. 'raises' creates a name conflict with py.test magic
+ 'raise',
+ 'raise ValueError',
+ 'raise ValueError("error")',
+ 'raise ValueError, "error"',
+ 'raise ValueError, "error", foo',
+ ]
+
+tryexcepts = [
+ """try:
+ a
+ b
+except:
+ pass
+""",
+ """try:
+ a
+ b
+except NameError:
+ pass
+""",
+ """try:
+ a
+ b
+except NameError, err:
+ pass
+""",
+ """try:
+ a
+ b
+except (NameError, ValueError):
+ pass
+""",
+ """try:
+ a
+ b
+except (NameError, ValueError), err:
+ pass
+""",
+ """try:
+ a
+except NameError, err:
+ pass
+except ValueError, err:
+ pass
+""",
+ """def f():
+ try:
+ a
+ except NameError, err:
+ a = 1
+ b = 2
+ except ValueError, err:
+ a = 2
+ return a
+"""
+ """try:
+ a
+except NameError, err:
+ a = 1
+except ValueError, err:
+ a = 2
+else:
+ a += 3
+""",
+ """try:
+ a
+finally:
+ b
+""",
+ """def f():
+ try:
+ return a
+ finally:
+ a = 3
+ return 1
+""",
+
+ ]
+
+one_stmt_funcdefs = [
+ "def f(): return 1",
+ "def f(x): return x+1",
+ "def f(x,y): return x+y",
+ "def f(x,y=1,z=t): return x+y",
+ "def f(x,y=1,z=t,*args,**kwargs): return x+y",
+ "def f(x,y=1,z=t,*args): return x+y",
+ "def f(x,y=1,z=t,**kwargs): return x+y",
+ "def f(*args): return 1",
+ "def f(**kwargs): return 1",
+ "def f(t=()): pass",
+ "def f(a, b, (c, d), e): pass",
+ "def f(a, b, (c, (d, e), f, (g, h))): pass",
+ "def f(a, b, (c, (d, e), f, (g, h)), i): pass",
+ "def f((a)): pass",
+ ]
+
+one_stmt_classdefs = [
+ "class Pdb(bdb.Bdb, cmd.Cmd): pass",
+ ]
+
+docstrings = [
+ '''def foo(): return 1''',
+ '''class Foo: pass''',
+ '''class Foo: "foo"''',
+ '''def foo():
+ """foo docstring"""
+ return 1
+''',
+ '''def foo():
+ """foo docstring"""
+ a = 1
+ """bar"""
+ return a
+''',
+ '''def foo():
+ """doc"""; print 1
+ a=1
+''',
+ '''"""Docstring""";print 1''',
+ ]
+
+returns = [
+ 'def f(): return',
+ 'def f(): return 1',
+ 'def f(): return a.b',
+ 'def f(): return a',
+ 'def f(): return a,b,c,d',
+ #'return (a,b,c,d)', --- this one makes no sense, as far as I can tell
+ ]
+
+augassigns = [
+ 'a=1;a+=2',
+ 'a=1;a-=2',
+ 'a=1;a*=2',
+ 'a=1;a/=2',
+ 'a=1;a//=2',
+ 'a=1;a%=2',
+ 'a=1;a**=2',
+ 'a=1;a>>=2',
+ 'a=1;a<<=2',
+ 'a=1;a&=2',
+ 'a=1;a^=2',
+ 'a=1;a|=2',
+
+ 'a=A();a.x+=2',
+ 'a=A();a.x-=2',
+ 'a=A();a.x*=2',
+ 'a=A();a.x/=2',
+ 'a=A();a.x//=2',
+ 'a=A();a.x%=2',
+ 'a=A();a.x**=2',
+ 'a=A();a.x>>=2',
+ 'a=A();a.x<<=2',
+ 'a=A();a.x&=2',
+ 'a=A();a.x^=2',
+ 'a=A();a.x|=2',
+
+ 'a=A();a[0]+=2',
+ 'a=A();a[0]-=2',
+ 'a=A();a[0]*=2',
+ 'a=A();a[0]/=2',
+ 'a=A();a[0]//=2',
+ 'a=A();a[0]%=2',
+ 'a=A();a[0]**=2',
+ 'a=A();a[0]>>=2',
+ 'a=A();a[0]<<=2',
+ 'a=A();a[0]&=2',
+ 'a=A();a[0]^=2',
+ 'a=A();a[0]|=2',
+
+ 'a=A();a[0:2]+=2',
+ 'a=A();a[0:2]-=2',
+ 'a=A();a[0:2]*=2',
+ 'a=A();a[0:2]/=2',
+ 'a=A();a[0:2]//=2',
+ 'a=A();a[0:2]%=2',
+ 'a=A();a[0:2]**=2',
+ 'a=A();a[0:2]>>=2',
+ 'a=A();a[0:2]<<=2',
+ 'a=A();a[0:2]&=2',
+ 'a=A();a[0:2]^=2',
+ 'a=A();a[0:2]|=2',
+ ]
+
+PY23_TESTS = [
+ constants,
+ expressions,
+ augassigns,
+ comparisons,
+ funccalls,
+ backtrackings,
+ listmakers, # ERRORS
+ dictmakers,
+ multiexpr,
+ attraccess,
+ slices,
+ imports,
+ execs,
+ prints,
+ globs,
+ raises_,
+
+ ]
+
+OPTIONAL_TESTS = [
+ # expressions_inbetweenversions,
+ genexps,
+ imports_newstyle,
+ asserts,
+ ]
+
+TESTS = PY23_TESTS + OPTIONAL_TESTS
+
+
+## TESTS = [
+## ["l = [i for i in range(10) if i%2 == 0 or i%2 == 1]"],
+## ]
+
+EXEC_INPUTS = [
+ one_stmt_classdefs,
+ one_stmt_funcdefs,
+ if_stmts,
+ tryexcepts,
+ docstrings,
+ returns,
+ ]
+
+SINGLE_INPUTS = [
+ one_stmt_funcdefs,
+ ['\t # hello\n',
+ 'print 6*7',
+ 'if 1: x\n',
+ 'x = 5',
+ 'x = 5 ',
+ '''"""Docstring""";print 1''',
+ '''"Docstring"''',
+ '''"Docstring" "\\x00"''',
+ ]
+]
Modified: pypy/dist/pypy/interpreter/pyparser/test/test_astbuilder.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/test/test_astbuilder.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/test/test_astbuilder.py Wed Feb 28 18:30:48 2007
@@ -2,7 +2,7 @@
from pypy.interpreter.pyparser import pythonparse
from pypy.interpreter.pyparser.astbuilder import AstBuilder
-from pypy.interpreter.pyparser.pythonutil import ast_from_input
+from pypy.interpreter.pyparser.pythonutil import ast_from_input, build_parser_for_version
from pypy.interpreter.stablecompiler.transformer import Transformer
import pypy.interpreter.stablecompiler.ast as test_ast
import pypy.interpreter.astcompiler.ast as ast_ast
@@ -13,6 +13,9 @@
from pypy.interpreter.astcompiler import ast
+
+from expressions import TESTS, SINGLE_INPUTS, EXEC_INPUTS
+
def arglist_equal(left,right):
"""needs special case because we handle the argumentlist differently"""
for l,r in zip(left,right):
@@ -142,211 +145,13 @@
return False
return True
-EXPECTED = {}
-
-constants = [
- "0",
- "7",
- "-3",
- "053",
- "0x18",
- "14L",
- "1.0",
- "3.9",
- "-3.6",
- "1.8e19",
- "90000000000000",
- "90000000000000.",
- "3j"
- ]
-
-expressions = [
- "x = a + 1",
- "x = 1 - a",
- "x = a * b",
- "x = a ** 2",
- "x = a / b",
- "x = a & b",
- "x = a | b",
- "x = a ^ b",
- "x = a // b",
- "x = a * b + 1",
- "x = a + 1 * b",
- "x = a * b / c",
- "x = a * (1 + c)",
- "x, y, z = 1, 2, 3",
- "x = 'a' 'b' 'c'",
- "del foo",
- "del foo[bar]",
- "del foo.bar",
- "l[0]",
- "k[v,]",
- "m[a,b]",
- "a.b.c[d]",
- "file('some.txt').read()",
- "a[0].read()",
- "a[1:1].read()",
- "f('foo')('bar')('spam')",
- "f('foo')('bar')('spam').read()[0]",
- "a.b[0][0]",
- "a.b[0][:]",
- "a.b[0][::]",
- "a.b[0][0].pop()[0].push('bar')('baz').spam",
- "a.b[0].read()[1][2].foo().spam()[0].bar",
- "a**2",
- "a**2**2",
- "a.b[0]**2",
- "a.b[0].read()[1][2].foo().spam()[0].bar ** 2",
- "l[start:end] = l2",
- "l[::] = l2",
- "a = `s`",
- "a = `1 + 2 + f(3, 4)`",
- "[a, b] = c",
- "(a, b) = c",
- "[a, (b,c), d] = e",
- "a, (b, c), d = e",
- ]
-
-# We do not export the following tests because we would have to implement 2.5
-# features in the stable compiler (other than just building the AST).
-expressions_inbetweenversions = expressions + [
- #"1 if True else 2", # Disabled 2.5 syntax
- #"1 if False else 2",
- ]
-
-EXPECTED["k[v,]"] = ("Module(None, Stmt([Discard(Subscript(Name('k'), 2, "
- "Tuple([Name('v')])))]))")
-EXPECTED["m[a,b]"] = ("Module(None, Stmt([Discard(Subscript(Name('m'), 2, "
- "Tuple([Name('a'), Name('b')])))]))")
-EXPECTED["1 if True else 2"] = ("Module(None, Stmt([Discard(CondExpr("
- "Name('True'), Const(1), Const(2)))]))")
-EXPECTED["1 if False else 2"] = ("Module(None, Stmt([Discard(CondExpr("
- "Name('False'), Const(1), Const(2)))]))")
-
-funccalls = [
- "l = func()",
- "l = func(10)",
- "l = func(10, 12, a, b=c, *args)",
- "l = func(10, 12, a, b=c, **kwargs)",
- "l = func(10, 12, a, b=c, *args, **kwargs)",
- "l = func(10, 12, a, b=c)",
- "e = l.pop(3)",
- "e = k.l.pop(3)",
- "simplefilter('ignore', category=PendingDeprecationWarning, append=1)",
- """methodmap = dict(subdirs=phase4,
- same_files=phase3, diff_files=phase3, funny_files=phase3,
- common_dirs = phase2, common_files=phase2, common_funny=phase2,
- common=phase1, left_only=phase1, right_only=phase1,
- left_list=phase0, right_list=phase0)""",
- "odata = b2a_qp(data, quotetabs = quotetabs, header = header)",
- ]
-
-listmakers = [
- "l = []",
- "l = [1, 2, 3]",
- "l = [i for i in range(10)]",
- "l = [i for i in range(10) if i%2 == 0]",
- "l = [i for i in range(10) if i%2 == 0 or i%2 == 1]",
- "l = [i for i in range(10) if i%2 == 0 and i%2 == 1]",
- "l = [i for j in range(10) for i in range(j)]",
- "l = [i for j in range(10) for i in range(j) if j%2 == 0]",
- "l = [i for j in range(10) for i in range(j) if j%2 == 0 and i%2 == 0]",
- "l = [(a, b) for (a,b,c) in l2]",
- "l = [{a:b} for (a,b,c) in l2]",
- "l = [i for j in k if j%2 == 0 if j*2 < 20 for i in j if i%2==0]",
- ]
-
-genexps = [
- "l = (i for i in j)",
- "l = (i for i in j if i%2 == 0)",
- "l = (i for j in k for i in j)",
- "l = (i for j in k for i in j if j%2==0)",
- "l = (i for j in k if j%2 == 0 if j*2 < 20 for i in j if i%2==0)",
- "l = (i for i in [ j*2 for j in range(10) ] )",
- "l = [i for i in ( j*2 for j in range(10) ) ]",
- "l = (i for i in [ j*2 for j in ( k*3 for k in range(10) ) ] )",
- "l = [i for j in ( j*2 for j in [ k*3 for k in range(10) ] ) ]",
- "l = f(i for i in j)",
- ]
-
-
-dictmakers = [
- "l = {a : b, 'c' : 0}",
- "l = {}",
- ]
-
-backtrackings = [
- "f = lambda x: x+1",
- "f = lambda x,y: x+y",
- "f = lambda x,y=1,z=t: x+y",
- "f = lambda x,y=1,z=t,*args,**kwargs: x+y",
- "f = lambda x,y=1,z=t,*args: x+y",
- "f = lambda x,y=1,z=t,**kwargs: x+y",
- "f = lambda: 1",
- "f = lambda *args: 1",
- "f = lambda **kwargs: 1",
- ]
+EXPECTED = {
+ "k[v,]" : "Module(None, Stmt([Discard(Subscript(Name('k'), 2, Tuple([Name('v')])))]))",
+ "m[a,b]" : "Module(None, Stmt([Discard(Subscript(Name('m'), 2, Tuple([Name('a'), Name('b')])))]))",
-comparisons = [
- "a < b",
- "a > b",
- "a not in b",
- "a is not b",
- "a in b",
- "a is b",
- "3 < x < 5",
- "(3 < x) < 5",
- "a < b < c < d",
- "(a < b) < (c < d)",
- "a < (b < c) < d",
- ]
-
-multiexpr = [
- 'a = b; c = d;',
- 'a = b = c = d',
- ]
+ "1 if True else 2" : "Module(None, Stmt([Discard(CondExpr(Name('True'), Const(1), Const(2)))]))",
+ "1 if False else 2" : "Module(None, Stmt([Discard(CondExpr(Name('False'), Const(1), Const(2)))]))",
-attraccess = [
- 'a.b = 2',
- 'x = a.b',
- ]
-
-slices = [
- "l[:]",
- "l[::]",
- "l[1:2]",
- "l[1:]",
- "l[:2]",
- "l[1::]",
- "l[:1:]",
- "l[::1]",
- "l[1:2:]",
- "l[:1:2]",
- "l[1::2]",
- "l[0:1:2]",
- "a.b.l[:]",
- "a.b.l[1:2]",
- "a.b.l[1:]",
- "a.b.l[:2]",
- "a.b.l[0:1:2]",
- "a[1:2:3, 100]",
- "a[:2:3, 100]",
- "a[1::3, 100,]",
- "a[1:2:, 100]",
- "a[1:2, 100]",
- "a[1:, 100,]",
- "a[:2, 100]",
- "a[:, 100]",
- "a[100, 1:2:3,]",
- "a[100, :2:3]",
- "a[100, 1::3]",
- "a[100, 1:2:,]",
- "a[100, 1:2]",
- "a[100, 1:]",
- "a[100, :2,]",
- "a[100, :]",
- ]
-EXPECTED.update({
"a[1:2:3, 100]": "Module(None, Stmt([Discard(Subscript(Name('a'), 2, Tuple([Sliceobj([Const(1), Const(2), Const(3)]), Const(100)])))]))",
"a[:2:3, 100]": "Module(None, Stmt([Discard(Subscript(Name('a'), 2, Tuple([Sliceobj([Const(None), Const(2), Const(3)]), Const(100)])))]))",
"a[1::3, 100,]": "Module(None, Stmt([Discard(Subscript(Name('a'), 2, Tuple([Sliceobj([Const(1), Const(None), Const(3)]), Const(100)])))]))",
@@ -363,307 +168,10 @@
"a[100, 1:]": "Module(None, Stmt([Discard(Subscript(Name('a'), 2, Tuple([Const(100), Sliceobj([Const(1), Const(None)])])))]))",
"a[100, :2,]": "Module(None, Stmt([Discard(Subscript(Name('a'), 2, Tuple([Const(100), Sliceobj([Const(None), Const(2)])])))]))",
"a[100, :]": "Module(None, Stmt([Discard(Subscript(Name('a'), 2, Tuple([Const(100), Sliceobj([Const(None), Const(None)])])))]))",
- })
-
-imports = [
- 'import os',
- 'import sys, os',
- 'import os.path',
- 'import os.path, sys',
- 'import sys, os.path as osp',
- 'import os.path as osp',
- 'import os.path as osp, sys as _sys',
- 'import a.b.c.d',
- 'import a.b.c.d as abcd',
- 'from os import path',
- 'from os import path, system',
- ]
-
-imports_newstyle = [
- 'from os import path, system',
- 'from os import path as P, system as S',
- 'from os import (path as P, system as S,)',
- 'from os import *',
- ]
-
-if_stmts = [
- "if a == 1: a+= 2",
- """if a == 1:
- a += 2
-elif a == 2:
- a += 3
-else:
- a += 4
-""",
- "if a and not b == c: pass",
- "if a and not not not b == c: pass",
- "if 0: print 'foo'"
- ]
-
-asserts = [
- 'assert False',
- 'assert a == 1',
- 'assert a == 1 and b == 2',
- 'assert a == 1 and b == 2, "assertion failed"',
- ]
-
-execs = [
- 'exec a',
- 'exec "a=b+3"',
- 'exec a in f()',
- 'exec a in f(), g()',
- ]
-
-prints = [
- 'print',
- 'print a',
- 'print a,',
- 'print a, b',
- 'print a, "b", c',
- 'print >> err',
- 'print >> err, "error"',
- 'print >> err, "error",',
- 'print >> err, "error", a',
- ]
-
-globs = [
- 'global a',
- 'global a,b,c',
- ]
-
-raises_ = [ # NB. 'raises' creates a name conflict with py.test magic
- 'raise',
- 'raise ValueError',
- 'raise ValueError("error")',
- 'raise ValueError, "error"',
- 'raise ValueError, "error", foo',
- ]
-
-tryexcepts = [
- """try:
- a
- b
-except:
- pass
-""",
- """try:
- a
- b
-except NameError:
- pass
-""",
- """try:
- a
- b
-except NameError, err:
- pass
-""",
- """try:
- a
- b
-except (NameError, ValueError):
- pass
-""",
- """try:
- a
- b
-except (NameError, ValueError), err:
- pass
-""",
- """try:
- a
-except NameError, err:
- pass
-except ValueError, err:
- pass
-""",
- """def f():
- try:
- a
- except NameError, err:
- a = 1
- b = 2
- except ValueError, err:
- a = 2
- return a
-"""
- """try:
- a
-except NameError, err:
- a = 1
-except ValueError, err:
- a = 2
-else:
- a += 3
-""",
- """try:
- a
-finally:
- b
-""",
- """def f():
- try:
- return a
- finally:
- a = 3
- return 1
-""",
-
- ]
-
-one_stmt_funcdefs = [
- "def f(): return 1",
- "def f(x): return x+1",
- "def f(x,y): return x+y",
- "def f(x,y=1,z=t): return x+y",
- "def f(x,y=1,z=t,*args,**kwargs): return x+y",
- "def f(x,y=1,z=t,*args): return x+y",
- "def f(x,y=1,z=t,**kwargs): return x+y",
- "def f(*args): return 1",
- "def f(**kwargs): return 1",
- "def f(t=()): pass",
- "def f(a, b, (c, d), e): pass",
- "def f(a, b, (c, (d, e), f, (g, h))): pass",
- "def f(a, b, (c, (d, e), f, (g, h)), i): pass",
- "def f((a)): pass",
- ]
-
-one_stmt_classdefs = [
- "class Pdb(bdb.Bdb, cmd.Cmd): pass",
- ]
-
-docstrings = [
- '''def foo(): return 1''',
- '''class Foo: pass''',
- '''class Foo: "foo"''',
- '''def foo():
- """foo docstring"""
- return 1
-''',
- '''def foo():
- """foo docstring"""
- a = 1
- """bar"""
- return a
-''',
- '''def foo():
- """doc"""; print 1
- a=1
-''',
- '''"""Docstring""";print 1''',
- ]
-
-returns = [
- 'def f(): return',
- 'def f(): return 1',
- 'def f(): return a.b',
- 'def f(): return a',
- 'def f(): return a,b,c,d',
- #'return (a,b,c,d)', --- this one makes no sense, as far as I can tell
- ]
-augassigns = [
- 'a=1;a+=2',
- 'a=1;a-=2',
- 'a=1;a*=2',
- 'a=1;a/=2',
- 'a=1;a//=2',
- 'a=1;a%=2',
- 'a=1;a**=2',
- 'a=1;a>>=2',
- 'a=1;a<<=2',
- 'a=1;a&=2',
- 'a=1;a^=2',
- 'a=1;a|=2',
-
- 'a=A();a.x+=2',
- 'a=A();a.x-=2',
- 'a=A();a.x*=2',
- 'a=A();a.x/=2',
- 'a=A();a.x//=2',
- 'a=A();a.x%=2',
- 'a=A();a.x**=2',
- 'a=A();a.x>>=2',
- 'a=A();a.x<<=2',
- 'a=A();a.x&=2',
- 'a=A();a.x^=2',
- 'a=A();a.x|=2',
-
- 'a=A();a[0]+=2',
- 'a=A();a[0]-=2',
- 'a=A();a[0]*=2',
- 'a=A();a[0]/=2',
- 'a=A();a[0]//=2',
- 'a=A();a[0]%=2',
- 'a=A();a[0]**=2',
- 'a=A();a[0]>>=2',
- 'a=A();a[0]<<=2',
- 'a=A();a[0]&=2',
- 'a=A();a[0]^=2',
- 'a=A();a[0]|=2',
-
- 'a=A();a[0:2]+=2',
- 'a=A();a[0:2]-=2',
- 'a=A();a[0:2]*=2',
- 'a=A();a[0:2]/=2',
- 'a=A();a[0:2]//=2',
- 'a=A();a[0:2]%=2',
- 'a=A();a[0:2]**=2',
- 'a=A();a[0:2]>>=2',
- 'a=A();a[0:2]<<=2',
- 'a=A();a[0:2]&=2',
- 'a=A();a[0:2]^=2',
- 'a=A();a[0:2]|=2',
- ]
-
-TESTS = [
- constants,
- expressions_inbetweenversions,
- augassigns,
- comparisons,
- funccalls,
- backtrackings,
- listmakers,
- genexps,
- dictmakers,
- multiexpr,
- attraccess,
- slices,
- imports,
- imports_newstyle,
- asserts,
- execs,
- prints,
- globs,
- raises_,
- ]
-
-EXEC_INPUTS = [
- one_stmt_classdefs,
- one_stmt_funcdefs,
- if_stmts,
- tryexcepts,
- docstrings,
- returns,
- ]
-
-SINGLE_INPUTS = [
- one_stmt_funcdefs,
- ['\t # hello\n',
- 'print 6*7',
- 'if 1: x\n',
- 'x = 5',
- 'x = 5 ',
- '''"""Docstring""";print 1''',
- '''"Docstring"''',
- '''"Docstring" "\\x00"''',
- ]
-]
-
-TARGET_DICT = {
- 'single' : 'single_input',
- 'exec' : 'file_input',
- 'eval' : 'eval_input',
+ # stablecompiler produces a Pass statement here, which seems inconsistent
+ # (a module should only have a Stmt child)
+ "\t # hello\n": "Module(None, Stmt([]))",
}
@@ -705,39 +213,39 @@
builtin = dict(int=int, long=long, float=float, complex=complex)
-def ast_parse_expr(expr, target='single'):
- target = TARGET_DICT[target]
- builder = AstBuilder(space=FakeSpace())
- pythonparse.PYTHON_PARSER.parse_source(expr, target, builder)
- return builder
-
# Create parser from Grammar_stable, not current grammar.
-stable_grammar, _ = pythonparse.get_grammar_file("stable")
-stable_parser = pythonparse.python_grammar(stable_grammar)
+stable_parser = pythonparse.make_pyparser('stable')
+python_parser = pythonparse.make_pyparser() # 'native') # 2.5a')
def tuple_parse_expr(expr, target='single'):
t = Transformer("dummyfile")
return ast_from_input(expr, target, t, stable_parser)
-def check_expression(expr, target='single'):
- r1 = ast_parse_expr(expr, target)
+def source2ast(source, mode, space=FakeSpace()):
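+ # parse the source with the native parser and return the AST node
+ # left on top of the builder's rule stack (also used by test_astcompiler)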
+ builder = AstBuilder(space=space, parser=python_parser)
+ python_parser.parse_source(source, mode, builder)
+ return builder.rule_stack[-1]
+
+def check_expression(expr, mode='single'):
+ pypy_ast = source2ast(expr, mode)
try:
- ast = EXPECTED[expr]
+ python_ast = EXPECTED[expr]
except KeyError:
# trust the stablecompiler's Transformer when no explicit result has
# been provided (although trusting it is a foolish thing to do)
- ast = tuple_parse_expr(expr, target)
+ python_ast = tuple_parse_expr(expr, mode)
check_lineno = True
else:
- if isinstance(ast, str):
- ast = eval(ast, ast_ast.__dict__)
+ if isinstance(python_ast, str):
+ python_ast = eval(python_ast, ast_ast.__dict__)
check_lineno = False
print "-" * 30
- print "ORIG :", ast
+ print "ORIG :", python_ast
print
- print "BUILT:", r1.rule_stack[-1]
+ print "BUILT:", pypy_ast
print "-" * 30
- assert nodes_equal(ast, r1.rule_stack[-1], check_lineno), 'failed on %r' % (expr)
+ assert nodes_equal(python_ast, pypy_ast, check_lineno), 'failed on %r' % (expr)
+
def test_basic_astgen():
for family in TESTS:
@@ -749,6 +257,7 @@
for expr in family:
yield check_expression, expr, 'exec'
+
NEW_GRAMMAR_SNIPPETS = [
'snippet_with_1.py',
'snippet_with_2.py',
@@ -810,7 +319,7 @@
for snippet_name in LIBSTUFF:
filepath = os.path.join(os.path.dirname(__file__), '../../../lib', snippet_name)
source = file(filepath).read()
- yield check_expression, source, 'exec'
+ yield check_expression, source, 'exec'
# FIXME: find the sys' attribute that define this
STDLIB_PATH = os.path.dirname(os.__file__)
Modified: pypy/dist/pypy/interpreter/pyparser/test/test_astcompiler.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/test/test_astcompiler.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/test/test_astcompiler.py Wed Feb 28 18:30:48 2007
@@ -1,4 +1,6 @@
+import sys
import os
+
from pypy.interpreter.pyparser import pythonparse
from pypy.interpreter.pyparser.astbuilder import AstBuilder
from pypy.interpreter.pyparser.tuplebuilder import TupleBuilder
@@ -11,93 +13,47 @@
from pypy.interpreter.astcompiler import ast, misc, pycodegen
-from test_astbuilder import expressions, comparisons, funccalls, backtrackings,\
- listmakers, genexps, dictmakers, multiexpr, attraccess, slices, imports,\
- asserts, execs, prints, globs, raises_, imports_newstyle, augassigns, \
- if_stmts, one_stmt_classdefs, one_stmt_funcdefs, tryexcepts, docstrings, \
- returns, SNIPPETS, SINGLE_INPUTS, LIBSTUFF, constants
-
-from test_astbuilder import FakeSpace
-
-
-TESTS = [
- constants,
- expressions,
- augassigns,
- comparisons,
- funccalls,
- backtrackings,
- listmakers,
- dictmakers,
- multiexpr,
- genexps,
- attraccess,
- slices,
- imports,
- execs,
- prints,
- globs,
- raises_,
-# EXEC_INPUTS
- one_stmt_classdefs,
- one_stmt_funcdefs,
- if_stmts,
- tryexcepts,
- docstrings,
- returns,
- ]
+from test_astbuilder import SNIPPETS, LIBSTUFF, FakeSpace, source2ast
+from expressions import PY23_TESTS, EXEC_INPUTS, SINGLE_INPUTS, OPTIONAL_TESTS
+
+TESTS = PY23_TESTS + EXEC_INPUTS
+
-import sys
if sys.version_info[0]==2 and sys.version_info[1]>=4:
# genexps and new style import don't work on python2.3
# TESTS.append(genexps) XXX: 2.4 optimizes bytecode so our comparison doesn't work
- TESTS.append(imports_newstyle)
- # assertions give different bytecode with 2.4 (optimize if __debug__)
- TESTS.append(asserts)
-TARGET_DICT = {
- 'single' : 'single_input',
- 'exec' : 'file_input',
- 'eval' : 'eval_input',
- }
-
-def ast_parse_expr(expr, target='single', space=FakeSpace()):
- target = TARGET_DICT[target]
- builder = AstBuilder(space=space)
- pythonparse.PYTHON_PARSER.parse_source(expr, target, builder)
- return builder.rule_stack[-1]
+ TESTS += OPTIONAL_TESTS
-def compile_with_astcompiler(expr, target='exec', space=FakeSpace()):
- ast = ast_parse_expr(expr, target='exec', space=space) # xxx exec: single not really tested, mumble
+def compile_with_astcompiler(expr, mode='exec', space=FakeSpace()):
+ ast = source2ast(expr, mode, space) # xxx exec: single not really tested, mumble
misc.set_filename('<?>', ast)
- if target == 'exec':
+ if mode == 'exec':
Generator = pycodegen.ModuleCodeGenerator
- elif target == 'single':
+ elif mode == 'single':
Generator = pycodegen.InteractiveCodeGenerator
- elif target == 'eval':
+ elif mode == 'eval':
Generator = pycodegen.ExpressionCodeGenerator
codegen = Generator(space, ast)
rcode = codegen.getCode()
return rcode
-
# Create parser from Grammar_stable, not current grammar.
-stable_grammar, _ = pythonparse.get_grammar_file("stable")
-stable_parser = pythonparse.python_grammar(stable_grammar)
+stable_parser = pythonparse.make_pyparser('stable')
-def compile_with_testcompiler(expr, target='exec', space=FakeSpace()):
- target2 = TARGET_DICT['exec'] # xxx exec: single not really tested
- builder = TupleBuilder()
- stable_parser.parse_source(expr, target2, builder)
+def compile_with_testcompiler(expr, mode='exec', space=FakeSpace()):
+ mode2 = 'exec' # xxx exec: single not really tested
+ builder = TupleBuilder(stable_parser)
+ stable_parser.parse_source(expr, mode2, builder)
tuples = builder.stack[-1].as_tuple(True)
from pypy.interpreter.stablecompiler import transformer, pycodegen, misc
ast = transformer.Transformer('<?>').compile_node(tuples)
misc.set_filename('<?>', ast)
- if target == 'exec':
+ if mode == 'exec':
Generator = pycodegen.ModuleCodeGenerator
- elif target == 'single':
+ elif mode == 'single':
Generator = pycodegen.InteractiveCodeGenerator
- elif target == 'eval':
+ elif mode == 'eval':
Generator = pycodegen.ExpressionCodeGenerator
codegen = Generator(ast)
rcode = codegen.getCode()
@@ -151,40 +107,27 @@
tuple(rcode.co_cellvars) )
return code
-def check_compile(expr, target='exec', quiet=False, space=None):
+def check_compile(expr, mode='exec', quiet=False, space=None):
if not quiet:
print "Compiling:", expr
if space is None:
space = std_space
- ac_code = compile_with_astcompiler(expr, target=target, space=space)
+ ac_code = compile_with_astcompiler(expr, mode=mode, space=space)
if expr == "k[v,]" or expr.startswith('"'): # module-level docstring
py.test.skip('comparison skipped, bug in "reference stable compiler"')
- sc_code = compile_with_testcompiler(expr, target=target)
+ sc_code = compile_with_testcompiler(expr, mode=mode)
compare_code(ac_code, sc_code, space=space)
-## def check_compile( expr ):
-## space = FakeSpace()
-## ast_tree = ast_parse_expr( expr, target='exec', space=space )
-## misc.set_filename("<?>", ast_tree)
-## print "Compiling:", expr
-## print ast_tree
-## codegenerator = pycodegen.ModuleCodeGenerator(space,ast_tree)
-## rcode = codegenerator.getCode()
-## code1 = to_code( rcode )
-## code2 = ast_compile( expr )
-## compare_code(code1,code2)
def test_compile_argtuple_1():
- #py.test.skip('will be tested when more basic stuff will work')
code = """def f( x, (y,z) ):
print x,y,z
"""
check_compile( code )
def test_compile_argtuple_2():
- #py.test.skip('will be tested when more basic stuff will work')
code = """def f( x, (y,(z,t)) ):
print x,y,z,t
"""
@@ -192,14 +135,12 @@
def test_compile_argtuple_3():
- #py.test.skip('will be tested when more basic stuff will work')
code = """def f( x, (y,(z,(t,u))) ):
print x,y,z,t,u
"""
check_compile( code )
-
def test_basic_astgen():
for family in TESTS:
for expr in family:
@@ -228,7 +169,7 @@
for snippet_name in LIBSTUFF:
filepath = os.path.join(os.path.dirname(__file__), '../../../lib', snippet_name)
source = file(filepath).read()
- yield check_compile, source, 'exec'
+ yield check_compile, source, 'exec'
def test_single_inputs():
Modified: pypy/dist/pypy/interpreter/pyparser/test/test_lookahead.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/test/test_lookahead.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/test/test_lookahead.py Wed Feb 28 18:30:48 2007
@@ -1,43 +1,37 @@
from pypy.interpreter.pyparser.grammar import Alternative, Sequence, KleeneStar, \
- Token, EmptyToken, build_first_sets
+ Token, Parser
class TestLookAheadBasics:
def setup_method(self, method):
- self.count = 0
- self.tok1 = Token(self.nextid(), 'foo')
- self.tok2 = Token(self.nextid(), 'bar')
- self.tok3 = Token(self.nextid(), 'foobar')
+ self.parser = Parser()
+ self.tok1 = self.parser.Token_n("t1", 'foo')
+ self.tok2 = self.parser.Token_n("t2", 'bar')
+ self.tok3 = self.parser.Token_n("t3", 'foobar')
self.tokens = [self.tok1, self.tok2, self.tok3]
- build_first_sets(self.tokens)
-
- def nextid(self):
- self.count+=1
- return self.count
+ self.parser.build_first_sets()
def test_basic_token(self):
assert self.tok1.first_set == [self.tok1]
-
def test_basic_alternative(self):
- alt = Alternative(self.nextid(), self.tokens)
- build_first_sets([alt])
+ alt = self.parser.Alternative_n("a1t", self.tokens)
+ self.parser.build_first_sets()
assert alt.first_set == self.tokens
def test_basic_sequence(self):
- seq = Sequence(self.nextid(), self.tokens)
- build_first_sets([seq])
+ seq = self.parser.Sequence_n("seq", self.tokens)
+ self.parser.build_first_sets()
assert seq.first_set == [self.tokens[0]]
def test_basic_kleenstar(self):
tok1, tok2, tok3 = self.tokens
- kstar = KleeneStar(self.nextid(), 1, 3, tok1)
- build_first_sets([kstar])
- assert kstar.first_set == [tok1]
- kstar = KleeneStar(self.nextid(), 0, 3, tok1)
- build_first_sets([kstar])
- assert kstar.first_set == [tok1, EmptyToken]
+ kstar1 = self.parser.KleeneStar_n("k", 1, 3, tok1)
+ kstar2 = self.parser.KleeneStar_n("k2", 0, 3, tok1)
+ self.parser.build_first_sets()
+ assert kstar1.first_set == [tok1]
+ assert kstar2.first_set == [tok1, self.parser.EmptyToken]
def test_maybe_empty_sequence(self):
@@ -45,11 +39,11 @@
==> S.first_set = [tok1, tok2, EmptyToken]
"""
tok1, tok2, tok3 = self.tokens
- k1 = KleeneStar(self.nextid(), 0, 2, tok1)
- k2 = KleeneStar(self.nextid(), 0, 2, tok2)
- seq = Sequence(self.nextid(), [k1, k2])
- build_first_sets([k1, k2, seq])
- assert seq.first_set == [tok1, tok2, EmptyToken]
+ k1 = self.parser.KleeneStar_n( "k1", 0, 2, tok1)
+ k2 = self.parser.KleeneStar_n("k2", 0, 2, tok2)
+ seq = self.parser.Sequence_n( "seq", [k1, k2])
+ self.parser.build_first_sets()
+ assert seq.first_set == [tok1, tok2, self.parser.EmptyToken]
def test_not_empty_sequence(self):
@@ -57,41 +51,42 @@
==> S.first_set = [tok1, tok2]
"""
tok1, tok2, tok3 = self.tokens
- k1 = KleeneStar(self.nextid(), 0, 2, tok1)
- k2 = KleeneStar(self.nextid(), 1, 2, tok2)
- seq = Sequence(self.nextid(), [k1, k2])
- build_first_sets([k1, k2, seq])
+ k1 = self.parser.KleeneStar_n("k1", 0, 2, tok1)
+ k2 = self.parser.KleeneStar_n("k2", 1, 2, tok2)
+ seq = self.parser.Sequence_n("seq", [k1, k2])
+ self.parser.build_first_sets()
assert seq.first_set == [tok1, tok2]
-def test_token_comparison():
- assert Token(1, 'foo') == Token(1, 'foo')
- assert Token(1, 'foo') != Token(2, 'foo')
- assert Token(2, 'foo') != Token(2, None)
+ def test_token_comparison(self):
+ tok1 = self.parser.Token_n( "tok1", "foo" )
+ tok1b = self.parser.Token_n( "tok1", "foo" )
+ tok2 = self.parser.Token_n( "tok2", "foo" )
+ tok3 = self.parser.Token_n( "tok2", None )
+ assert tok1 == tok1b
+ assert tok1 != tok2
+ assert tok2 != tok3
-LOW = 1
-CAP = 2
-R_A = 3
-R_B = 4
-R_C = 5
-R_k1 = 6
-R_k2 = 7
class TestLookAhead:
def setup_method(self, method):
- self.LOW = Token(LOW, 'low')
- self.CAP = Token(CAP ,'cap')
- self.A = Alternative(R_A, [])
- k1 = KleeneStar(R_k1, 0, rule=self.LOW)
- k2 = KleeneStar(R_k2, 0, rule=self.CAP)
- self.B = Sequence(R_B, [k1, self.A])
- self.C = Sequence(R_C, [k2, self.A])
+ p = self.parser = Parser()
+ self.LOW = p.Token_n( 'LOW', 'low')
+ self.CAP = p.Token_n( 'CAP' ,'cap')
+ self.A = p.Alternative_n( 'R_A', [])
+ k1 = p.KleeneStar_n( 'R_k1', 0, rule=self.LOW)
+ k2 = p.KleeneStar_n( 'R_k2', 0, rule=self.CAP)
+ self.B = p.Sequence_n( 'R_B', [k1, self.A])
+ self.C = p.Sequence_n( 'R_C', [k2, self.A])
self.A.args = [self.B, self.C]
- build_first_sets([self.A, self.B, self.C, self.LOW, self.CAP, k1, k2])
+ p.build_first_sets()
def test_S_first_set(self):
- for s in [Token(LOW, 'low'), EmptyToken, Token(CAP, 'cap')]:
+ p = self.parser
+ LOW = p.tokens['LOW']
+ CAP = p.tokens['CAP']
+ for s in [Token(p, LOW, 'low'), p.EmptyToken, Token(p, CAP, 'cap')]:
assert s in self.A.first_set
assert s in self.B.first_set
assert s in self.C.first_set
Added: pypy/dist/pypy/interpreter/pyparser/test/test_parser.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/interpreter/pyparser/test/test_parser.py Wed Feb 28 18:30:48 2007
@@ -0,0 +1,45 @@
+
+from pypy.interpreter.pyparser.grammar import Parser
+
+
+
+def test_symbols():
+ p = Parser()
+ x1 = p.add_symbol('sym')
+ x2 = p.add_token('tok')
+ x3 = p.add_anon_symbol(':sym')
+ x4 = p.add_anon_symbol(':sym1')
+ # test basic numbering assumption
+ # symbols and tokens are assigned ids sequentially
+ # from the same counter
+ assert x2 == x1 + 1
+ # anon symbols have negative value
+ assert x3 != x2 + 1
+ assert x4 == x3 - 1
+ assert x3 < 0
+ y1 = p.add_symbol('sym')
+ assert y1 == x1
+ y2 = p.add_token('tok')
+ assert y2 == x2
+ y3 = p.add_symbol(':sym')
+ assert y3 == x3
+ y4 = p.add_symbol(':sym1')
+ assert y4 == x4
+
+
+def test_load():
+ d = { 5 : 'sym1',
+ 6 : 'sym2',
+ 9 : 'sym3',
+ }
+ p = Parser()
+ p.load_symbols( d )
+ v = p.add_symbol('sym4')
+ # check that we avoid numbering conflicts
+ assert v>9
+ v = p.add_symbol( 'sym1' )
+ assert v == 5
+ v = p.add_symbol( 'sym2' )
+ assert v == 6
+ v = p.add_symbol( 'sym3' )
+ assert v == 9
Modified: pypy/dist/pypy/interpreter/pyparser/test/test_pytokenizer.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/test/test_pytokenizer.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/test/test_pytokenizer.py Wed Feb 28 18:30:48 2007
@@ -1,17 +1,26 @@
from pypy.interpreter.pyparser.pythonlexer import Source, TokenError, \
match_encoding_declaration
from pypy.interpreter.pyparser.grammar import Token, GrammarElement
-from pypy.interpreter.pyparser.pytoken import EQUAL, ENDMARKER, LSQB, MINUS, NAME, NEWLINE, NULLTOKEN, NUMBER, RSQB, STRING
+from pypy.interpreter.pyparser.pythonparse import make_pyparser
-from pypy.interpreter.pyparser.pytoken import tok_name, tok_punct
-GrammarElement.symbols = tok_name
+P = make_pyparser()
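+# token ids now live on the parser instance instead of being module-level
+# constants in pytoken, so fetch the ones this test needs from P.tokens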
+EQUAL = P.tokens['EQUAL']
+ENDMARKER = P.tokens['ENDMARKER']
+LSQB = P.tokens['LSQB']
+MINUS = P.tokens['MINUS']
+NAME = P.tokens['NAME']
+NEWLINE = P.tokens['NEWLINE']
+NULLTOKEN = P.tokens['NULLTOKEN']
+NUMBER = P.tokens['NUMBER']
+RSQB = P.tokens['RSQB']
+STRING = P.tokens['STRING']
def parse_source(source):
"""returns list of parsed tokens"""
- lexer = Source(source.splitlines(True))
+ lexer = Source( P, source.splitlines(True))
tokens = []
- last_token = Token(NULLTOKEN, None)
+ last_token = Token( P, NULLTOKEN, None)
while last_token.codename != ENDMARKER:
last_token = lexer.next()
tokens.append(last_token)
@@ -49,24 +58,24 @@
s = """['a'
]"""
tokens = parse_source(s)
- assert tokens[:4] == [Token(LSQB, None), Token(STRING, "'a'"),
- Token(RSQB, None), Token(NEWLINE, '')]
+ assert tokens[:4] == [Token(P, LSQB, None), Token(P, STRING, "'a'"),
+ Token(P, RSQB, None), Token(P, NEWLINE, '')]
def test_numbers():
"""make sure all kind of numbers are correctly parsed"""
for number in NUMBERS:
- assert parse_source(number)[0] == Token(NUMBER, number)
+ assert parse_source(number)[0] == Token(P, NUMBER, number)
neg = '-%s' % number
- assert parse_source(neg)[:2] == [Token(MINUS, None),
- Token(NUMBER, number)]
+ assert parse_source(neg)[:2] == [Token(P, MINUS, None),
+ Token(P, NUMBER, number)]
for number in BAD_NUMBERS:
- assert parse_source(number)[0] != Token(NUMBER, number)
+ assert parse_source(number)[0] != Token(P, NUMBER, number)
def test_hex_number():
"""basic pasrse"""
tokens = parse_source("a = 0x12L")
- assert tokens[:4] == [Token(NAME, 'a'), Token(EQUAL, None),
- Token(NUMBER, '0x12L'), Token(NEWLINE, '')]
+ assert tokens[:4] == [Token(P, NAME, 'a'), Token(P, EQUAL, None),
+ Token(P, NUMBER, '0x12L'), Token(P, NEWLINE, '')]
def test_punct():
"""make sure each punctuation is correctly parsed"""
@@ -81,7 +90,7 @@
tokens = [tok for tok, _, _, _ in error.token_stack]
if prefix:
tokens.pop(0)
- assert tokens[0].codename == tok_punct[pstr]
+ assert tokens[0].codename == P.tok_values[pstr]
def test_encoding_declarations_match():
Modified: pypy/dist/pypy/interpreter/pyparser/test/test_samples.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/test/test_samples.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/test/test_samples.py Wed Feb 28 18:30:48 2007
@@ -6,13 +6,12 @@
import py.test
from pypy.interpreter.pyparser.pythonutil import python_parsefile, \
- pypy_parsefile, python_parse, pypy_parse
+ pypy_parsefile, pypy_parse, python_parse, get_grammar_file, PYTHON_VERSION
from pypy.interpreter.pyparser import grammar
from pypy.interpreter.pyparser.pythonlexer import TokenError
-from pypy.interpreter.pyparser.pythonparse import PYTHON_VERSION, PYPY_VERSION
grammar.DEBUG = False
-
+_, PYPY_VERSION = get_grammar_file("2.4")
# these samples are skipped if the native version of Python does not match
# the version of the grammar we use
GRAMMAR_MISMATCH = PYTHON_VERSION != PYPY_VERSION
Modified: pypy/dist/pypy/interpreter/pyparser/tuplebuilder.py
==============================================================================
--- pypy/dist/pypy/interpreter/pyparser/tuplebuilder.py (original)
+++ pypy/dist/pypy/interpreter/pyparser/tuplebuilder.py Wed Feb 28 18:30:48 2007
@@ -1,6 +1,5 @@
-from grammar import AbstractBuilder, AbstractContext
-from pytoken import tok_name, tok_rpunct, NEWLINE, INDENT, DEDENT, ENDMARKER
+from grammar import AbstractBuilder, AbstractContext, Parser
class StackElement:
"""wraps TupleBuilder's tuples"""
@@ -53,16 +52,18 @@
class TupleBuilderContext(AbstractContext):
def __init__(self, stackpos ):
self.stackpos = stackpos
-
+
class TupleBuilder(AbstractBuilder):
"""A builder that directly produce the AST"""
- def __init__(self, rules=None, debug=0, lineno=True):
- AbstractBuilder.__init__(self, rules, debug)
+ def __init__(self, parser, debug=0, lineno=True):
+ AbstractBuilder.__init__(self, parser, debug)
# This attribute is here for convenience
self.source_encoding = None
self.lineno = lineno
self.stack = []
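+ # structural tokens that get an empty value in token() below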
+ self.space_token = ( self.parser.tokens['NEWLINE'], self.parser.tokens['INDENT'],
+ self.parser.tokens['DEDENT'], self.parser.tokens['ENDMARKER'] )
def context(self):
"""Returns the state of the builder to be restored later"""
@@ -80,7 +81,7 @@
nodes = expand_nodes( [self.stack[-1]] )
self.stack[-1] = NonTerminal( rule.codename, nodes )
return True
-
+
def sequence(self, rule, source, elts_number):
""" """
num = rule.codename
@@ -97,8 +98,8 @@
def token(self, codename, value, source):
lineno = source._token_lnum
if value is None:
- if codename not in ( NEWLINE, INDENT, DEDENT, ENDMARKER ):
- value = tok_rpunct.get(codename, "unknown op")
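+ # tok_rvalues is the per-parser replacement for the old module-level
+ # tok_rpunct reverse mapping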
+ if codename not in self.space_token:
+ value = self.parser.tok_rvalues.get(codename, "unknown op")
else:
value = ''
self.stack.append( Terminal(codename, value, lineno) )
Modified: pypy/dist/pypy/interpreter/stablecompiler/transformer.py
==============================================================================
--- pypy/dist/pypy/interpreter/stablecompiler/transformer.py (original)
+++ pypy/dist/pypy/interpreter/stablecompiler/transformer.py Wed Feb 28 18:30:48 2007
@@ -26,9 +26,7 @@
# and replace OWNER, ORGANIZATION, and YEAR as appropriate.
# make sure we import the parser with the correct grammar
-from pypy.interpreter.pyparser import pythonparse
-
-import pypy.interpreter.pyparser.pythonparse as pythonparse
+from pypy.interpreter.pyparser.pythonparse import make_pyparser
from pypy.interpreter.stablecompiler.ast import *
import parser
@@ -36,15 +34,16 @@
import sys
# Create parser from Grammar_stable, not current grammar.
-stable_grammar, _ = pythonparse.get_grammar_file("stable")
-stable_parser = pythonparse.python_grammar(stable_grammar)
+# stable_grammar, _ = pythonparse.get_grammar_file("stable")
+# stable_parser = pythonparse.python_grammar(stable_grammar)
-sym_name = stable_parser.symbols.sym_name
+stable_parser = make_pyparser('stable')
class symbol:
pass
-
-for value, name in sym_name.iteritems():
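+# rebuild the value -> name mapping from the parser's symbol table and
+# mirror each grammar symbol as an attribute of the 'symbol' class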
+sym_name = {}
+for name, value in stable_parser.symbols.items():
+ sym_name[value] = name
setattr(symbol, name, value)
# transforming is requiring a lot of recursion depth so make sure we have enough
@@ -58,6 +57,7 @@
from consts import CO_VARARGS, CO_VARKEYWORDS
from consts import OP_ASSIGN, OP_DELETE, OP_APPLY
+
def parseFile(path):
f = open(path, "U")
# XXX The parser API tolerates files without a trailing newline,
@@ -130,14 +130,15 @@
for value, name in sym_name.items():
if hasattr(self, name):
self._dispatch[value] = getattr(self, name)
- self._dispatch[token.NEWLINE] = self.com_NEWLINE
- self._atom_dispatch = {token.LPAR: self.atom_lpar,
- token.LSQB: self.atom_lsqb,
- token.LBRACE: self.atom_lbrace,
- token.BACKQUOTE: self.atom_backquote,
- token.NUMBER: self.atom_number,
- token.STRING: self.atom_string,
- token.NAME: self.atom_name,
+
+ self._dispatch[stable_parser.tokens['NEWLINE']] = self.com_NEWLINE
+ self._atom_dispatch = {stable_parser.tokens['LPAR']: self.atom_lpar,
+ stable_parser.tokens['LSQB']: self.atom_lsqb,
+ stable_parser.tokens['LBRACE']: self.atom_lbrace,
+ stable_parser.tokens['BACKQUOTE']: self.atom_backquote,
+ stable_parser.tokens['NUMBER']: self.atom_number,
+ stable_parser.tokens['STRING']: self.atom_string,
+ stable_parser.tokens['NAME']: self.atom_name,
}
self.encoding = None
@@ -206,7 +207,7 @@
def single_input(self, node):
# NEWLINE | simple_stmt | compound_stmt NEWLINE
n = node[0][0]
- if n != token.NEWLINE:
+ if n != stable_parser.tokens['NEWLINE']:
stmt = self.com_stmt(node[0])
else:
stmt = Pass()
@@ -216,14 +217,13 @@
doc = self.get_docstring(nodelist, symbol.file_input)
stmts = []
for node in nodelist:
- if node[0] != token.ENDMARKER and node[0] != token.NEWLINE:
+ if node[0] != stable_parser.tokens['ENDMARKER'] and node[0] != stable_parser.tokens['NEWLINE']:
self.com_append_stmt(stmts, node)
if doc is not None:
assert isinstance(stmts[0], Discard)
assert isinstance(stmts[0].expr, Const)
del stmts[0]
-
return Module(doc, Stmt(stmts))
def eval_input(self, nodelist):
@@ -238,8 +238,8 @@
item = self.atom_name(nodelist)
i = 1
while i < listlen:
- assert nodelist[i][0] == token.DOT
- assert nodelist[i + 1][0] == token.NAME
+ assert nodelist[i][0] == stable_parser.tokens['DOT']
+ assert nodelist[i + 1][0] == stable_parser.tokens['NAME']
item = Getattr(item, nodelist[i + 1][1])
i += 2
@@ -248,14 +248,14 @@
def decorator(self, nodelist):
# '@' dotted_name [ '(' [arglist] ')' ]
assert len(nodelist) in (3, 5, 6)
- assert nodelist[0][0] == token.AT
- assert nodelist[-1][0] == token.NEWLINE
+ assert nodelist[0][0] == stable_parser.tokens['AT']
+ assert nodelist[-1][0] == stable_parser.tokens['NEWLINE']
assert nodelist[1][0] == symbol.dotted_name
funcname = self.decorator_name(nodelist[1][1:])
if len(nodelist) > 3:
- assert nodelist[2][0] == token.LPAR
+ assert nodelist[2][0] == stable_parser.tokens['LPAR']
expr = self.com_call_function(funcname, nodelist[3])
else:
expr = funcname
@@ -328,7 +328,7 @@
# classdef: 'class' NAME ['(' testlist ')'] ':' suite
name = nodelist[1][1]
doc = self.get_docstring(nodelist[-1])
- if nodelist[2][0] == token.COLON:
+ if nodelist[2][0] == stable_parser.tokens['COLON']:
bases = []
else:
bases = self.com_bases(nodelist[3])
@@ -397,7 +397,7 @@
exprNode = self.lookup_node(en)(en[1:])
if len(nodelist) == 1:
return Discard(exprNode, lineno=exprNode.lineno)
- if nodelist[1][0] == token.EQUAL:
+ if nodelist[1][0] == stable_parser.tokens['EQUAL']:
nodesl = []
for i in range(0, len(nodelist) - 2, 2):
nodesl.append(self.com_assign(nodelist[i], OP_ASSIGN))
@@ -414,9 +414,9 @@
if len(nodelist) == 1:
start = 1
dest = None
- elif nodelist[1][0] == token.RIGHTSHIFT:
+ elif nodelist[1][0] == stable_parser.tokens['RIGHTSHIFT']:
assert len(nodelist) == 3 \
- or nodelist[3][0] == token.COMMA
+ or nodelist[3][0] == stable_parser.tokens['COMMA']
dest = self.com_node(nodelist[2])
start = 4
else:
@@ -424,7 +424,7 @@
start = 1
for i in range(start, len(nodelist), 2):
items.append(self.com_node(nodelist[i]))
- if nodelist[-1][0] == token.COMMA:
+ if nodelist[-1][0] == stable_parser.tokens['COMMA']:
return Print(items, dest, lineno=nodelist[0][2])
return Printnl(items, dest, lineno=nodelist[0][2])
@@ -482,15 +482,15 @@
assert nodelist[1][0] == symbol.dotted_name
assert nodelist[2][1] == 'import'
fromname = self.com_dotted_name(nodelist[1])
- if nodelist[3][0] == token.STAR:
+ if nodelist[3][0] == stable_parser.tokens['STAR']:
return From(fromname, [('*', None)],
lineno=nodelist[0][2])
else:
- if nodelist[3][0] == token.LPAR:
+ if nodelist[3][0] == stable_parser.tokens['LPAR']:
node = nodelist[4]
else:
node = nodelist[3]
- if node[-1][0] == token.COMMA:
+ if node[-1][0] == stable_parser.tokens['COMMA']:
self.syntaxerror("trailing comma not allowed without surrounding parentheses", node)
return From(fromname, self.com_import_as_names(node),
lineno=nodelist[0][2])
@@ -608,6 +608,7 @@
return self.com_generator_expression(test, nodelist[1])
return self.testlist(nodelist)
+
def test(self, nodelist):
# test: or_test ['if' or_test 'else' test] | lambdef
if len(nodelist) == 1:
@@ -618,11 +619,13 @@
return self.com_node(nodelist[0])
elif len(nodelist) == 5 and nodelist[1][0] =='if':
# Here we implement conditional expressions
- return ast.CondExpr(nodelist[2], nodelist[0], nodelist[4],
- nodelist[1].lineno)
+ # XXX: CPython's nodename is IfExp, not CondExpr
+ return CondExpr(nodelist[2], nodelist[0], nodelist[4],
+ nodelist[1].lineno)
else:
return self.com_binary(Or, nodelist)
+
def and_test(self, nodelist):
# not_test ('and' not_test)*
return self.com_binary(And, nodelist)
@@ -634,6 +637,9 @@
assert len(nodelist) == 1
return self.com_node(nodelist[0])
+ # XXX
+ # test = old_test
+
def or_test(self, nodelist):
# or_test: and_test ('or' and_test)*
return self.com_binary(Or, nodelist)
@@ -658,7 +664,7 @@
# comp_op: '<' | '>' | '=' | '>=' | '<=' | '<>' | '!=' | '=='
# | 'in' | 'not' 'in' | 'is' | 'is' 'not'
n = nl[1]
- if n[0] == token.NAME:
+ if n[0] == stable_parser.tokens['NAME']:
type = n[1]
if len(nl) == 3:
if type == 'not':
@@ -695,9 +701,9 @@
node = self.com_node(nodelist[0])
for i in range(2, len(nodelist), 2):
right = self.com_node(nodelist[i])
- if nodelist[i-1][0] == token.LEFTSHIFT:
+ if nodelist[i-1][0] == stable_parser.tokens['LEFTSHIFT']:
node = LeftShift([node, right], lineno=nodelist[1][2])
- elif nodelist[i-1][0] == token.RIGHTSHIFT:
+ elif nodelist[i-1][0] == stable_parser.tokens['RIGHTSHIFT']:
node = RightShift([node, right], lineno=nodelist[1][2])
else:
raise ValueError, "unexpected token: %s" % nodelist[i-1][0]
@@ -707,9 +713,9 @@
node = self.com_node(nodelist[0])
for i in range(2, len(nodelist), 2):
right = self.com_node(nodelist[i])
- if nodelist[i-1][0] == token.PLUS:
+ if nodelist[i-1][0] == stable_parser.tokens['PLUS']:
node = Add([node, right], lineno=nodelist[1][2])
- elif nodelist[i-1][0] == token.MINUS:
+ elif nodelist[i-1][0] == stable_parser.tokens['MINUS']:
node = Sub([node, right], lineno=nodelist[1][2])
else:
raise ValueError, "unexpected token: %s" % nodelist[i-1][0]
@@ -720,13 +726,13 @@
for i in range(2, len(nodelist), 2):
right = self.com_node(nodelist[i])
t = nodelist[i-1][0]
- if t == token.STAR:
+ if t == stable_parser.tokens['STAR']:
node = Mul([node, right])
- elif t == token.SLASH:
+ elif t == stable_parser.tokens['SLASH']:
node = Div([node, right])
- elif t == token.PERCENT:
+ elif t == stable_parser.tokens['PERCENT']:
node = Mod([node, right])
- elif t == token.DOUBLESLASH:
+ elif t == stable_parser.tokens['DOUBLESLASH']:
node = FloorDiv([node, right])
else:
raise ValueError, "unexpected token: %s" % t
@@ -738,11 +744,11 @@
t = elt[0]
node = self.lookup_node(nodelist[-1])(nodelist[-1][1:])
# need to handle (unary op)constant here...
- if t == token.PLUS:
+ if t == stable_parser.tokens['PLUS']:
return UnaryAdd(node, lineno=elt[2])
- elif t == token.MINUS:
+ elif t == stable_parser.tokens['MINUS']:
return UnarySub(node, lineno=elt[2])
- elif t == token.TILDE:
+ elif t == stable_parser.tokens['TILDE']:
node = Invert(node, lineno=elt[2])
return node
@@ -751,7 +757,7 @@
node = self.com_node(nodelist[0])
for i in range(1, len(nodelist)):
elt = nodelist[i]
- if elt[0] == token.DOUBLESTAR:
+ if elt[0] == stable_parser.tokens['DOUBLESTAR']:
return Power([node, self.com_node(nodelist[i+1])],
lineno=elt[2])
@@ -765,17 +771,17 @@
return n
def atom_lpar(self, nodelist):
- if nodelist[1][0] == token.RPAR:
+ if nodelist[1][0] == stable_parser.tokens['RPAR']:
return Tuple(())
return self.com_node(nodelist[1])
def atom_lsqb(self, nodelist):
- if nodelist[1][0] == token.RSQB:
+ if nodelist[1][0] == stable_parser.tokens['RSQB']:
return List([], lineno=nodelist[0][2])
return self.com_list_constructor(nodelist[1], nodelist[0][2])
def atom_lbrace(self, nodelist):
- if nodelist[1][0] == token.RBRACE:
+ if nodelist[1][0] == stable_parser.tokens['RBRACE']:
return Dict(())
return self.com_dictmaker(nodelist[1])
@@ -850,10 +856,10 @@
i = 0
while i < len(nodelist):
node = nodelist[i]
- if node[0] == token.STAR or node[0] == token.DOUBLESTAR:
- if node[0] == token.STAR:
+ if node[0] == stable_parser.tokens['STAR'] or node[0] == stable_parser.tokens['DOUBLESTAR']:
+ if node[0] == stable_parser.tokens['STAR']:
node = nodelist[i+1]
- if node[0] == token.NAME:
+ if node[0] == stable_parser.tokens['NAME']:
name = node[1]
if name in names:
self.syntaxerror("duplicate argument '%s' in function definition" %
@@ -865,7 +871,7 @@
if i < len(nodelist):
# should be DOUBLESTAR
t = nodelist[i][0]
- if t == token.DOUBLESTAR:
+ if t == stable_parser.tokens['DOUBLESTAR']:
node = nodelist[i+1]
else:
raise ValueError, "unexpected token: %s" % t
@@ -891,7 +897,7 @@
self.syntaxerror("non-default argument follows default argument",node)
break
- if nodelist[i][0] == token.EQUAL:
+ if nodelist[i][0] == stable_parser.tokens['EQUAL']:
defaults.append(self.com_node(nodelist[i + 1]))
i = i + 2
elif len(defaults):
@@ -905,7 +911,7 @@
def com_fpdef(self, node):
# fpdef: NAME | '(' fplist ')'
- if node[1][0] == token.LPAR:
+ if node[1][0] == stable_parser.tokens['LPAR']:
return self.com_fplist(node[2])
return node[1][1]
@@ -922,7 +928,7 @@
# String together the dotted names and return the string
name = ""
for n in node:
- if type(n) == type(()) and n[0] == 1:
+ if type(n) == type(()) and n[0] == stable_parser.tokens['NAME']:
name = name + n[1] + '.'
return name[:-1]
@@ -933,7 +939,7 @@
if len(node) == 1:
return dot, None
assert node[1][1] == 'as'
- assert node[2][0] == token.NAME
+ assert node[2][0] == stable_parser.tokens['NAME']
return dot, node[2][1]
def com_dotted_as_names(self, node):
@@ -947,11 +953,11 @@
def com_import_as_name(self, node):
assert node[0] == symbol.import_as_name
node = node[1:]
- assert node[0][0] == token.NAME
+ assert node[0][0] == stable_parser.tokens['NAME']
if len(node) == 1:
return node[0][1], None
assert node[1][1] == 'as', node
- assert node[2][0] == token.NAME
+ assert node[2][0] == stable_parser.tokens['NAME']
return node[0][1], node[2][1]
def com_import_as_names(self, node):
@@ -994,7 +1000,7 @@
expr1 = expr2 = None
clauses.append((expr1, expr2, self.com_node(nodelist[i+2])))
- if node[0] == token.NAME:
+ if node[0] == stable_parser.tokens['NAME']:
elseNode = self.com_node(nodelist[i+2])
return TryExcept(self.com_node(nodelist[2]), clauses, elseNode,
lineno=nodelist[0][2])
@@ -1038,7 +1044,7 @@
primary = self.com_node(node[1])
for i in range(2, len(node)-1):
ch = node[i]
- if ch[0] == token.DOUBLESTAR:
+ if ch[0] == stable_parser.tokens['DOUBLESTAR']:
self.syntaxerror( "can't assign to operator", node)
primary = self.com_apply_trailer(primary, ch)
return self.com_assign_trailer(primary, node[-1],
@@ -1046,16 +1052,16 @@
node = node[1]
elif t == symbol.atom:
t = node[1][0]
- if t == token.LPAR:
+ if t == stable_parser.tokens['LPAR']:
node = node[2]
- if node[0] == token.RPAR:
+ if node[0] == stable_parser.tokens['RPAR']:
self.syntaxerror( "can't assign to ()", node)
- elif t == token.LSQB:
+ elif t == stable_parser.tokens['LSQB']:
node = node[2]
- if node[0] == token.RSQB:
+ if node[0] == stable_parser.tokens['RSQB']:
self.syntaxerror( "can't assign to []", node)
return self.com_assign_list(node, assigning)
- elif t == token.NAME:
+ elif t == stable_parser.tokens['NAME']:
if node[1][1] == "__debug__":
self.syntaxerror( "can not assign to __debug__", node )
if node[1][1] == "None":
@@ -1081,7 +1087,7 @@
if i + 1 < len(node):
if node[i + 1][0] == symbol.list_for:
self.syntaxerror( "can't assign to list comprehension", node)
- assert node[i + 1][0] == token.COMMA, node[i + 1]
+ assert node[i + 1][0] == stable_parser.tokens['COMMA'], node[i + 1]
assigns.append(self.com_assign(node[i], assigning))
return AssList(assigns, lineno=extractLineNo(node))
@@ -1090,11 +1096,11 @@
def com_assign_trailer(self, primary, node, assigning):
t = node[1][0]
- if t == token.DOT:
+ if t == stable_parser.tokens['DOT']:
return self.com_assign_attr(primary, node[2], assigning)
- if t == token.LSQB:
+ if t == stable_parser.tokens['LSQB']:
return self.com_subscriptlist(primary, node[2], assigning)
- if t == token.LPAR:
+ if t == stable_parser.tokens['LPAR']:
if assigning==OP_DELETE:
self.syntaxerror( "can't delete function call", node)
else:
@@ -1142,7 +1148,7 @@
assert len(nodelist[i:]) == 1
return self.com_list_comprehension(values[0],
nodelist[i])
- elif nodelist[i][0] == token.COMMA:
+ elif nodelist[i][0] == stable_parser.tokens['COMMA']:
continue
values.append(self.com_node(nodelist[i]))
return List(values, lineno=lineno)
@@ -1241,29 +1247,29 @@
def com_apply_trailer(self, primaryNode, nodelist):
t = nodelist[1][0]
- if t == token.LPAR:
+ if t == stable_parser.tokens['LPAR']:
return self.com_call_function(primaryNode, nodelist[2])
- if t == token.DOT:
+ if t == stable_parser.tokens['DOT']:
return self.com_select_member(primaryNode, nodelist[2])
- if t == token.LSQB:
+ if t == stable_parser.tokens['LSQB']:
return self.com_subscriptlist(primaryNode, nodelist[2], OP_APPLY)
self.syntaxerror( 'unknown node type: %s' % t, nodelist[1])
def com_select_member(self, primaryNode, nodelist):
- if nodelist[0] != token.NAME:
+ if nodelist[0] != stable_parser.tokens['NAME']:
self.syntaxerror( "member must be a name", nodelist[0])
return Getattr(primaryNode, nodelist[1], lineno=nodelist[2])
def com_call_function(self, primaryNode, nodelist):
- if nodelist[0] == token.RPAR:
+ if nodelist[0] == stable_parser.tokens['RPAR']:
return CallFunc(primaryNode, [], lineno=extractLineNo(nodelist))
args = []
kw = 0
len_nodelist = len(nodelist)
for i in range(1, len_nodelist, 2):
node = nodelist[i]
- if node[0] == token.STAR or node[0] == token.DOUBLESTAR:
+ if node[0] == stable_parser.tokens['STAR'] or node[0] == stable_parser.tokens['DOUBLESTAR']:
break
kw, result = self.com_argument(node, kw)
@@ -1277,7 +1283,7 @@
else:
# Not broken by star arg, so skip the last one we processed.
i = i + 1
- if i < len_nodelist and nodelist[i][0] == token.COMMA:
+ if i < len_nodelist and nodelist[i][0] == stable_parser.tokens['COMMA']:
# need to accept an application that looks like "f(a, b,)"
i = i + 1
star_node = dstar_node = None
@@ -1285,11 +1291,11 @@
tok = nodelist[i]
ch = nodelist[i+1]
i = i + 3
- if tok[0]==token.STAR:
+ if tok[0]==stable_parser.tokens['STAR']:
if star_node is not None:
self.syntaxerror( 'already have the varargs identifier', tok )
star_node = self.com_node(ch)
- elif tok[0]==token.DOUBLESTAR:
+ elif tok[0]==stable_parser.tokens['DOUBLESTAR']:
if dstar_node is not None:
self.syntaxerror( 'already have the kwargs identifier', tok )
dstar_node = self.com_node(ch)
@@ -1308,9 +1314,9 @@
return 0, self.com_node(nodelist[1])
result = self.com_node(nodelist[3])
n = nodelist[1]
- while len(n) == 2 and n[0] != token.NAME:
+ while len(n) == 2 and n[0] != stable_parser.tokens['NAME']:
n = n[1]
- if n[0] != token.NAME:
+ if n[0] != stable_parser.tokens['NAME']:
self.syntaxerror( "keyword can't be an expression (%s)"%n[0], n)
node = Keyword(n[1], result, lineno=n[2])
return 1, node
@@ -1324,8 +1330,8 @@
# backwards compat slice for '[i:j]'
if len(nodelist) == 2:
sub = nodelist[1]
- if (sub[1][0] == token.COLON or \
- (len(sub) > 2 and sub[2][0] == token.COLON)) and \
+ if (sub[1][0] == stable_parser.tokens['COLON'] or \
+ (len(sub) > 2 and sub[2][0] == stable_parser.tokens['COLON'])) and \
sub[-1][0] != symbol.sliceop:
return self.com_slice(primary, sub, assigning)
@@ -1339,9 +1345,9 @@
# slice_item: expression | proper_slice | ellipsis
ch = node[1]
t = ch[0]
- if t == token.DOT and node[2][0] == token.DOT:
+ if t == stable_parser.tokens['DOT'] and node[2][0] == stable_parser.tokens['DOT']:
return Ellipsis()
- if t == token.COLON or len(node) > 2:
+ if t == stable_parser.tokens['COLON'] or len(node) > 2:
return self.com_sliceobj(node)
return self.com_node(ch)
@@ -1357,7 +1363,7 @@
items = []
- if node[1][0] == token.COLON:
+ if node[1][0] == stable_parser.tokens['COLON']:
items.append(Const(None))
i = 2
else:
@@ -1385,7 +1391,7 @@
# short_slice: [lower_bound] ":" [upper_bound]
lower = upper = None
if len(node) == 3:
- if node[1][0] == token.COLON:
+ if node[1][0] == stable_parser.tokens['COLON']:
upper = self.com_node(node[2])
else:
lower = self.com_node(node[1])
@@ -1412,7 +1418,7 @@
return self.get_docstring(sub)
return None
if n == symbol.atom:
- if node[0][0] == token.STRING:
+ if node[0][0] == stable_parser.tokens['STRING']:
s = ''
for t in node:
s = s + eval(t[1])
@@ -1449,13 +1455,13 @@
# comp_op: '<' | '>' | '=' | '>=' | '<=' | '<>' | '!=' | '=='
# | 'in' | 'not' 'in' | 'is' | 'is' 'not'
_cmp_types = {
- token.LESS : '<',
- token.GREATER : '>',
- token.EQEQUAL : '==',
- token.EQUAL : '==',
- token.LESSEQUAL : '<=',
- token.GREATEREQUAL : '>=',
- token.NOTEQUAL : '!=',
+ stable_parser.tokens['LESS'] : '<',
+ stable_parser.tokens['GREATER'] : '>',
+ stable_parser.tokens['EQEQUAL'] : '==',
+ stable_parser.tokens['EQUAL'] : '==',
+ stable_parser.tokens['LESSEQUAL'] : '<=',
+ stable_parser.tokens['GREATEREQUAL'] : '>=',
+ stable_parser.tokens['NOTEQUAL'] : '!=',
}
_assign_types = [
@@ -1474,20 +1480,20 @@
symbol.factor,
]
-import types
-_names = {}
-for k, v in sym_name.items():
- _names[k] = v
-for k, v in token.tok_name.items():
- _names[k] = v
-
-def debug_tree(tree):
- l = []
- for elt in tree:
- if type(elt) == types.IntType:
- l.append(_names.get(elt, elt))
- elif type(elt) == types.StringType:
- l.append(elt)
- else:
- l.append(debug_tree(elt))
- return l
+# import types
+# _names = {}
+# for k, v in sym_name.items():
+# _names[k] = v
+# for k, v in token.tok_name.items():
+# _names[k] = v
+#
+# def debug_tree(tree):
+# l = []
+# for elt in tree:
+# if type(elt) == types.IntType:
+# l.append(_names.get(elt, elt))
+# elif type(elt) == types.StringType:
+# l.append(elt)
+# else:
+# l.append(debug_tree(elt))
+# return l
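The mechanical change running through transformer.py above replaces the stdlib `token` module's fixed constants with lookups on the parser's own token table, so that token numbering can change when the grammar is modified at runtime. A minimal, self-contained sketch of the idea (the ToyParser class is illustrative; only the `stable_parser.tokens` mapping comes from the diff):

    # Token ids must be read from the live parser rather than baked in at
    # import time, because dynamically inserted grammar rules can renumber them.
    class ToyParser:
        def __init__(self):
            # in PyPy this table is rebuilt whenever the grammar changes
            self.tokens = {'NAME': 1, 'EQUAL': 22}

    stable_parser = ToyParser()

    node = (1, 'x')   # (token id, value) pair from the concrete syntax tree
    assert node[0] == stable_parser.tokens['NAME']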
Added: pypy/dist/pypy/module/dyngram/__init__.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/module/dyngram/__init__.py Wed Feb 28 18:30:48 2007
@@ -0,0 +1,12 @@
+"""Mixed module for dynamic grammar modification"""
+
+from pypy.interpreter.mixedmodule import MixedModule
+
+class Module(MixedModule):
+ """dyngram module definition"""
+
+ name = 'dyngram'
+ appleveldefs = {}
+ interpleveldefs = {
+ 'insert_grammar_rule' : 'pypy.interpreter.pycompiler.insert_grammar_rule',
+ }
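The new mixed module exposes a single entry point at application level. A hedged usage sketch, runnable only on a PyPy built with --withmod-dyngram (the complete, working example is the test added further down in this commit, which also extends compound_stmt so the new rule becomes reachable):

    import dyngram, parser

    def build_unless_stmt(items):
        # desugar `unless cond: body` into `if not cond: body`
        lineno = items[0].lineno
        return parser.ASTIf([(parser.ASTNot(items[1]), items[3])], None, lineno)

    dyngram.insert_grammar_rule("unless_stmt: 'unless' test ':' suite\n",
                                {'unless_stmt': build_unless_stmt})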
Modified: pypy/dist/pypy/module/recparser/__init__.py
==============================================================================
--- pypy/dist/pypy/module/recparser/__init__.py (original)
+++ pypy/dist/pypy/module/recparser/__init__.py Wed Feb 28 18:30:48 2007
@@ -47,7 +47,6 @@
'source2ast' : "pyparser.source2ast",
'decode_string_literal': 'pyparser.decode_string_literal',
'install_compiler_hook' : 'pypy.interpreter.pycompiler.install_compiler_hook',
- 'rules' : 'pypy.interpreter.pyparser.pythonparse.grammar_rules',
}
# Automatically exports each AST class
Modified: pypy/dist/pypy/module/recparser/codegen.py
==============================================================================
--- pypy/dist/pypy/module/recparser/codegen.py (original)
+++ pypy/dist/pypy/module/recparser/codegen.py Wed Feb 28 18:30:48 2007
@@ -54,7 +54,7 @@
def get_size(self):
s = 0
- for i in insns:
+ for i in self.insns:
s += i.size()
return s
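The codegen.py hunk fixes a plain NameError: `insns` was a free name inside the method. Reconstructed context for the fix (the class name and the per-instruction size() method are assumptions; the diff only shows get_size):

    class InstrList:                 # hypothetical container name
        def __init__(self):
            self.insns = []          # instruction objects, each with size()

        def get_size(self):
            # total encoded size of the instructions in this block
            s = 0
            for i in self.insns:     # was bare `insns`: NameError at runtime
                s += i.size()
            return s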
Modified: pypy/dist/pypy/module/recparser/compat.py
==============================================================================
--- pypy/dist/pypy/module/recparser/compat.py (original)
+++ pypy/dist/pypy/module/recparser/compat.py Wed Feb 28 18:30:48 2007
@@ -1,12 +1,17 @@
"""Compatibility layer for CPython's parser module"""
-from pythonparse import parse_python_source
-from pythonutil import PYTHON_PARSER
+from pypy.interpreter.pyparser.tuplebuilder import TupleBuilder
+from pythonparse import make_pyparser
+from pythonutil import pypy_parse
+import symbol # XXX use PYTHON_PARSER.symbols ?
from compiler import transformer, compile as pycompile
+PYTHON_PARSER = make_pyparser()
+
def suite( source ):
strings = [line+'\n' for line in source.split('\n')]
- builder = parse_python_source( strings, PYTHON_PARSER, "file_input" )
+ builder = TupleBuilder(PYTHON_PARSER)
+ PYTHON_PARSER.parse_source(source, 'exec', builder)
nested_tuples = builder.stack[-1].as_tuple()
if builder.source_encoding is not None:
return (symbol.encoding_decl, nested_tuples, builder.source_encoding)
@@ -16,7 +21,8 @@
def expr( source ):
strings = [line+'\n' for line in source.split('\n')]
- builder = parse_python_source( strings, PYTHON_PARSER, "eval_input" )
+ builder = TupleBuilder(PYTHON_PARSER)
+ PYTHON_PARSER.parse_source(source, 'eval', builder)
nested_tuples = builder.stack[-1].as_tuple()
if builder.source_encoding is not None:
return (symbol.encoding_decl, nested_tuples, builder.source_encoding)
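Both compatibility entry points now share one pattern: create a TupleBuilder, drive the parser with a standard compile() mode string, and read the result off the builder's stack. A sketch of that flow, assuming (as the diff implies) that parse_source leaves the finished node on builder.stack:

    from pypy.interpreter.pyparser.tuplebuilder import TupleBuilder
    from pythonparse import make_pyparser

    parser = make_pyparser()

    def parse_to_tuple(source, mode):
        # mode is 'exec' for module sources, 'eval' for single expressions
        builder = TupleBuilder(parser)
        parser.parse_source(source, mode, builder)
        return builder.stack[-1].as_tuple()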
Added: pypy/dist/pypy/module/recparser/hooksamples/constchanger.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/module/recparser/hooksamples/constchanger.py Wed Feb 28 18:30:48 2007
@@ -0,0 +1,21 @@
+class ChangeConstVisitor:
+ def visitConst(self, node):
+ if node.value == 3:
+ node.value = 2
+
+ def defaultvisit(self, node):
+ for child in node.getChildNodes():
+ child.accept(self)
+
+ def __getattr__(self, attrname):
+ if attrname.startswith('visit'):
+ return self.defaultvisit
+ raise AttributeError(attrname)
+
+def threebecomestwo(ast, enc):
+ ast.accept(ChangeConstVisitor())
+ return ast
+
+# install the hook
+import parser
+parser.install_compiler_hook(threebecomestwo)
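Importing the sample is enough to change behaviour: every piece of code compiled afterwards has its literal 3s rewritten to 2. A hedged illustration of the observable effect (assuming, as the hook machinery suggests, that app-level compile() goes through the hooked compiler on a PyPy with recparser enabled):

    import constchanger      # installing the hook is a side effect of import

    ns = {}
    # compiled after the hook is in place, so Const(3) becomes Const(2)
    exec compile("x = 3", "<test>", "exec") in ns
    assert ns['x'] == 2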
Added: pypy/dist/pypy/module/recparser/hooksamples/tracer.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/module/recparser/hooksamples/tracer.py Wed Feb 28 18:30:48 2007
@@ -0,0 +1,53 @@
+"""this one logs simple assignments and somewhat clearly shows
+that we need a nice API to define "joinpoints". Maybe a SAX-like
+(i.e. event-based) API ?
+
+XXX: crashes on everything else than simple assignment (AssAttr, etc.)
+"""
+
+from parser import ASTPrintnl, ASTConst, ASTName, ASTAssign
+from parser import install_compiler_hook, source2ast
+
+BEFORE_LOG_SOURCE = """if '%s' in locals() or '%s' in globals():
+ print '(before) %s <--', locals().get('%s', globals().get('%s', '<XXX>'))
+"""
+AFTER_LOG_SOURCE = "print '(after) %s <--', %s"
+
+def get_statements(source):
+ module = source2ast(source)
+ return module.node.nodes
+
+class Tracer:
+ def visitModule(self, module):
+ module.node = module.node.accept(self)
+ return module
+
+ def default(self, node):
+ for child in node.getChildNodes():
+ # let's cheat a bit
+ child.parent = node
+ child.accept(self)
+ return node
+
+ def visitAssName(self, assname):
+ assign = assname
+ while not isinstance(assign, ASTAssign):
+ assign = assign.parent
+ stmt = assign.parent
+ varname = assname.name
+ before_stmts = get_statements(BEFORE_LOG_SOURCE % ((varname,) * 5))
+ after_stmts = get_statements(AFTER_LOG_SOURCE % (varname, varname))
+ stmt.insert_before(assign, before_stmts)
+ stmt.insert_after(assign, after_stmts)
+ return assname
+
+ def __getattr__(self, attrname):
+ if attrname.startswith('visit'):
+ return self.default
+ raise AttributeError('No such attribute: %s' % attrname)
+
+
+def _trace(ast, enc):
+ return ast.accept(Tracer())
+
+install_compiler_hook(_trace)
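Reading the two templates together shows what the tracer splices in around each simple assignment. Conceptually, once the hook has rewritten it, a module containing `a = 1` behaves roughly like the following (a sketch derived from BEFORE_LOG_SOURCE and AFTER_LOG_SOURCE, not captured output):

    if 'a' in locals() or 'a' in globals():
        print '(before) a <--', locals().get('a', globals().get('a', '<XXX>'))
    a = 1
    print '(after) a <--', a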
Modified: pypy/dist/pypy/module/recparser/pyparser.py
==============================================================================
--- pypy/dist/pypy/module/recparser/pyparser.py (original)
+++ pypy/dist/pypy/module/recparser/pyparser.py Wed Feb 28 18:30:48 2007
@@ -8,11 +8,13 @@
from pypy.interpreter.typedef import interp_attrproperty, GetSetProperty
from pypy.interpreter.pycode import PyCode
from pypy.interpreter.pyparser.syntaxtree import TokenNode, SyntaxNode, AbstractSyntaxVisitor
-from pypy.interpreter.pyparser.pythonutil import PYTHON_PARSER
+from pypy.interpreter.pyparser.pythonparse import make_pyparser
from pypy.interpreter.pyparser.error import SyntaxError
from pypy.interpreter.pyparser import grammar, symbol, pytoken
from pypy.interpreter.argument import Arguments
+# backward compat (temp)
+PYTHON_PARSER = make_pyparser()
__all__ = [ "ASTType", "STType", "suite", "expr" ]
@@ -43,14 +45,17 @@
def visit_tokennode( self, node ):
space = self.space
+ tokens = space.default_compiler.parser.tokens
num = node.name
lineno = node.lineno
if node.value is not None:
val = node.value
else:
- if num not in ( pytoken.NEWLINE, pytoken.INDENT,
- pytoken.DEDENT, pytoken.ENDMARKER ):
- val = pytoken.tok_rpunct[num]
+ if num != tokens['NEWLINE'] and \
+ num != tokens['INDENT'] and \
+ num != tokens['DEDENT'] and \
+ num != tokens['ENDMARKER']:
+ val = space.default_compiler.parser.tok_rvalues[num]
else:
val = node.value or ''
if self.line_info:
@@ -145,11 +150,11 @@
totuple = interp2app(STType.descr_totuple),
)
-def parse_python_source(space, source, goal):
- builder = grammar.BaseGrammarBuilder(debug=False, rules=PYTHON_PARSER.rules)
+def parse_python_source(space, source, mode):
+ builder = grammar.BaseGrammarBuilder(debug=False, parser=PYTHON_PARSER)
builder.space = space
try:
- PYTHON_PARSER.parse_source(source, goal, builder )
+ PYTHON_PARSER.parse_source(source, mode, builder )
return builder.stack[-1]
except SyntaxError, e:
raise OperationError(space.w_SyntaxError,
@@ -157,14 +162,14 @@
def suite( space, source ):
# make the annotator life easier (don't use str.splitlines())
- syntaxtree = parse_python_source( space, source, "file_input" )
+ syntaxtree = parse_python_source( space, source, "exec" )
return space.wrap( STType(space, syntaxtree) )
suite.unwrap_spec = [ObjSpace, str]
def expr( space, source ):
# make the annotator life easier (don't use str.splitlines())
- syntaxtree = parse_python_source( space, source, "eval_input" )
+ syntaxtree = parse_python_source( space, source, "eval" )
return space.wrap( STType(space, syntaxtree) )
expr.unwrap_spec = [ObjSpace, str]
@@ -180,7 +185,7 @@
items = space.unpackiterable( w_sequence )
nodetype = space.int_w( items[0] )
is_syntax = True
- if nodetype>=0 and nodetype<pytoken.N_TOKENS:
+ if nodetype>=0 and nodetype < pytoken.N_TOKENS:
is_syntax = False
if is_syntax:
nodes = []
@@ -201,11 +206,8 @@
def source2ast(space, source):
- from pypy.interpreter.pyparser.pythonutil import AstBuilder, PYTHON_PARSER
- builder = AstBuilder(space=space)
- PYTHON_PARSER.parse_source(source, 'file_input', builder)
- ast_tree = builder.rule_stack[-1]
- return space.wrap(ast_tree)
+ from pypy.interpreter.pyparser.pythonutil import source2ast
+ return space.wrap(source2ast(source, 'exec', space=space))
source2ast.unwrap_spec = [ObjSpace, str]
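With the old goal symbols replaced by the usual compile() mode names, the app-level helpers line up with their CPython counterparts. A small usage sketch on a PyPy with recparser (totuple is exercised by the test added at the end of this commit):

    import parser

    st = parser.suite("a = 3\n")   # parsed with mode 'exec'
    tup = st.totuple()             # nested (symbol/token id, ...) tuples

    e = parser.expr("a + 1")       # parsed with mode 'eval'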
Added: pypy/dist/pypy/module/recparser/test/test_dyn_grammarrules.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/module/recparser/test/test_dyn_grammarrules.py Wed Feb 28 18:30:48 2007
@@ -0,0 +1,66 @@
+from pypy.conftest import gettestobjspace
+
+class AppTest_InsertGrammarRules:
+ def setup_class(cls):
+ space = gettestobjspace(usemodules=('dyngram', 'recparser'))
+ cls.space = space
+
+ def test_do_while(self):
+ import dyngram, parser
+
+ newrules = """
+ compound_stmt: if_stmt | on_stmt | unless_stmt | dountil_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef
+ dountil_stmt: 'do' ':' suite 'until' test
+ unless_stmt: 'unless' test ':' suite
+ on_stmt: 'on' NAME '=' test ':' suite ['else' ':' suite]
+ """
+
+ def build_dountil_stmt(items):
+ """ 'do' ':' suite 'until' ':' test """
+ lineno = items[0].lineno
+ suite = items[2]
+ test = items[-1]
+ while_stmt = parser.ASTWhile(parser.ASTNot(test), suite, None, lineno)
+ return parser.ASTStmt([suite, while_stmt], lineno)
+
+ def build_unless_stmt(its):
+ """ 'unless' test ':' suite """
+ lineno = its[0].lineno
+ return parser.ASTIf([(parser.ASTNot(its[1]), its[3])], None, lineno)
+
+ def make_assignment(var, node):
+ # XXX: consts.OP_APPLY
+ return parser.ASTAssign([parser.ASTAssName(var, 0)], node)
+
+ def build_on_stmt(items):
+ """ 'on' NAME = test ':' suite 'else' ':' suite"""
+ varname = items[1].value
+ test = items[3]
+ suite = items[5]
+ assign = make_assignment(varname, test)
+ if len(items) == 9:
+ else_ = items[-1]
+ else:
+ else_ = None
+ test = parser.ASTIf([(parser.ASTName(varname), suite)], else_, items[0].lineno)
+ return parser.ASTStmt([assign, test], items[0].lineno)
+
+ dyngram.insert_grammar_rule(newrules, {'dountil_stmt' : build_dountil_stmt,
+ 'unless_stmt': build_unless_stmt,
+ 'on_stmt' : build_on_stmt,
+ })
+
+ # now we should be able to use do...until and unless statements
+ d = {}
+ exec '''
+a = 0
+do:
+ a += 1
+until True
+
+b = 0
+unless a == 2: b = 3
+ ''' in d
+ assert d['a'] == 1
+ assert d['b'] == 3
+
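For reference, build_dountil_stmt desugars the new statement into existing nodes, Stmt([suite, While(Not(test), suite)]): the body runs once up front and then repeats while the test is false. The do/until block in the test is therefore equivalent to this plain Python:

    a = 0
    a += 1              # the suite, executed once unconditionally
    while not True:     # While(Not(test), suite)
        a += 1
    assert a == 1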
Modified: pypy/dist/pypy/module/recparser/test/test_parser.py
==============================================================================
--- pypy/dist/pypy/module/recparser/test/test_parser.py (original)
+++ pypy/dist/pypy/module/recparser/test/test_parser.py Wed Feb 28 18:30:48 2007
@@ -15,3 +15,8 @@
def test_enc_minimal(self):
import parser
parser.suite("# -*- coding: koi8-u -*-*\ngreat()")
+
+ def test_simple_ass_totuple(self):
+ import parser
+ parser.suite("a = 3").totuple()
+