Author: guido.van.rossum
Date: Thu Nov 30 21:41:49 2006
New Revision: 52874
Added:
sandbox/trunk/2to3/pytree.py (contents, props changed)
Removed:
sandbox/trunk/2to3/pynode.py
Modified:
sandbox/trunk/2to3/ (props changed)
sandbox/trunk/2to3/fix_has_key.py
sandbox/trunk/2to3/play.py
Log:
Same idea, different tree node implementation.
Modified: sandbox/trunk/2to3/fix_has_key.py
==============================================================================
--- sandbox/trunk/2to3/fix_has_key.py (original)
+++ sandbox/trunk/2to3/fix_has_key.py Thu Nov 30 21:41:49 2006
@@ -9,25 +9,34 @@
# Python imports
import os
import sys
+import token
import logging
import pgen2
from pgen2 import driver
-import pynode
+import pytree
-logging.basicConfig(level=logging.WARN)
+logging.basicConfig(level=logging.DEBUG)
+
+gr = driver.load_grammar("Grammar.txt") # used by node initializers
+
+class Symbols(object):
+
+ def __init__(self, gr):
+ for name, symbol in gr.symbol2number.iteritems():
+ setattr(self, name, symbol)
+
+syms = Symbols(gr)
def main():
args = sys.argv[1:] or ["example.py"]
- gr = driver.load_grammar("Grammar.txt")
- dr = driver.Driver(gr, convert=pynode.convert)
+ dr = driver.Driver(gr, convert=pytree.convert)
for fn in args:
print "Parsing", fn
tree = dr.parse_file(fn)
- tree.set_parents()
refactor(tree)
diff(fn, tree)
@@ -36,25 +45,26 @@
def visit(node, func):
func(node)
- for child in node.get_children():
+ for child in node.children:
visit(child, func)
# Sample nodes
-_context = ("", (0, 0))
-n_dot = pynode.Token(_context, ".")
-n_has_key = pynode.Name(_context, "has_key")
-n_trailer_has_key = pynode.trailer(_context, n_dot, n_has_key)
-n_lpar = pynode.Token(_context, "(")
-n_star = pynode.Token(_context, "*")
-n_comma = pynode.Token(_context, ",")
-n_in = pynode.Token((" ", (0, 0)), "in")
+n_dot = pytree.Leaf(None, token.DOT, ".")
+n_has_key = pytree.Leaf(None, token.NAME, "has_key")
+n_trailer_has_key = pytree.Node(None, syms.trailer, (n_dot, n_has_key))
+n_lpar = pytree.Leaf(None, token.LPAR, "(")
+n_star = pytree.Leaf(None, token.STAR, "*")
+n_comma = pytree.Leaf(None, token.COMMA, ",")
+n_in = pytree.Leaf((" ", (0, 0)), token.NAME, "in") # XXX what operator?
+
+import pdb
def fix_has_key(node):
if node != n_trailer_has_key:
return
# XXX Could use more DOM manipulation primitives and matching operations
parent = node.parent
- nodes = parent.get_children()
+ nodes = parent.children
for i, n in enumerate(nodes):
if n is node:
break
@@ -66,17 +76,17 @@
if len(nodes) != i+2:
return # Too much follows ".has_key", e.g. ".has_key(x).blah"
next = nodes[i+1]
- if not isinstance(next, pynode.trailer):
+ if next.type != syms.trailer:
return # ".has_key" not followed by another trailer
- next_children = next.get_children()
+ next_children = next.children
if next_children[0] != n_lpar:
return # ".has_key" not followed by "(...)"
if len(next_children) != 3:
return # ".has_key" followed by "()"
argsnode = next_children[1]
arg = argsnode
- if isinstance(argsnode, pynode.arglist):
- args = argsnode.get_children()
+ if argsnode.type != syms.arglist:
+ args = argsnode.children
if len(args) > 2:
return # Too many arguments
if len(args) == 2:
@@ -88,10 +98,8 @@
# Change "X.has_key(Y)" into "Y in X"
arg.set_prefix(nodes[0].get_prefix())
nodes[0].set_prefix(" ")
- new = pynode.comparison(_context,
- arg,
- n_in,
- pynode.power(_context, *nodes[:i]))
+ new = pytree.Node(None, syms.comparison,
+ (arg, n_in, pytree.Node(None, syms.power, nodes[:i])))
# XXX Sometimes we need to parenthesize arg or new. Later.
parent.parent.replace(parent, new)
Modified: sandbox/trunk/2to3/play.py
==============================================================================
--- sandbox/trunk/2to3/play.py (original)
+++ sandbox/trunk/2to3/play.py Thu Nov 30 21:41:49 2006
@@ -14,17 +14,16 @@
import pgen2
from pgen2 import driver
-import pynode
+import pytree
logging.basicConfig(level=logging.WARN)
def main():
gr = driver.load_grammar("Grammar.txt")
- dr = driver.Driver(gr, convert=pynode.convert)
+ dr = driver.Driver(gr, convert=pytree.convert)
fn = "example.py"
tree = dr.parse_file(fn, debug=True)
- tree.set_parents()
sys.stdout.write(str(tree))
if not diff(fn, tree):
print "No diffs."
Deleted: /sandbox/trunk/2to3/pynode.py
==============================================================================
--- /sandbox/trunk/2to3/pynode.py Thu Nov 30 21:41:49 2006
+++ (empty file)
@@ -1,547 +0,0 @@
-# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-# Modifications:
-# Copyright 2006 Google, Inc. All Rights Reserved.
-# Licensed to PSF under a Contributor Agreement.
-
-"""Syntax tree node definitions.
-
-There is a class or function corresponding to each terminal and
-nonterminal symbol.
-
-We use __slots__ to make the parse tree nodes as small as possible.
-
-NOTE: EVERY CLASS MUST HAVE A __slots__ DEFINITION, EVEN IF EMPTY!
-(If not, the instances will get a __dict__, defeating the purpose of
-__slots__ in our case.)
-"""
-
-__author__ = "Guido van Rossum <guido(a)python.org>"
-
-# Python imports
-import token
-import logging
-
-# Pgen imports
-from pgen2 import grammar
-
-# Custom logger
-logger = logging.getLogger()
-
-class Node(object):
-
- # XXX Should refactor this so that there are only two kinds of nodes,
- # Terminal and Nonterminal; each with subclasses to match the grammar
- # or perhaps just storing the node type in a slot.
-
- """Abstract base class for all nodes.
-
- This has no attributes except a context slot which holds context
- info (a tuple of the form (prefix, (lineno, column))), and a
- parent slot, which is not set by default but can be set to the
- parent node later.
-
- In order to reduce the amount of boilerplate code, the context is
- argument is handled by __new__ rather than __init__. There are
- also a few subclasses that override __new__ to sometimes avoid
- constructing an instance.
-
- """
-
- __slots__ = ["context", "parent"]
-
- def __new__(cls, context, *rest):
- assert cls not in (Node, Nonterminal, Terminal, Constant)
- obj = object.__new__(cls)
- obj.context = context
- return obj
-
- def get_children(self):
- return ()
-
- def set_parents(self, parent=None):
- self.parent = parent
- for child in self.get_children():
- child.set_parents(self)
-
- _stretch = False # Set to true to stretch the repr() vertically
-
- def __repr__(self, repr_arg=repr):
- stretch = self._stretch
- r = [self.__class__.__name__]
- if stretch:
- r.append("(\n ")
- else:
- r.append("(") # ")" -- crutch for Emacs python-mode :-(
- cls = self.__class__
- # Get nearest non-empty slots __slots__. This assumes
- # *something* has non-empty __slots__ before we reach object
- # (which has no __slots__). The class hierarchy guarantees
- # this.
- slots = cls.__slots__
- while not slots:
- cls = cls.__base__
- slots = cls.__slots__
- first = True
- for name in slots:
- if name == "context":
- continue # Skip this
- if first:
- first = False
- else:
- if stretch:
- r.append(",\n ")
- else:
- r.append(", ")
- try:
- value = getattr(self, name)
- except AttributeError:
- continue
- if stretch and isinstance(value, list):
- rr = map(repr_arg, value)
- rv = "[" + ",\n ".join(rr) + "]"
- else:
- rv = repr_arg(value)
- if stretch:
- rv = rv.replace("\n", "\n ")
- r.append(rv)
- r.append(")")
- return "".join(r)
-
- def __str__(self):
- return self.__repr__(repr_arg=str)
-
- def __eq__(self, other):
- if self.__class__ is not other.__class__:
- return NotImplemented
- return self.eq(other)
-
- def __ne__(self, other):
- result = self.__eq__(other)
- if result is not NotImplemented:
- result = not result
- return result
-
- def eq(self, other):
- assert self.__class__ is other.__class__
- return self.get_children() == other.get_children()
-
- def set_prefix(self, new_prefix):
- old_prefix, rest = self.context
- self.context = (new_prefix, rest)
-
- def get_prefix(self):
- return self.context[0]
-
-class Nonterminal(Node):
- """Abstract base class for nonterminal symbols.
-
- Nothing beyond Node.
-
- """
-
- __slots__ = []
-
- _stretch = True
-
-class Terminal(Node):
- """Abstract base class for terminal symbols.
-
- Nothing beyond Node.
-
- """
-
- __slots__ = []
-
-class Series(Nonterminal):
- """Abstract base class for nonterminals like stmts: stmt+."""
-
- __slots__ = []
-
- def __new__(cls, context, *nodes):
- assert cls is not Series
- if len(nodes) == 0:
- return None
- elif len(nodes) == 1:
- return nodes[0]
- else:
- obj = Nonterminal.__new__(cls, context)
- obj.init_series(nodes)
- return obj
-
-class Constant(Terminal):
- """Abstract base class for constants (e.g. number or string).
-
- Attributes:
-
- repr -- a string giving the token, exactly as read from source
-
- """
-
- __slots__ = ["repr"]
-
- def __init__(self, context, repr):
- self.repr = repr
-
- def __str__(self):
- prefix, (lineno, column) = self.context
- return prefix + self.repr
-
- def eq(self, other):
- return self.repr == other.repr
-
-# Node classes for terminal symbols
-
-class Token(Constant):
- """An otherwise unclassified operator or keyword (e.g. '+' or 'if').
-
- Attributres:
-
- repr -- a string giving the token's text.
-
- """
-
- __slots__ = []
-
-class Name(Terminal):
- """Name (e.g. a variable name or an attribute name).
-
- Attributes:
-
- name -- a string giving the name.
-
- """
-
- __slots__ = ["name"]
-
- def __init__(self, context, name):
- self.name = name
-
- def __str__(self):
- prefix, start = self.context
- return prefix + self.name
-
- def eq(self, other):
- return self.name == other.name
-
-class Number(Constant):
- """Numeric constant.
-
- Nothing beyond Constant.
-
- """
-
- __slots__ = []
-
-class String(Constant):
- """String constant.
-
- Nothing beyond Constant.
-
- """
-
- __slots__ = []
-
-# Nodes and factory functions for Python grammar
-
-class GenericSeries(Series):
-
- __slots__ = ["nodes"]
-
- def init_series(self, nodes):
- self.nodes = nodes
-
- def get_children(self):
- return self.nodes
-
- def __str__(self):
- return "".join(map(str, self.nodes))
-
- def replace(self, old, new):
- self.nodes = tuple((new if n is old else n) for n in self.nodes)
-
- def set_prefix(self, new_prefix):
- Series.set_prefix(self, new_prefix)
- self.nodes[0].set_prefix(new_prefix)
-
- def get_prefix(self):
- return self.nodes[0].get_prefix()
-
-class atom(GenericSeries):
- __slots__ = []
-
-class power(GenericSeries):
- __slots__ = []
-
-class factor(GenericSeries):
- __slots__ = []
-
-class term(GenericSeries):
- __slots__ = []
-
-class arith_expr(GenericSeries):
- __slots__ = []
-
-class shift_expr(GenericSeries):
- __slots__ = []
-
-class and_expr(GenericSeries):
- __slots__ = []
-
-class xor_expr(GenericSeries):
- __slots__ = []
-
-class or_expr(GenericSeries):
- __slots__ = []
-
-class expr(GenericSeries):
- __slots__ = []
-
-class comparison(GenericSeries):
- __slots__ = []
-
-class not_test(GenericSeries):
- __slots__ = []
-
-class and_test(GenericSeries):
- __slots__ = []
-
-class or_test(GenericSeries):
- __slots__ = []
-
-class test(GenericSeries):
- __slots__ = []
-
-class testlist(GenericSeries):
- __slots__ = []
-
-class expr_stmt(GenericSeries):
- __slots__ = []
-
-class trailer(GenericSeries):
- __slots__ = []
-
-class argument(GenericSeries):
- __slots__ = []
-
-class arglist(GenericSeries):
- __slots__ = []
-
-class subscript(GenericSeries):
- __slots__ = []
-
-class subscriptlist(GenericSeries):
- __slots__ = []
-
-class listmaker(GenericSeries):
- __slots__ = []
-
-class testlist_gexp(GenericSeries):
- __slots__ = []
-
-class suite(GenericSeries):
- __slots__ = []
-
-class if_stmt(GenericSeries):
- __slots__ = []
-
-class compound_stmt(GenericSeries):
- __slots__ = []
-
-class parameters(GenericSeries):
- __slots__ = []
-
-class funcdef(GenericSeries):
- __slots__ = []
-
-class fpdef(GenericSeries):
- __slots__ = []
-
-class varargslist(GenericSeries):
- __slots__ = []
-
-class classdef(GenericSeries):
- __slots__ = []
-
-class exprlist(GenericSeries):
- __slots__ = []
-
-class print_stmt(GenericSeries):
- __slots__ = []
-
-class for_stmt(GenericSeries):
- __slots__ = []
-
-class dotted_name(GenericSeries):
- __slots__ = []
-
-class dotted_as_name(GenericSeries):
- __slots__ = []
-
-class dotted_as_names(GenericSeries):
- __slots__ = []
-
-class import_as_names(GenericSeries):
- __slots__ = []
-
-class import_as_name(GenericSeries):
- __slots__ = []
-
-class import_name(GenericSeries):
- __slots__ = []
-
-class import_from(GenericSeries):
- __slots__ = []
-
-class import_stmt(GenericSeries):
- __slots__ = []
-
-class comp_op(GenericSeries):
- __slots__ = []
-
-class assert_stmt(GenericSeries):
- __slots__ = []
-
-class return_stmt(GenericSeries):
- __slots__ = []
-
-class continue_stmt(GenericSeries):
- __slots__ = []
-
-class break_stmt(GenericSeries):
- __slots__ = []
-
-class flow_stmt(GenericSeries):
- __slots__ = []
-
-class while_stmt(GenericSeries):
- __slots__ = []
-
-class except_clause(GenericSeries):
- __slots__ = []
-
-class try_stmt(GenericSeries):
- __slots__ = []
-
-class dictmaker(GenericSeries):
- __slots__ = []
-
-class raise_stmt(GenericSeries):
- __slots__ = []
-
-class del_stmt(GenericSeries):
- __slots__ = []
-
-class exec_stmt(GenericSeries):
- __slots__ = []
-
-class augassign(GenericSeries):
- __slots__ = []
-
-class global_stmt(GenericSeries):
- __slots__ = []
-
-class fplist(GenericSeries):
- __slots__ = []
-
-class lambdef(GenericSeries):
- __slots__ = []
-
-class old_test(GenericSeries):
- __slots__ = []
-
-class testlist_safe(GenericSeries):
- __slots__ = []
-
-class list_for(GenericSeries):
- __slots__ = []
-
-class decorator(GenericSeries):
- __slots__ = []
-
-class decorators(GenericSeries):
- __slots__ = []
-
-class yield_expr(GenericSeries):
- __slots__ = []
-
-class yield_stmt(GenericSeries):
- __slots__ = []
-
-class list_if(GenericSeries):
- __slots__ = []
-
-class list_iter(GenericSeries):
- __slots__ = []
-
-class gen_for(GenericSeries):
- __slots__ = []
-
-class gen_iter(GenericSeries):
- __slots__ = []
-
-class gen_if(GenericSeries):
- __slots__ = []
-
-class with_var(GenericSeries):
- __slots__ = []
-
-class with_stmt(GenericSeries):
- __slots__ = []
-
-class sliceop(GenericSeries):
- __slots__ = []
-
-class testlist1(GenericSeries):
- __slots__ = []
-
-
-def _transparent(context, node, *rest):
- assert rest == (), (context, node, rest)
- return node
-
-pass_stmt = _transparent
-small_stmt = _transparent
-stmt = _transparent
-
-class simple_stmt(GenericSeries):
- __slots__ = []
-
-class file_input(GenericSeries):
- __slots__ = []
-
-def convert(grammar, node):
- type, value, context, children = node
- # Is it a non-terminal symbol?
- if type in grammar.number2symbol:
- symbol = grammar.number2symbol[type]
- factory = globals().get(symbol)
- if factory is None:
- raise RuntimeError("can't find factory for %s (line %s)" %
- (symbol, context))
- # Debug variation:
- try:
- return factory(context, *children)
- except:
- logger.debug("%s %s", factory.__name__, "(")
- for child in children:
- logger.debug("%s %s", "==>", child)
- logger.debug(")")
- logger.debug("# Did you remember to declare a 'context' arg?")
- raise
- return factory(context, *children)
-
- # Must be a terminal symbol.
- if type == token.NAME:
- # Name or keyword. Special-case the snot out of this.
- if value in grammar.keywords:
- # Keywords become Tokens
- return Token(context, value)
- else:
- return Name(context, value)
-
- assert type in token.tok_name
-
- # Operators become Tokens
- return Token(context, value)
Added: sandbox/trunk/2to3/pytree.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/pytree.py Thu Nov 30 21:41:49 2006
@@ -0,0 +1,121 @@
+# Copyright 2006 Google, Inc. All Rights Reserved.
+# Licensed to PSF under a Contributor Agreement.
+
+"""Python syntax tree definitions.
+
+There are two concrete classes: Node, which represents an interior
+node, and Leaf, which represents a leaf node.
+
+The Base class is an abstract base class that provides some default
+functionality and boilerplate using the template pattern.
+"""
+
+__author__ = "Guido van Rossum <guido(a)python.org>"
+
+
+class Base(object):
+
+ parent = None
+ children = ()
+
+ def __new__(cls, *rest):
+ assert cls is not Base, "Cannot instantiate Base"
+ return object.__new__(cls, *rest)
+
+ def __eq__(self, other):
+ if self.__class__ is not other.__class__:
+ return NotImplemented
+ return self._eq(other)
+
+ def __ne__(self, other):
+ if self.__class__ is not other.__class__:
+ return NotImplemented
+ return not self._eq(other)
+
+ def _eq(self, other):
+ raise NotImplementedError
+
+ def set_prefix(self, prefix):
+ raise NotImplementedError
+
+ def get_prefix(self):
+ raise NotImplementedError
+
+
+class Node(Base):
+
+ def __init__(self, context, type, children):
+ self.type = type
+ self.children = tuple(children)
+ for ch in self.children:
+ ch.parent = self
+
+ def __repr__(self):
+ return "%s(<>, %r, %r)" % (self.__class__.__name__,
+ self.type,
+ self.children)
+
+ def __str__(self):
+ return "".join(str(ch) for ch in self.children)
+
+ def _eq(self, other):
+ return (self.type, self.children) == (other.type, other.children)
+
+ def set_prefix(self, prefix):
+ if self.children:
+ self.children[0].set_prefix(prefix)
+
+ def get_prefix(self):
+ if not self.children:
+ return ""
+ return self.children[0].get_prefix()
+
+ def replace(self, old, new):
+ l_children = []
+ found = False
+ for ch in self.children:
+ if ch is old:
+ assert not found, (self.children, old, new)
+ l_children.append(new)
+ found = True
+ else:
+ l_children.append(ch)
+ assert found, (self.children, old, new)
+ self.children = tuple(l_children)
+
+
+class Leaf(Base):
+
+ def __init__(self, context, type, value):
+ if context:
+ self.prefix, (self.lineno, self.column) = context
+ else:
+ self.prefix = ""
+ self.lineno = self.column = 0
+ self.type = type
+ self.value = value
+
+ def __repr__(self):
+ return "%s(<>, %r, %r)" % (self.__class__.__name__,
+ self.type,
+ self.value)
+
+ def __str__(self):
+ return self.prefix + self.value
+
+ def _eq(self, other):
+ return (self.type, self.value) == (other.type, other.value)
+
+ def set_prefix(self, prefix):
+ self.prefix = prefix
+
+ def get_prefix(self):
+ return self.prefix
+
+
+def convert(gr, raw_node):
+ type, value, context, children = raw_node
+ if children or type in gr.number2symbol:
+ return Node(context, type, children)
+ else:
+ return Leaf(context, type, value)