r52874 - sandbox/trunk/2to3 sandbox/trunk/2to3/fix_has_key.py sandbox/trunk/2to3/play.py sandbox/trunk/2to3/pynode.py sandbox/trunk/2to3/pytree.py
Author: guido.van.rossum Date: Thu Nov 30 21:41:49 2006 New Revision: 52874 Added: sandbox/trunk/2to3/pytree.py (contents, props changed) Removed: sandbox/trunk/2to3/pynode.py Modified: sandbox/trunk/2to3/ (props changed) sandbox/trunk/2to3/fix_has_key.py sandbox/trunk/2to3/play.py Log: Same idea, different tree node implementation. Modified: sandbox/trunk/2to3/fix_has_key.py ============================================================================== --- sandbox/trunk/2to3/fix_has_key.py (original) +++ sandbox/trunk/2to3/fix_has_key.py Thu Nov 30 21:41:49 2006 @@ -9,25 +9,34 @@ # Python imports import os import sys +import token import logging import pgen2 from pgen2 import driver -import pynode +import pytree -logging.basicConfig(level=logging.WARN) +logging.basicConfig(level=logging.DEBUG) + +gr = driver.load_grammar("Grammar.txt") # used by node initializers + +class Symbols(object): + + def __init__(self, gr): + for name, symbol in gr.symbol2number.iteritems(): + setattr(self, name, symbol) + +syms = Symbols(gr) def main(): args = sys.argv[1:] or ["example.py"] - gr = driver.load_grammar("Grammar.txt") - dr = driver.Driver(gr, convert=pynode.convert) + dr = driver.Driver(gr, convert=pytree.convert) for fn in args: print "Parsing", fn tree = dr.parse_file(fn) - tree.set_parents() refactor(tree) diff(fn, tree) @@ -36,25 +45,26 @@ def visit(node, func): func(node) - for child in node.get_children(): + for child in node.children: visit(child, func) # Sample nodes -_context = ("", (0, 0)) -n_dot = pynode.Token(_context, ".") -n_has_key = pynode.Name(_context, "has_key") -n_trailer_has_key = pynode.trailer(_context, n_dot, n_has_key) -n_lpar = pynode.Token(_context, "(") -n_star = pynode.Token(_context, "*") -n_comma = pynode.Token(_context, ",") -n_in = pynode.Token((" ", (0, 0)), "in") +n_dot = pytree.Leaf(None, token.DOT, ".") +n_has_key = pytree.Leaf(None, token.NAME, "has_key") +n_trailer_has_key = pytree.Node(None, syms.trailer, (n_dot, n_has_key)) +n_lpar = pytree.Leaf(None, token.LPAR, "(") +n_star = pytree.Leaf(None, token.STAR, "*") +n_comma = pytree.Leaf(None, token.COMMA, ",") +n_in = pytree.Leaf((" ", (0, 0)), token.NAME, "in") # XXX what operator? + +import pdb def fix_has_key(node): if node != n_trailer_has_key: return # XXX Could use more DOM manipulation primitives and matching operations parent = node.parent - nodes = parent.get_children() + nodes = parent.children for i, n in enumerate(nodes): if n is node: break @@ -66,17 +76,17 @@ if len(nodes) != i+2: return # Too much follows ".has_key", e.g. ".has_key(x).blah" next = nodes[i+1] - if not isinstance(next, pynode.trailer): + if next.type != syms.trailer: return # ".has_key" not followed by another trailer - next_children = next.get_children() + next_children = next.children if next_children[0] != n_lpar: return # ".has_key" not followed by "(...)" if len(next_children) != 3: return # ".has_key" followed by "()" argsnode = next_children[1] arg = argsnode - if isinstance(argsnode, pynode.arglist): - args = argsnode.get_children() + if argsnode.type != syms.arglist: + args = argsnode.children if len(args) > 2: return # Too many arguments if len(args) == 2: @@ -88,10 +98,8 @@ # Change "X.has_key(Y)" into "Y in X" arg.set_prefix(nodes[0].get_prefix()) nodes[0].set_prefix(" ") - new = pynode.comparison(_context, - arg, - n_in, - pynode.power(_context, *nodes[:i])) + new = pytree.Node(None, syms.comparison, + (arg, n_in, pytree.Node(None, syms.power, nodes[:i]))) # XXX Sometimes we need to parenthesize arg or new. Later. parent.parent.replace(parent, new) Modified: sandbox/trunk/2to3/play.py ============================================================================== --- sandbox/trunk/2to3/play.py (original) +++ sandbox/trunk/2to3/play.py Thu Nov 30 21:41:49 2006 @@ -14,17 +14,16 @@ import pgen2 from pgen2 import driver -import pynode +import pytree logging.basicConfig(level=logging.WARN) def main(): gr = driver.load_grammar("Grammar.txt") - dr = driver.Driver(gr, convert=pynode.convert) + dr = driver.Driver(gr, convert=pytree.convert) fn = "example.py" tree = dr.parse_file(fn, debug=True) - tree.set_parents() sys.stdout.write(str(tree)) if not diff(fn, tree): print "No diffs." Deleted: /sandbox/trunk/2to3/pynode.py ============================================================================== --- /sandbox/trunk/2to3/pynode.py Thu Nov 30 21:41:49 2006 +++ (empty file) @@ -1,547 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Modifications: -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Syntax tree node definitions. - -There is a class or function corresponding to each terminal and -nonterminal symbol. - -We use __slots__ to make the parse tree nodes as small as possible. - -NOTE: EVERY CLASS MUST HAVE A __slots__ DEFINITION, EVEN IF EMPTY! -(If not, the instances will get a __dict__, defeating the purpose of -__slots__ in our case.) -""" - -__author__ = "Guido van Rossum <guido@python.org>" - -# Python imports -import token -import logging - -# Pgen imports -from pgen2 import grammar - -# Custom logger -logger = logging.getLogger() - -class Node(object): - - # XXX Should refactor this so that there are only two kinds of nodes, - # Terminal and Nonterminal; each with subclasses to match the grammar - # or perhaps just storing the node type in a slot. - - """Abstract base class for all nodes. - - This has no attributes except a context slot which holds context - info (a tuple of the form (prefix, (lineno, column))), and a - parent slot, which is not set by default but can be set to the - parent node later. - - In order to reduce the amount of boilerplate code, the context is - argument is handled by __new__ rather than __init__. There are - also a few subclasses that override __new__ to sometimes avoid - constructing an instance. - - """ - - __slots__ = ["context", "parent"] - - def __new__(cls, context, *rest): - assert cls not in (Node, Nonterminal, Terminal, Constant) - obj = object.__new__(cls) - obj.context = context - return obj - - def get_children(self): - return () - - def set_parents(self, parent=None): - self.parent = parent - for child in self.get_children(): - child.set_parents(self) - - _stretch = False # Set to true to stretch the repr() vertically - - def __repr__(self, repr_arg=repr): - stretch = self._stretch - r = [self.__class__.__name__] - if stretch: - r.append("(\n ") - else: - r.append("(") # ")" -- crutch for Emacs python-mode :-( - cls = self.__class__ - # Get nearest non-empty slots __slots__. This assumes - # *something* has non-empty __slots__ before we reach object - # (which has no __slots__). The class hierarchy guarantees - # this. - slots = cls.__slots__ - while not slots: - cls = cls.__base__ - slots = cls.__slots__ - first = True - for name in slots: - if name == "context": - continue # Skip this - if first: - first = False - else: - if stretch: - r.append(",\n ") - else: - r.append(", ") - try: - value = getattr(self, name) - except AttributeError: - continue - if stretch and isinstance(value, list): - rr = map(repr_arg, value) - rv = "[" + ",\n ".join(rr) + "]" - else: - rv = repr_arg(value) - if stretch: - rv = rv.replace("\n", "\n ") - r.append(rv) - r.append(")") - return "".join(r) - - def __str__(self): - return self.__repr__(repr_arg=str) - - def __eq__(self, other): - if self.__class__ is not other.__class__: - return NotImplemented - return self.eq(other) - - def __ne__(self, other): - result = self.__eq__(other) - if result is not NotImplemented: - result = not result - return result - - def eq(self, other): - assert self.__class__ is other.__class__ - return self.get_children() == other.get_children() - - def set_prefix(self, new_prefix): - old_prefix, rest = self.context - self.context = (new_prefix, rest) - - def get_prefix(self): - return self.context[0] - -class Nonterminal(Node): - """Abstract base class for nonterminal symbols. - - Nothing beyond Node. - - """ - - __slots__ = [] - - _stretch = True - -class Terminal(Node): - """Abstract base class for terminal symbols. - - Nothing beyond Node. - - """ - - __slots__ = [] - -class Series(Nonterminal): - """Abstract base class for nonterminals like stmts: stmt+.""" - - __slots__ = [] - - def __new__(cls, context, *nodes): - assert cls is not Series - if len(nodes) == 0: - return None - elif len(nodes) == 1: - return nodes[0] - else: - obj = Nonterminal.__new__(cls, context) - obj.init_series(nodes) - return obj - -class Constant(Terminal): - """Abstract base class for constants (e.g. number or string). - - Attributes: - - repr -- a string giving the token, exactly as read from source - - """ - - __slots__ = ["repr"] - - def __init__(self, context, repr): - self.repr = repr - - def __str__(self): - prefix, (lineno, column) = self.context - return prefix + self.repr - - def eq(self, other): - return self.repr == other.repr - -# Node classes for terminal symbols - -class Token(Constant): - """An otherwise unclassified operator or keyword (e.g. '+' or 'if'). - - Attributres: - - repr -- a string giving the token's text. - - """ - - __slots__ = [] - -class Name(Terminal): - """Name (e.g. a variable name or an attribute name). - - Attributes: - - name -- a string giving the name. - - """ - - __slots__ = ["name"] - - def __init__(self, context, name): - self.name = name - - def __str__(self): - prefix, start = self.context - return prefix + self.name - - def eq(self, other): - return self.name == other.name - -class Number(Constant): - """Numeric constant. - - Nothing beyond Constant. - - """ - - __slots__ = [] - -class String(Constant): - """String constant. - - Nothing beyond Constant. - - """ - - __slots__ = [] - -# Nodes and factory functions for Python grammar - -class GenericSeries(Series): - - __slots__ = ["nodes"] - - def init_series(self, nodes): - self.nodes = nodes - - def get_children(self): - return self.nodes - - def __str__(self): - return "".join(map(str, self.nodes)) - - def replace(self, old, new): - self.nodes = tuple((new if n is old else n) for n in self.nodes) - - def set_prefix(self, new_prefix): - Series.set_prefix(self, new_prefix) - self.nodes[0].set_prefix(new_prefix) - - def get_prefix(self): - return self.nodes[0].get_prefix() - -class atom(GenericSeries): - __slots__ = [] - -class power(GenericSeries): - __slots__ = [] - -class factor(GenericSeries): - __slots__ = [] - -class term(GenericSeries): - __slots__ = [] - -class arith_expr(GenericSeries): - __slots__ = [] - -class shift_expr(GenericSeries): - __slots__ = [] - -class and_expr(GenericSeries): - __slots__ = [] - -class xor_expr(GenericSeries): - __slots__ = [] - -class or_expr(GenericSeries): - __slots__ = [] - -class expr(GenericSeries): - __slots__ = [] - -class comparison(GenericSeries): - __slots__ = [] - -class not_test(GenericSeries): - __slots__ = [] - -class and_test(GenericSeries): - __slots__ = [] - -class or_test(GenericSeries): - __slots__ = [] - -class test(GenericSeries): - __slots__ = [] - -class testlist(GenericSeries): - __slots__ = [] - -class expr_stmt(GenericSeries): - __slots__ = [] - -class trailer(GenericSeries): - __slots__ = [] - -class argument(GenericSeries): - __slots__ = [] - -class arglist(GenericSeries): - __slots__ = [] - -class subscript(GenericSeries): - __slots__ = [] - -class subscriptlist(GenericSeries): - __slots__ = [] - -class listmaker(GenericSeries): - __slots__ = [] - -class testlist_gexp(GenericSeries): - __slots__ = [] - -class suite(GenericSeries): - __slots__ = [] - -class if_stmt(GenericSeries): - __slots__ = [] - -class compound_stmt(GenericSeries): - __slots__ = [] - -class parameters(GenericSeries): - __slots__ = [] - -class funcdef(GenericSeries): - __slots__ = [] - -class fpdef(GenericSeries): - __slots__ = [] - -class varargslist(GenericSeries): - __slots__ = [] - -class classdef(GenericSeries): - __slots__ = [] - -class exprlist(GenericSeries): - __slots__ = [] - -class print_stmt(GenericSeries): - __slots__ = [] - -class for_stmt(GenericSeries): - __slots__ = [] - -class dotted_name(GenericSeries): - __slots__ = [] - -class dotted_as_name(GenericSeries): - __slots__ = [] - -class dotted_as_names(GenericSeries): - __slots__ = [] - -class import_as_names(GenericSeries): - __slots__ = [] - -class import_as_name(GenericSeries): - __slots__ = [] - -class import_name(GenericSeries): - __slots__ = [] - -class import_from(GenericSeries): - __slots__ = [] - -class import_stmt(GenericSeries): - __slots__ = [] - -class comp_op(GenericSeries): - __slots__ = [] - -class assert_stmt(GenericSeries): - __slots__ = [] - -class return_stmt(GenericSeries): - __slots__ = [] - -class continue_stmt(GenericSeries): - __slots__ = [] - -class break_stmt(GenericSeries): - __slots__ = [] - -class flow_stmt(GenericSeries): - __slots__ = [] - -class while_stmt(GenericSeries): - __slots__ = [] - -class except_clause(GenericSeries): - __slots__ = [] - -class try_stmt(GenericSeries): - __slots__ = [] - -class dictmaker(GenericSeries): - __slots__ = [] - -class raise_stmt(GenericSeries): - __slots__ = [] - -class del_stmt(GenericSeries): - __slots__ = [] - -class exec_stmt(GenericSeries): - __slots__ = [] - -class augassign(GenericSeries): - __slots__ = [] - -class global_stmt(GenericSeries): - __slots__ = [] - -class fplist(GenericSeries): - __slots__ = [] - -class lambdef(GenericSeries): - __slots__ = [] - -class old_test(GenericSeries): - __slots__ = [] - -class testlist_safe(GenericSeries): - __slots__ = [] - -class list_for(GenericSeries): - __slots__ = [] - -class decorator(GenericSeries): - __slots__ = [] - -class decorators(GenericSeries): - __slots__ = [] - -class yield_expr(GenericSeries): - __slots__ = [] - -class yield_stmt(GenericSeries): - __slots__ = [] - -class list_if(GenericSeries): - __slots__ = [] - -class list_iter(GenericSeries): - __slots__ = [] - -class gen_for(GenericSeries): - __slots__ = [] - -class gen_iter(GenericSeries): - __slots__ = [] - -class gen_if(GenericSeries): - __slots__ = [] - -class with_var(GenericSeries): - __slots__ = [] - -class with_stmt(GenericSeries): - __slots__ = [] - -class sliceop(GenericSeries): - __slots__ = [] - -class testlist1(GenericSeries): - __slots__ = [] - - -def _transparent(context, node, *rest): - assert rest == (), (context, node, rest) - return node - -pass_stmt = _transparent -small_stmt = _transparent -stmt = _transparent - -class simple_stmt(GenericSeries): - __slots__ = [] - -class file_input(GenericSeries): - __slots__ = [] - -def convert(grammar, node): - type, value, context, children = node - # Is it a non-terminal symbol? - if type in grammar.number2symbol: - symbol = grammar.number2symbol[type] - factory = globals().get(symbol) - if factory is None: - raise RuntimeError("can't find factory for %s (line %s)" % - (symbol, context)) - # Debug variation: - try: - return factory(context, *children) - except: - logger.debug("%s %s", factory.__name__, "(") - for child in children: - logger.debug("%s %s", "==>", child) - logger.debug(")") - logger.debug("# Did you remember to declare a 'context' arg?") - raise - return factory(context, *children) - - # Must be a terminal symbol. - if type == token.NAME: - # Name or keyword. Special-case the snot out of this. - if value in grammar.keywords: - # Keywords become Tokens - return Token(context, value) - else: - return Name(context, value) - - assert type in token.tok_name - - # Operators become Tokens - return Token(context, value) Added: sandbox/trunk/2to3/pytree.py ============================================================================== --- (empty file) +++ sandbox/trunk/2to3/pytree.py Thu Nov 30 21:41:49 2006 @@ -0,0 +1,121 @@ +# Copyright 2006 Google, Inc. All Rights Reserved. +# Licensed to PSF under a Contributor Agreement. + +"""Python syntax tree definitions. + +There are two concrete classes: Node, which represents an interior +node, and Leaf, which represents a leaf node. + +The Base class is an abstract base class that provides some default +functionality and boilerplate using the template pattern. +""" + +__author__ = "Guido van Rossum <guido@python.org>" + + +class Base(object): + + parent = None + children = () + + def __new__(cls, *rest): + assert cls is not Base, "Cannot instantiate Base" + return object.__new__(cls, *rest) + + def __eq__(self, other): + if self.__class__ is not other.__class__: + return NotImplemented + return self._eq(other) + + def __ne__(self, other): + if self.__class__ is not other.__class__: + return NotImplemented + return not self._eq(other) + + def _eq(self, other): + raise NotImplementedError + + def set_prefix(self, prefix): + raise NotImplementedError + + def get_prefix(self): + raise NotImplementedError + + +class Node(Base): + + def __init__(self, context, type, children): + self.type = type + self.children = tuple(children) + for ch in self.children: + ch.parent = self + + def __repr__(self): + return "%s(<>, %r, %r)" % (self.__class__.__name__, + self.type, + self.children) + + def __str__(self): + return "".join(str(ch) for ch in self.children) + + def _eq(self, other): + return (self.type, self.children) == (other.type, other.children) + + def set_prefix(self, prefix): + if self.children: + self.children[0].set_prefix(prefix) + + def get_prefix(self): + if not self.children: + return "" + return self.children[0].get_prefix() + + def replace(self, old, new): + l_children = [] + found = False + for ch in self.children: + if ch is old: + assert not found, (self.children, old, new) + l_children.append(new) + found = True + else: + l_children.append(ch) + assert found, (self.children, old, new) + self.children = tuple(l_children) + + +class Leaf(Base): + + def __init__(self, context, type, value): + if context: + self.prefix, (self.lineno, self.column) = context + else: + self.prefix = "" + self.lineno = self.column = 0 + self.type = type + self.value = value + + def __repr__(self): + return "%s(<>, %r, %r)" % (self.__class__.__name__, + self.type, + self.value) + + def __str__(self): + return self.prefix + self.value + + def _eq(self, other): + return (self.type, self.value) == (other.type, other.value) + + def set_prefix(self, prefix): + self.prefix = prefix + + def get_prefix(self): + return self.prefix + + +def convert(gr, raw_node): + type, value, context, children = raw_node + if children or type in gr.number2symbol: + return Node(context, type, children) + else: + return Leaf(context, type, value)
participants (1)
-
guido.van.rossum