Author: guido.van.rossum
Date: Thu Nov 30 07:15:54 2006
New Revision: 52868
Added:
sandbox/trunk/2to3/README (contents, props changed)
sandbox/trunk/2to3/fix_has_key.py (contents, props changed)
Modified:
sandbox/trunk/2to3/example.py
sandbox/trunk/2to3/pgen2/driver.py
sandbox/trunk/2to3/pgen2/parse.py
sandbox/trunk/2to3/pgen2/test.py
sandbox/trunk/2to3/play.py (contents, props changed)
sandbox/trunk/2to3/pynode.py
Log:
Got a working 'has_key' refactoring, on a par with ../refactor/has_key.py.
Added: sandbox/trunk/2to3/README
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/README Thu Nov 30 07:15:54 2006
@@ -0,0 +1,21 @@
+A refactoring tool for converting Python 2.x code to 3.0.
+
+This is a prototype!
+
+Files:
+
+play.py - test program
+pynode.py - parse tree nodes for Python grammar
+tokenize.py - modified version of stdlib tokenize.py (1)
+fix_has_key.py - refactoring tool changing 'x.has_key(y)' into 'y in x'
+example.py - example input
+Grammar.txt - Grammar input (same as Grammar/Grammar in Python 2.5)
+Grammar.pickle - Pickled grammar tables
+pgen2/ - Parser generator and driver (2)
+
+Notes:
+
+(1) tokenize.py was modified to yield an NL pseudo-token for backslash
+ continuations, so the original source can be reproduced exactly.
+(2) pgen2 was developed while I was at Elemental Security. It was
+ modified to suit the needs of this refactoring tool.
Modified: sandbox/trunk/2to3/example.py
==============================================================================
--- sandbox/trunk/2to3/example.py (original)
+++ sandbox/trunk/2to3/example.py Thu Nov 30 07:15:54 2006
@@ -1,8 +1,8 @@
#!/usr/bin/python
-"""Docstring.
-"""
-print (12 + # Hello
- # world
+"""Docstring."""
+
+d = {"x": 42}
+if d.has_key("x") or d.has_key("y"):
+ print d["x"]
- 12)
# This is the last line.
Added: sandbox/trunk/2to3/fix_has_key.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/fix_has_key.py Thu Nov 30 07:15:54 2006
@@ -0,0 +1,109 @@
+#!/usr/bin/env python2.5
+# Copyright 2006 Python Software Foundation. All Rights Reserved.
+
+"""Refactoring tool: change 'x.has_key(y)' into 'y in x'."""
+
+__author__ = "Guido van Rossum <guido(a)python.org>"
+
+# Python imports
+import os
+import sys
+import logging
+
+import pgen2
+from pgen2 import driver
+
+import pynode
+
+logging.basicConfig(level=logging.WARN)
+
+def main():
+ args = sys.argv[1:] or ["example.py"]
+
+ gr = driver.load_grammar("Grammar.txt")
+ dr = driver.Driver(gr, convert=pynode.convert)
+
+ for fn in args:
+ print "Parsing", fn
+ tree = dr.parse_file(fn)
+ tree.set_parents()
+ refactor(tree)
+ diff(fn, tree)
+
+def refactor(tree):
+ visit(tree, fix_has_key)
+
+def visit(node, func):
+ func(node)
+ for child in node.get_children():
+ visit(child, func)
+
+# Sample nodes
+_context = ("", (0, 0))
+n_dot = pynode.Token(_context, ".")
+n_has_key = pynode.Name(_context, "has_key")
+n_trailer_has_key = pynode.trailer(_context, n_dot, n_has_key)
+n_lpar = pynode.Token(_context, "(")
+n_star = pynode.Token(_context, "*")
+n_comma = pynode.Token(_context, ",")
+n_in = pynode.Token((" ", (0, 0)), "in")
+
+def fix_has_key(node):
+ if node != n_trailer_has_key:
+ return
+ # XXX Could use more DOM manipulation primitives and matching operations
+ parent = node.parent
+ nodes = parent.get_children()
+ for i, n in enumerate(nodes):
+ if n is node:
+ break
+ else:
+ print "Can't find node in parent?!"
+ return
+ if i+1 >= len(nodes):
+ return # Nothing follows ".has_key"
+ if len(nodes) != i+2:
+ return # Too much follows ".has_key", e.g. ".has_key(x).blah"
+ next = nodes[i+1]
+ if not isinstance(next, pynode.trailer):
+ return # ".has_key" not followed by another trailer
+ next_children = next.get_children()
+ if next_children[0] != n_lpar:
+ return # ".has_key" not followed by "(...)"
+ if len(next_children) != 3:
+ return # ".has_key" followed by "()"
+ argsnode = next_children[1]
+ arg = argsnode
+ if isinstance(argsnode, pynode.arglist):
+ args = argsnode.get_children()
+ if len(args) > 2:
+ return # Too many arguments
+ if len(args) == 2:
+ if args[0] == n_star:
+ return # .has_key(*foo) -- you've gotta be kidding!
+ if args[1] != n_comma:
+ return # Only .has_key(foo,) expected
+ arg = args[0]
+ # Change "X.has_key(Y)" into "Y in X"
+ arg.set_prefix(nodes[0].get_prefix())
+ nodes[0].set_prefix(" ")
+ new = pynode.comparison(_context,
+ arg,
+ n_in,
+ pynode.power(_context, *nodes[:i]))
+ # XXX Sometimes we need to parenthesize arg or new. Later.
+ parent.parent.replace(parent, new)
+
+def diff(fn, tree):
+ f = open("@", "w")
+ try:
+ f.write(str(tree))
+ finally:
+ f.close()
+ try:
+ return os.system("diff -u %s @" % fn)
+ finally:
+ os.remove("@")
+
+if __name__ == "__main__":
+ main()
Modified: sandbox/trunk/2to3/pgen2/driver.py
==============================================================================
--- sandbox/trunk/2to3/pgen2/driver.py (original)
+++ sandbox/trunk/2to3/pgen2/driver.py Thu Nov 30 07:15:54 2006
@@ -22,7 +22,6 @@
# Pgen imports
from pgen2 import parse
-from pgen2 import astnode
from pgen2 import grammar
class Driver(object):
Modified: sandbox/trunk/2to3/pgen2/parse.py
==============================================================================
--- sandbox/trunk/2to3/pgen2/parse.py (original)
+++ sandbox/trunk/2to3/pgen2/parse.py Thu Nov 30 07:15:54 2006
@@ -80,9 +80,7 @@
symbols, and None for tokens.
An abstract syntax tree node may be anything; this is entirely
- up to the converter function. For example, it can be an
- instance of a subclass of the astnode.Node class (see the
- astnode module).
+ up to the converter function.
"""
self.grammar = grammar
Modified: sandbox/trunk/2to3/pgen2/test.py
==============================================================================
--- sandbox/trunk/2to3/pgen2/test.py (original)
+++ sandbox/trunk/2to3/pgen2/test.py Thu Nov 30 07:15:54 2006
@@ -1,6 +1,9 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
+# Modifications:
+# Copyright 2006 Python Software Foundation. All Rights Reserved.
+
def test():
import sys
sys.path[0] = ".."
@@ -11,7 +14,8 @@
finally:
f.close()
sample = "year<=1989 ? ('Modula-3' + ABC) ** 2 : Python"
- tree = driver.parse_string(sample, True)
+ dr = driver.Driver(driver.load_grammar())
+ tree = dr.parse_string(sample, True)
print tree
if __name__ == "__main__":
Modified: sandbox/trunk/2to3/play.py
==============================================================================
--- sandbox/trunk/2to3/play.py (original)
+++ sandbox/trunk/2to3/play.py Thu Nov 30 07:15:54 2006
@@ -1,9 +1,7 @@
#!/usr/bin/env python2.5
# Copyright 2006 Python Software Foundation. All Rights Reserved.
-"""XXX."""
-
-##from __future__ import with_statement
+"""Main program for testing the infrastructure."""
__author__ = "Guido van Rossum <guido(a)python.org>"
@@ -19,22 +17,17 @@
logging.basicConfig(level=logging.WARN)
-def diff(fn, tree):
- f = open("@", "w")
- try:
- f.write(str(tree))
- finally:
- f.close()
- return os.system("diff -u %s @" % fn)
-
def main():
gr = driver.load_grammar("Grammar.txt")
dr = driver.Driver(gr, convert=pynode.convert)
tree = dr.parse_file("example.py", debug=True)
+ tree.set_parents()
sys.stdout.write(str(tree))
return # Comment out to run the complete test suite below
+ problems = []
+
# Process every imported module
for name in sys.modules:
mod = sys.modules[name]
@@ -47,7 +40,8 @@
continue
print >>sys.stderr, "Parsing", fn
tree = dr.parse_file(fn, debug=True)
- diff(fn, tree)
+ if diff(fn, tree):
+ problems.append(fn)
# Process every single module on sys.path (but not in packages)
for dir in sys.path:
@@ -66,7 +60,27 @@
except pgen2.parse.ParseError, err:
print "ParseError:", err
else:
- diff(fn, tree)
+ if diff(fn, tree):
+ problems.append(fn)
+
+ # Show summary of problem files
+ if not problems:
+ print "No problems. Congratulations!"
+ else:
+ print "Problems in following files:"
+ for fn in problems:
+ print "***", fn
+
+def diff(fn, tree):
+ f = open("@", "w")
+ try:
+ f.write(str(tree))
+ finally:
+ f.close()
+ try:
+ return os.system("diff -u %s @" % fn)
+ finally:
+ os.remove("@")
if __name__ == "__main__":
main()
Modified: sandbox/trunk/2to3/pynode.py
==============================================================================
--- sandbox/trunk/2to3/pynode.py (original)
+++ sandbox/trunk/2to3/pynode.py Thu Nov 30 07:15:54 2006
@@ -29,15 +29,17 @@
logger = logging.getLogger()
class Node(object):
+
+ # XXX Should refactor this so that there are only two kinds of nodes,
+ # Terminal and Nonterminal; each with subclasses to match the grammar
+ # or perhaps just storing the node type in a slot.
+
"""Abstract base class for all nodes.
- This has no attributes except a context slot which holds the line
- number (or more detailed context info). In the future this might
- change this to several slots (e.g. filename, lineno, column, or
- even filename, start_lineno, start_column, end_lineno,
- end_column). The context is only referenced by two places: the
- part of the code that sticks it in, and the part of the code that
- reports errors.
+ This has no attributes except a context slot which holds context
+ info (a tuple of the form (prefix, (lineno, column))), and a
+ parent slot, which is not set by default but can be set to the
+ parent node later.
In order to reduce the amount of boilerplate code, the context is
argument is handled by __new__ rather than __init__. There are
@@ -46,7 +48,7 @@
"""
- __slots__ = ["context"]
+ __slots__ = ["context", "parent"]
def __new__(cls, context, *rest):
assert cls not in (Node, Nonterminal, Terminal, Constant)
@@ -54,6 +56,14 @@
obj.context = context
return obj
+ def get_children(self):
+ return ()
+
+ def set_parents(self, parent=None):
+ self.parent = parent
+ for child in self.get_children():
+ child.set_parents(self)
+
_stretch = False # Set to true to stretch the repr() vertically
def __repr__(self, repr_arg=repr):
@@ -101,6 +111,28 @@
def __str__(self):
return self.__repr__(repr_arg=str)
+ def __eq__(self, other):
+ if self.__class__ is not other.__class__:
+ return NotImplemented
+ return self.eq(other)
+
+ def __ne__(self, other):
+ result = self.__eq__(other)
+ if result is not NotImplemented:
+ result = not result
+ return result
+
+ def eq(self, other):
+ assert self.__class__ is other.__class__
+ return self.get_children() == other.get_children()
+
+ def set_prefix(self, new_prefix):
+ old_prefix, rest = self.context
+ self.context = (new_prefix, rest)
+
+ def get_prefix(self):
+ return self.context[0]
+
class Nonterminal(Node):
"""Abstract base class for nonterminal symbols.
@@ -134,7 +166,7 @@
return nodes[0]
else:
obj = Nonterminal.__new__(cls, context)
- obj.initseries(nodes)
+ obj.init_series(nodes)
return obj
class Constant(Terminal):
@@ -152,9 +184,12 @@
self.repr = repr
def __str__(self):
- prefix, start = self.context
+ prefix, (lineno, column) = self.context
return prefix + self.repr
+ def eq(self, other):
+ return self.repr == other.repr
+
# Node classes for terminal symbols
class Token(Constant):
@@ -186,6 +221,9 @@
prefix, start = self.context
return prefix + self.name
+ def eq(self, other):
+ return self.name == other.name
+
class Number(Constant):
"""Numeric constant.
@@ -207,12 +245,28 @@
# Nodes and factory functions for Python grammar
class GenericSeries(Series):
+
__slots__ = ["nodes"]
- def initseries(self, nodes):
+
+ def init_series(self, nodes):
self.nodes = nodes
+
+ def get_children(self):
+ return self.nodes
+
def __str__(self):
return "".join(map(str, self.nodes))
+ def replace(self, old, new):
+ self.nodes = tuple((new if n is old else n) for n in self.nodes)
+
+ def set_prefix(self, new_prefix):
+ Series.set_prefix(self, new_prefix)
+ self.nodes[0].set_prefix(new_prefix)
+
+ def get_prefix(self):
+ return self.nodes[0].get_prefix()
+
class atom(GenericSeries):
__slots__ = []