[Python-checkins] r52866 - in sandbox/trunk/2to3: example.py pgen2/driver.py play.py pynode.py tokenize.py

guido.van.rossum python-checkins at python.org
Thu Nov 30 02:48:56 CET 2006


Author: guido.van.rossum
Date: Thu Nov 30 02:48:54 2006
New Revision: 52866

Modified:
   sandbox/trunk/2to3/example.py
   sandbox/trunk/2to3/pgen2/driver.py
   sandbox/trunk/2to3/play.py
   sandbox/trunk/2to3/pynode.py
   sandbox/trunk/2to3/tokenize.py
Log:
We now correctly render all of the standard library's top-level modules,
except for files containing ^L characters.


Modified: sandbox/trunk/2to3/example.py
==============================================================================
--- sandbox/trunk/2to3/example.py	(original)
+++ sandbox/trunk/2to3/example.py	Thu Nov 30 02:48:54 2006
@@ -1 +1,8 @@
-print (12 + 12)
+#!/usr/bin/python
+"""Docstring.
+"""
+print (12 + # Hello
+            # world
+
+       12)
+# This is the last line.

Modified: sandbox/trunk/2to3/pgen2/driver.py
==============================================================================
--- sandbox/trunk/2to3/pgen2/driver.py	(original)
+++ sandbox/trunk/2to3/pgen2/driver.py	Thu Nov 30 02:48:54 2006
@@ -38,19 +38,42 @@
         """Parse a stream and return the concrete syntax tree."""
         p = parse.Parser(self.grammar, self.convert)
         p.setup()
-        t = v = x = None
-        # (t, v, x, y, z) == (type, value, start, end, line)
-        for t, v, x, y, z in tokenize.generate_tokens(stream.readline):
-            if t in (tokenize.COMMENT, tokenize.NL):
+        lineno = 1
+        column = 0
+        type = value = start = end = line_text = None
+        prefix = ""
+        for quintuple in tokenize.generate_tokens(stream.readline):
+            type, value, start, end, line_text = quintuple
+            if start != (lineno, column):
+                assert (lineno, column) <= start, ((lineno, column), start)
+                s_lineno, s_column = start
+                if lineno < s_lineno:
+                    prefix += "\n" * (s_lineno - lineno)
+                    lineno = s_lineno
+                    column = 0
+                if column < s_column:
+                    prefix += " " * (s_column - column)
+                    column = s_column
+            if type in (tokenize.COMMENT, tokenize.NL):
+                prefix += value
+                lineno, column = end
+                if value.endswith("\n"):
+                    lineno += 1
+                    column = 0
                 continue
-            if t == token.OP:
-                t = grammar.opmap[v]
+            if type == token.OP:
+                type = grammar.opmap[value]
             if debug:
-                self.logger.debug("%s %r", token.tok_name[t], v)
-            if p.addtoken(t, v, x):
+                self.logger.debug("%s %r", token.tok_name[type], value)
+            if p.addtoken(type, value, (prefix, start)):
                 if debug:
                     self.logger.debug("Stop.")
                 break
+            prefix = ""
+            lineno, column = end
+            if value.endswith("\n"):
+                lineno += 1
+                column = 0
         else:
             # We never broke out -- EOF is too soon (how can this happen???)
             raise parse.ParseError("incomplete input", t, v, x)

Modified: sandbox/trunk/2to3/play.py
==============================================================================
--- sandbox/trunk/2to3/play.py	(original)
+++ sandbox/trunk/2to3/play.py	Thu Nov 30 02:48:54 2006
@@ -19,41 +19,54 @@
 
 logging.basicConfig(level=logging.WARN)
 
+def diff(fn, tree):
+    f = open("@", "w")
+    try:
+        f.write(str(tree))
+    finally:
+        f.close()
+    return os.system("diff -u %s @" % fn)
+
 def main():
     gr = driver.load_grammar("Grammar.txt")
     dr = driver.Driver(gr, convert=pynode.convert)
 
     tree = dr.parse_file("example.py", debug=True)
-    print tree
+    sys.stdout.write(str(tree))
+    return
 
-##     # Process every imported module
-##     for name in sys.modules:
-##         mod = sys.modules[name]
-##         if mod is None or not hasattr(mod, "__file__"):
-##             continue
-##         fn = mod.__file__
-##         if fn.endswith(".pyc"):
-##             fn = fn[:-1]
-##         if not fn.endswith(".py"):
-##             continue
-##         print >>sys.stderr, "Parsing", fn
-##         dr.parse_file(fn, debug=True)
-
-##     # Process every single module on sys.path (but not in packages)
-##     for dir in sys.path:
-##         try:
-##             names = os.listdir(dir)
-##         except os.error:
-##             continue
-##         print >>sys.stderr, "Scanning", dir, "..."
-##         for name in names:
-##             if not name.endswith(".py"):
-##                 continue
-##             print >>sys.stderr, "Parsing", name
-##             try:
-##                 dr.parse_file(os.path.join(dir, name), debug=True)
-##             except pgen2.parse.ParseError, err:
-##                 print "ParseError:", err
+    # Process every imported module
+    for name in sys.modules:
+        mod = sys.modules[name]
+        if mod is None or not hasattr(mod, "__file__"):
+            continue
+        fn = mod.__file__
+        if fn.endswith(".pyc"):
+            fn = fn[:-1]
+        if not fn.endswith(".py"):
+            continue
+        print >>sys.stderr, "Parsing", fn
+        tree = dr.parse_file(fn, debug=True)
+        diff(fn, tree)
+
+    # Process every single module on sys.path (but not in packages)
+    for dir in sys.path:
+        try:
+            names = os.listdir(dir)
+        except os.error:
+            continue
+        print >>sys.stderr, "Scanning", dir, "..."
+        for name in names:
+            if not name.endswith(".py"):
+                continue
+            print >>sys.stderr, "Parsing", name
+            fn = os.path.join(dir, name)
+            try:
+                tree = dr.parse_file(fn, debug=True)
+            except pgen2.parse.ParseError, err:
+                print "ParseError:", err
+            else:
+                diff(fn, tree)
 
 if __name__ == "__main__":
     main()

Modified: sandbox/trunk/2to3/pynode.py
==============================================================================
--- sandbox/trunk/2to3/pynode.py	(original)
+++ sandbox/trunk/2to3/pynode.py	Thu Nov 30 02:48:54 2006
@@ -61,7 +61,7 @@
 
     _stretch = False # Set to true to stretch the repr() vertically
 
-    def __repr__(self, repr=repr):
+    def __repr__(self, repr_arg=repr):
         stretch = self._stretch
         r = [self.__class__.__name__]
         if stretch:
@@ -93,10 +93,10 @@
             except AttributeError:
                 continue
             if stretch and isinstance(value, list):
-                rr = map(repr, value)
+                rr = map(repr_arg, value)
                 rv = "[" + ",\n ".join(rr) + "]"
             else:
-                rv = repr(value)
+                rv = repr_arg(value)
             if stretch:
                 rv = rv.replace("\n", "\n    ")
             r.append(rv)
@@ -104,7 +104,7 @@
         return "".join(r)
 
     def __str__(self):
-        return self.__repr__(repr=str)
+        return self.__repr__(repr_arg=str)
 
 class Nonterminal(Node):
     """Abstract base class for nonterminal symbols.
@@ -157,10 +157,22 @@
         self.repr = repr
 
     def __str__(self):
-        return self.repr
+        prefix, start = self.context
+        return prefix + self.repr
 
 # Node classes for terminal symbols
 
+class Token(Constant):
+    """An otherwise unclassified operator or keyword (e.g. '+' or 'if').
+
+    Attributes:
+
+    repr -- a string giving the token's text.
+
+    """
+
+    __slots__ = []
+
 class Name(Terminal):
     """Name (e.g. a variable name or an attribute name).
 
@@ -176,7 +188,8 @@
         self.name = name
 
     def __str__(self):
-        return self.name
+        prefix, start = self.context
+        return prefix + self.name
 
 class Number(Constant):
     """Numeric constant.
@@ -208,7 +221,7 @@
     def initseries(self, nodes):
         self.nodes = nodes
     def __str__(self):
-        return " ".join(map(str, self.nodes))
+        return "".join(map(str, self.nodes))
 
 class atom(GenericSeries):
     __slots__ = []
@@ -529,7 +542,7 @@
     "Grammar: $ 'not' | '-' $"
     return op
 
-class Operator(Nonterminal):
+class XXXOperator(Nonterminal):
     """Operator.
 
     This has a repr slot and a priority() method.
@@ -619,7 +632,7 @@
 ##     token.LBRACE: vanish,
 ##     token.RBRACE: vanish,
 ##     token.NEWLINE: vanish,
-     token.ENDMARKER: vanish,
+##     token.ENDMARKER: vanish,
 ##     grammar.QUESTIONMARK: vanish,
 
     # All other tokens return the token's string value (e.g. "+")
@@ -658,7 +671,7 @@
             if value in vanishing_keywords:
                 return None
             else:
-                return value
+                return Token(context, value)
         else:
             return Name(context, value)
 
@@ -668,7 +681,7 @@
     if factory:
         return factory(context, value)
     else:
-        return value
+        return Token(context, value)
 
 
 # Support code

Modified: sandbox/trunk/2to3/tokenize.py
==============================================================================
--- sandbox/trunk/2to3/tokenize.py	(original)
+++ sandbox/trunk/2to3/tokenize.py	Thu Nov 30 02:48:54 2006
@@ -370,6 +370,8 @@
                 elif initial in namechars:                 # ordinary name
                     yield (NAME, token, spos, epos, line)
                 elif initial == '\\':                      # continued stmt
+                    # This yield is new; needed for better idempotency:
+                    yield (NL, initial, spos, (spos[0], spos[1]+1), line)
                     continued = 1
                 else:
                     if initial in '([{': parenlev = parenlev + 1


More information about the Python-checkins mailing list