[Python-checkins] cpython (3.3): Issue #8478: Untokenizer.compat now processes first token from iterator input.

terry.reedy python-checkins at python.org
Tue Feb 18 05:17:24 CET 2014


http://hg.python.org/cpython/rev/b6d6ca792b64
changeset:   89241:b6d6ca792b64
branch:      3.3
parent:      89235:a539e09be378
user:        Terry Jan Reedy <tjreedy at udel.edu>
date:        Mon Feb 17 23:12:16 2014 -0500
summary:
  Issue #8478: Untokenizer.compat now processes first token from iterator input.
Patch based on lines from Georg Brandl, Eric Snow, and Gareth Rees.
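
In compatibility mode (two-tuple tokens), untokenize() previously
dropped the first token whenever its input was an iterator rather than
a list. A minimal sketch of the fixed behavior (token values are
illustrative; compatibility mode appends a space after NAME and NUMBER
tokens):

    from tokenize import untokenize, NUMBER, OP

    # Two-tuple tokens select compatibility mode.  Before this fix,
    # the leading '1' was silently dropped for iterator input, while
    # list input happened to work.
    tokens = [(NUMBER, '1'), (OP, '+'), (NUMBER, '2')]
    assert untokenize(iter(tokens)) == '1 +2 '
    assert untokenize(tokens) == '1 +2 '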

files:
  Lib/test/test_tokenize.py |  13 +++++++++++++
  Lib/tokenize.py           |  24 +++++++++++-------------
  Misc/NEWS                 |   3 +++
  3 files changed, 27 insertions(+), 13 deletions(-)


diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1165,6 +1165,19 @@
                 'start (1,3) precedes previous end (2,2)')
         self.assertRaises(ValueError, u.add_whitespace, (2,1))
 
+    def test_iter_compat(self):
+        u = Untokenizer()
+        token = (NAME, 'Hello')
+        tokens = [(ENCODING, 'utf-8'), token]
+        u.compat(token, iter([]))
+        self.assertEqual(u.tokens, ["Hello "])
+        u = Untokenizer()
+        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
+        u = Untokenizer()
+        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
+        self.assertEqual(u.encoding, 'utf-8')
+        self.assertEqual(untokenize(iter(tokens)), b'Hello ')
+
 
 __test__ = {"doctests" : doctests, 'decistmt': decistmt}
 
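The new test exercises Untokenizer.compat directly as well as both
untokenize entry points. In essence, list and iterator inputs must now
agree; a minimal standalone check (token values taken from the test
above, with the ENCODING token making the module-level function return
bytes):

    from tokenize import untokenize, ENCODING, NAME

    tokens = [(ENCODING, 'utf-8'), (NAME, 'Hello')]
    # Before this fix, iterator input lost the leading ENCODING token,
    # so the two calls below disagreed.
    assert untokenize(tokens) == untokenize(iter(tokens)) == b'Hello '
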
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -25,12 +25,14 @@
                'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
                'Michael Foord')
 import builtins
+from codecs import lookup, BOM_UTF8
+import collections
+from io import TextIOWrapper
+from itertools import chain
 import re
 import sys
 from token import *
-from codecs import lookup, BOM_UTF8
-import collections
-from io import TextIOWrapper
+
 cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
 
@@ -237,9 +239,10 @@
             self.tokens.append(" " * col_offset)
 
     def untokenize(self, iterable):
-        for t in iterable:
+        it = iter(iterable)
+        for t in it:
             if len(t) == 2:
-                self.compat(t, iterable)
+                self.compat(t, it)
                 break
             tok_type, token, start, end, line = t
             if tok_type == ENCODING:
@@ -254,17 +257,12 @@
         return "".join(self.tokens)
 
     def compat(self, token, iterable):
-        startline = False
         indents = []
         toks_append = self.tokens.append
-        toknum, tokval = token
+        startline = token[0] in (NEWLINE, NL)
+        prevstring = False
 
-        if toknum in (NAME, NUMBER):
-            tokval += ' '
-        if toknum in (NEWLINE, NL):
-            startline = True
-        prevstring = False
-        for tok in iterable:
+        for tok in chain([token], iterable):
             toknum, tokval = tok[:2]
             if toknum == ENCODING:
                 self.encoding = tokval
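
The core of the change is the peek-and-chain idiom: untokenize() now
normalizes its argument with iter(), and compat(), having received the
already-consumed first token, splices it back onto the remaining stream
with itertools.chain. A generic sketch of the idiom (the helper name is
illustrative, not part of tokenize):

    from itertools import chain

    def first_and_rest(iterable):
        it = iter(iterable)       # lists and iterators behave alike
        first = next(it)          # consume one item to inspect it
        return first, chain([first], it)  # splice it back in front

    first, stream = first_and_rest([1, 2, 3])
    assert first == 1
    assert list(stream) == [1, 2, 3]
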
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -26,6 +26,9 @@
 - Issue #17671: Fixed a crash when use non-initialized io.BufferedRWPair.
   Based on patch by Stephen Tu.
 
+- Issue #8478: Untokenizer.compat processes first token from iterator input.
+  Patch based on lines from Georg Brandl, Eric Snow, and Gareth Rees.
+
 - Issue #20594: Avoid name clash with the libc function posix_close.
 
 - Issue #19856: shutil.move() failed to move a directory to other directory

-- 
Repository URL: http://hg.python.org/cpython

