[Python-checkins] bpo-33899: Revert tokenize module adding an implicit final NEWLINE (GH-10072)
Benjamin Peterson
webhook-mailer at python.org
Wed Oct 24 13:32:27 EDT 2018
https://github.com/python/cpython/commit/a1f45ec73f0486b187633e7ebc0a4f559d29d7d9
commit: a1f45ec73f0486b187633e7ebc0a4f559d29d7d9
branch: 2.7
author: Tal Einat <taleinat+github at gmail.com>
committer: Benjamin Peterson <benjamin at python.org>
date: 2018-10-24T10:32:21-07:00
summary:
bpo-33899: Revert tokenize module adding an implicit final NEWLINE (GH-10072)
This reverts commit 7829bba.
files:
D Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst
M Lib/test/test_tokenize.py
M Lib/tokenize.py
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index a4625971d378..fd9486bdd7d3 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -1,54 +1,32 @@
from test import test_support
-from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP, NEWLINE,
+from tokenize import (untokenize, generate_tokens, NUMBER, NAME, OP,
STRING, ENDMARKER, tok_name, Untokenizer, tokenize)
from StringIO import StringIO
import os
from unittest import TestCase
-# Converts a source string into a list of textual representation
-# of the tokens such as:
-# ` NAME 'if' (1, 0) (1, 2)`
-# to make writing tests easier.
-def stringify_tokens_from_source(token_generator, source_string):
- result = []
- num_lines = len(source_string.splitlines())
- missing_trailing_nl = source_string[-1] not in '\r\n'
-
- for type, token, start, end, line in token_generator:
- if type == ENDMARKER:
- break
- # Ignore the new line on the last line if the input lacks one
- if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
- continue
- type = tok_name[type]
- result.append(" %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
- locals())
-
- return result
-
class TokenizeTest(TestCase):
# Tests for the tokenize module.
# The tests can be really simple. Given a small fragment of source
- # code, print out a table with tokens. The ENDMARKER, ENCODING and
- # final NEWLINE are omitted for brevity.
+ # code, print out a table with tokens. The ENDMARKER is omitted for
+ # brevity.
def check_tokenize(self, s, expected):
# Format the tokens in s in a table format.
+ # The ENDMARKER is omitted.
+ result = []
f = StringIO(s)
- result = stringify_tokens_from_source(generate_tokens(f.readline), s)
-
+ for type, token, start, end, line in generate_tokens(f.readline):
+ if type == ENDMARKER:
+ break
+ type = tok_name[type]
+ result.append(" %(type)-10.10s %(token)-13.13r %(start)s %(end)s" %
+ locals())
self.assertEqual(result,
expected.rstrip().splitlines())
- def test_implicit_newline(self):
- # Make sure that the tokenizer puts in an implicit NEWLINE
- # when the input lacks a trailing new line.
- f = StringIO("x")
- tokens = list(generate_tokens(f.readline))
- self.assertEqual(tokens[-2][0], NEWLINE)
- self.assertEqual(tokens[-1][0], ENDMARKER)
def test_basic(self):
self.check_tokenize("1 + 1", """\
@@ -638,7 +616,7 @@ def test_roundtrip(self):
self.check_roundtrip("if x == 1:\n"
" print x\n")
self.check_roundtrip("# This is a comment\n"
- "# This also\n")
+ "# This also")
# Some people use different formatting conventions, which makes
# untokenize a little trickier. Note that this test involves trailing
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 6c857f854733..d426cd2df52a 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -306,15 +306,8 @@ def generate_tokens(readline):
contline = None
indents = [0]
- last_line = b''
- line = b''
while 1: # loop over lines in stream
try:
- # We capture the value of the line variable here because
- # readline uses the empty string '' to signal end of input,
- # hence `line` itself will always be overwritten at the end
- # of this loop.
- last_line = line
line = readline()
except StopIteration:
line = ''
@@ -444,9 +437,6 @@ def generate_tokens(readline):
(lnum, pos), (lnum, pos+1), line)
pos += 1
- # Add an implicit NEWLINE if the input doesn't end in one
- if last_line and last_line[-1] not in '\r\n':
- yield (NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
for indent in indents[1:]: # pop remaining indent levels
yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')
diff --git a/Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst b/Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst
deleted file mode 100644
index 21c909599363..000000000000
--- a/Misc/NEWS.d/next/Library/2018-06-24-01-57-14.bpo-33899.IaOcAr.rst
+++ /dev/null
@@ -1,3 +0,0 @@
-Tokenize module now implicitly emits a NEWLINE when provided with input that
-does not have a trailing new line. This behavior now matches what the C
-tokenizer does internally. Contributed by Ammar Askar.
More information about the Python-checkins mailing list