[Python-checkins] cpython (2.7): Issue #22221: Backported fixes from Python 3 (issue #18960).

serhiy.storchaka python-checkins at python.org
Fri Sep 5 09:26:26 CEST 2014


http://hg.python.org/cpython/rev/dd1e21f17b1c
changeset:   92346:dd1e21f17b1c
branch:      2.7
parent:      92344:48033d90c61d
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Fri Sep 05 10:22:05 2014 +0300
summary:
  Issue #22221: Backported fixes from Python 3 (issue #18960).

* The source encoding declaration on the second line is now ignored if
  the first line contains anything other than a comment (a blank first line
  is still allowed).  This affects compile(), eval() and exec() too.

* IDLE now ignores the source encoding declaration on the second line if the
  first line contains anything except a comment.

* 2to3 and the findnocoding.py script now ignore the source encoding
  declaration on the second line if the first line contains anything except
  a comment.

files:
  Lib/idlelib/IOBinding.py      |   3 +++
  Lib/lib2to3/pgen2/tokenize.py |   3 +++
  Lib/test/test_compile.py      |  17 ++++++++++++++++-
  Misc/NEWS                     |  10 ++++++++++
  Parser/tokenizer.c            |  20 +++++++++++++++++---
  Tools/scripts/findnocoding.py |   4 +++-
  6 files changed, 52 insertions(+), 5 deletions(-)


diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py
--- a/Lib/idlelib/IOBinding.py
+++ b/Lib/idlelib/IOBinding.py
@@ -72,6 +72,7 @@
 encoding = encoding.lower()
 
 coding_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
+blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)')
 
 class EncodingMessage(SimpleDialog):
     "Inform user that an encoding declaration is needed."
@@ -130,6 +131,8 @@
         match = coding_re.match(line)
         if match is not None:
             break
+        if not blank_re.match(line):
+            return None
     else:
         return None
     name = match.group(1)
diff --git a/Lib/lib2to3/pgen2/tokenize.py b/Lib/lib2to3/pgen2/tokenize.py
--- a/Lib/lib2to3/pgen2/tokenize.py
+++ b/Lib/lib2to3/pgen2/tokenize.py
@@ -237,6 +237,7 @@
             toks_append(tokval)
 
 cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
+blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)')
 
 def _get_normal_name(orig_enc):
     """Imitates get_normal_name in tokenizer.c."""
@@ -309,6 +310,8 @@
     encoding = find_cookie(first)
     if encoding:
         return encoding, [first]
+    if not blank_re.match(first):
+        return default, [first]
 
     second = read_or_stop()
     if not second:
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -412,9 +412,24 @@
         l = lambda: "foo"
         self.assertIsNone(l.__doc__)
 
-    def test_unicode_encoding(self):
+    @test_support.requires_unicode
+    def test_encoding(self):
+        code = b'# -*- coding: badencoding -*-\npass\n'
+        self.assertRaises(SyntaxError, compile, code, 'tmp', 'exec')
         code = u"# -*- coding: utf-8 -*-\npass\n"
         self.assertRaises(SyntaxError, compile, code, "tmp", "exec")
+        code = 'u"\xc2\xa4"\n'
+        self.assertEqual(eval(code), u'\xc2\xa4')
+        code = u'u"\xc2\xa4"\n'
+        self.assertEqual(eval(code), u'\xc2\xa4')
+        code = '# -*- coding: latin1 -*-\nu"\xc2\xa4"\n'
+        self.assertEqual(eval(code), u'\xc2\xa4')
+        code = '# -*- coding: utf-8 -*-\nu"\xc2\xa4"\n'
+        self.assertEqual(eval(code), u'\xa4')
+        code = '# -*- coding: iso8859-15 -*-\nu"\xc2\xa4"\n'
+        self.assertEqual(eval(code), test_support.u(r'\xc2\u20ac'))
+        code = 'u"""\\\n# -*- coding: utf-8 -*-\n\xc2\xa4"""\n'
+        self.assertEqual(eval(code), u'# -*- coding: utf-8 -*-\n\xc2\xa4')
 
     def test_subscripts(self):
         # SF bug 1448804
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #22221: Now the source encoding declaration on the second line isn't
+  effective if the first line contains anything except a comment.
+
 - Issue #22023: Fix ``%S``, ``%R`` and ``%V`` formats of
   :c:func:`PyUnicode_FromFormat`.
 
@@ -124,6 +127,9 @@
 IDLE
 ----
 
+- Issue #22221: IDLE now ignores the source encoding declaration on the second
+  line if the first line contains anything except a comment.
+
 - Issue #17390: Adjust Editor window title; remove 'Python',
   move version to end.
 
@@ -140,6 +146,10 @@
 Tools/Demos
 -----------
 
+- Issue #22221: 2to3 and the findnocoding.py script now ignore the source
+  encoding declaration on the second line if the first line contains anything
+  except a comment.
+
 - Issue #22201: Command-line interface of the zipfile module now correctly
   extracts ZIP files with directory entries.  Patch by Ryan Wilson.
 
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -259,11 +259,25 @@
     char * cs;
     int r = 1;
 
-    if (tok->cont_line)
+    if (tok->cont_line) {
         /* It's a continuation line, so it can't be a coding spec. */
+        tok->read_coding_spec = 1;
         return 1;
+    }
     cs = get_coding_spec(line, size);
-    if (cs != NULL) {
+    if (!cs) {
+        Py_ssize_t i;
+        for (i = 0; i < size; i++) {
+            if (line[i] == '#' || line[i] == '\n' || line[i] == '\r')
+                break;
+            if (line[i] != ' ' && line[i] != '\t' && line[i] != '\014') {
+                /* Stop checking coding spec after a line containing
+                 * anything except a comment. */
+                tok->read_coding_spec = 1;
+                break;
+            }
+        }
+    } else {
         tok->read_coding_spec = 1;
         if (tok->encoding == NULL) {
             assert(tok->decoding_state == 1); /* raw */
@@ -688,7 +702,7 @@
     if (newl[0]) {
         if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
             return error_ret(tok);
-        if (tok->enc == NULL && newl[1]) {
+        if (tok->enc == NULL && !tok->read_coding_spec && newl[1]) {
             if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
                                    tok, buf_setreadl))
                 return error_ret(tok);
diff --git a/Tools/scripts/findnocoding.py b/Tools/scripts/findnocoding.py
--- a/Tools/scripts/findnocoding.py
+++ b/Tools/scripts/findnocoding.py
@@ -33,6 +33,7 @@
 
 
 decl_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)')
+blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)')
 
 def get_declaration(line):
     match = decl_re.match(line)
@@ -57,7 +58,8 @@
     line1 = infile.readline()
     line2 = infile.readline()
 
-    if get_declaration(line1) or get_declaration(line2):
+    if (get_declaration(line1) or
+        blank_re.match(line1) and get_declaration(line2)):
         # the file does have an encoding declaration, so trust it
         infile.close()
         return False

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list