[Python-checkins] cpython (3.1): Issue #10419: Fix build_scripts command of distutils to handle correctly

victor.stinner python-checkins at python.org
Wed May 11 00:59:44 CEST 2011


http://hg.python.org/cpython/rev/fd7d4639dae2
changeset:   70028:fd7d4639dae2
branch:      3.1
parent:      70021:9557e4eeb291
user:        Victor Stinner <victor.stinner at haypocalc.com>
date:        Wed May 11 00:57:29 2011 +0200
summary:
  Issue #10419: Fix the build_scripts command of distutils to correctly handle
non-ASCII scripts. Open and write the script in binary mode, but ensure that
the shebang is decodable from UTF-8 and from the encoding of the script.

files:
  Lib/distutils/command/build_scripts.py |  47 ++++++++++---
  Misc/NEWS                              |   4 +
  2 files changed, 40 insertions(+), 11 deletions(-)


diff --git a/Lib/distutils/command/build_scripts.py b/Lib/distutils/command/build_scripts.py
--- a/Lib/distutils/command/build_scripts.py
+++ b/Lib/distutils/command/build_scripts.py
@@ -11,9 +11,11 @@
 from distutils.dep_util import newer
 from distutils.util import convert_path, Mixin2to3
 from distutils import log
+import sys
+import tokenize
 
 # check if Python is called on the first line with this expression
-first_line_re = re.compile('^#!.*python[0-9.]*([ \t].*)?$')
+first_line_re = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$')
 
 class build_scripts(Command):
 
@@ -74,12 +76,14 @@
             # that way, we'll get accurate feedback if we can read the
             # script.
             try:
-                f = open(script, "r")
+                f = open(script, "rb")
             except IOError:
                 if not self.dry_run:
                     raise
                 f = None
             else:
+                encoding, lines = tokenize.detect_encoding(f.readline)
+                f.seek(0)
                 first_line = f.readline()
                 if not first_line:
                     self.warn("%s is an empty file (skipping)" % script)
@@ -88,25 +92,46 @@
                 match = first_line_re.match(first_line)
                 if match:
                     adjust = True
-                    post_interp = match.group(1) or ''
+                    post_interp = match.group(1) or b''
 
             if adjust:
                 log.info("copying and adjusting %s -> %s", script,
                          self.build_dir)
                 updated_files.append(outfile)
                 if not self.dry_run:
-                    outf = open(outfile, "w")
                     if not sysconfig.python_build:
-                        outf.write("#!%s%s\n" %
-                                   (self.executable,
-                                    post_interp))
+                        executable = self.executable
                     else:
-                        outf.write("#!%s%s\n" %
-                                   (os.path.join(
+                        executable = os.path.join(
                             sysconfig.get_config_var("BINDIR"),
                            "python%s%s" % (sysconfig.get_config_var("VERSION"),
-                                           sysconfig.get_config_var("EXE"))),
-                                    post_interp))
+                                           sysconfig.get_config_var("EXE")))
+                    executable = executable.encode(sys.getfilesystemencoding(),
+                                                   'surrogateescape')
+                    shebang = b"#!" + executable + post_interp + b"\n"
+                    # Python parser starts to read a script using UTF-8 until
+                    # it gets a #coding:xxx cookie. The shebang has to be the
+                    # first line of a file, the #coding:xxx cookie cannot be
+                    # written before. So the shebang has to be decodable from
+                    # UTF-8.
+                    try:
+                        shebang.decode('utf-8')
+                    except UnicodeDecodeError:
+                        raise ValueError(
+                            "The shebang ({!r}) is not decodable "
+                            "from utf-8".format(shebang))
+                    # If the script is encoded to a custom encoding (use a
+                    # #coding:xxx cookie), the shebang has to be decodable from
+                    # the script encoding too.
+                    try:
+                        shebang.decode(encoding)
+                    except UnicodeDecodeError:
+                        raise ValueError(
+                            "The shebang ({!r}) is not decodable "
+                            "from the script encoding ({})"
+                            .format(shebang, encoding))
+                    outf = open(outfile, "wb")
+                    outf.write(shebang)
                     outf.writelines(f.readlines())
                     outf.close()
                 if f:
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -69,6 +69,10 @@
 Library
 -------
 
+- Issue #10419: Fix build_scripts command of distutils to handle correctly
+  non-ASCII scripts. Open and write the script in binary mode, but ensure that
+  the shebang is decodable from UTF-8 and from the encoding of the script.
+
 - Issue #12012: ssl.PROTOCOL_SSLv2 becomes optional.
 
 - Issue #11164: Stop trying to use _xmlplus in the xml module.

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list