[Python-checkins] closes bpo-34056: Always return bytes from _HackedGetData.get_data(). (GH-8130)
Benjamin Peterson
webhook-mailer at python.org
Fri Jul 6 23:41:11 EDT 2018
https://github.com/python/cpython/commit/b0274f2cddd36b49fe5080efbe160277ef546471
commit: b0274f2cddd36b49fe5080efbe160277ef546471
branch: master
author: Benjamin Peterson <benjamin at python.org>
committer: GitHub <noreply at github.com>
date: 2018-07-06T20:41:06-07:00
summary:
closes bpo-34056: Always return bytes from _HackedGetData.get_data(). (GH-8130)
* Always return bytes from _HackedGetData.get_data().
Ensure the imp.load_source shim always returns bytes by reopening the file in
binary mode if needed. Hash-based pycs have to receive the source code in bytes.
It's tempting to change imp.get_suffixes() to always return 'rb' as a mode, but
that breaks some stdlib tests and likely 3rdparty code, too.
files:
A Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst
M Lib/imp.py
M Lib/test/test_imp.py
diff --git a/Lib/imp.py b/Lib/imp.py
index 866464b245b2..31f8c766381a 100644
--- a/Lib/imp.py
+++ b/Lib/imp.py
@@ -142,17 +142,16 @@ def __init__(self, fullname, path, file=None):
def get_data(self, path):
"""Gross hack to contort loader to deal w/ load_*()'s bad API."""
if self.file and path == self.path:
+ # The contract of get_data() requires us to return bytes. Reopen the
+ # file in binary mode if needed.
if not self.file.closed:
file = self.file
- else:
- self.file = file = open(self.path, 'r')
+ if 'b' not in file.mode:
+ file.close()
+ if self.file.closed:
+ self.file = file = open(self.path, 'rb')
with file:
- # Technically should be returning bytes, but
- # SourceLoader.get_code() just passed what is returned to
- # compile() which can handle str. And converting to bytes would
- # require figuring out the encoding to decode to and
- # tokenize.detect_encoding() only accepts bytes.
return file.read()
else:
return super().get_data(path)
diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py
index a115e60d4e4f..bb0144b12d41 100644
--- a/Lib/test/test_imp.py
+++ b/Lib/test/test_imp.py
@@ -2,6 +2,7 @@
import importlib.util
import os
import os.path
+import py_compile
import sys
from test import support
from test.support import script_helper
@@ -350,6 +351,20 @@ def test_pyc_invalidation_mode_from_cmdline(self):
res = script_helper.assert_python_ok(*args)
self.assertEqual(res.out.strip().decode('utf-8'), expected)
+ def test_find_and_load_checked_pyc(self):
+ # issue 34056
+ with support.temp_cwd():
+ with open('mymod.py', 'wb') as fp:
+ fp.write(b'x = 42\n')
+ py_compile.compile(
+ 'mymod.py',
+ doraise=True,
+ invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
+ )
+ file, path, description = imp.find_module('mymod', path=['.'])
+ mod = imp.load_module('mymod', file, path, description)
+ self.assertEqual(mod.x, 42)
+
class ReloadTests(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst b/Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst
new file mode 100644
index 000000000000..edc0135efc60
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst
@@ -0,0 +1,3 @@
+Ensure the loader shim created by ``imp.load_module`` always returns bytes
+from its ``get_data()`` function. This fixes using ``imp.load_module`` with
+:pep:`552` hash-based pycs.
More information about the Python-checkins
mailing list