[Python-checkins] bpo-34572: change _pickle unpickling to use import rather than retrieving from sys.modules (GH-9047)

Antoine Pitrou webhook-mailer at python.org
Mon Feb 18 10:30:57 EST 2019


https://github.com/python/cpython/commit/4371c0a9c0848f7a0947d43f26f234842b41efdf
commit: 4371c0a9c0848f7a0947d43f26f234842b41efdf
branch: master
author: tjb900 <ozburgess at gmail.com>
committer: Antoine Pitrou <pitrou at free.fr>
date: 2019-02-18T15:30:51Z
summary:

bpo-34572: change _pickle unpickling to use import rather than retrieving from sys.modules (GH-9047)

Fix C implementation of pickle.loads to use importlib's locking mechanisms, and thereby avoid using partially-loaded modules.

files:
A Misc/NEWS.d/next/Library/2018-09-05-03-02-32.bpo-34572.ayisd2.rst
M Lib/test/pickletester.py
M Modules/_pickle.c

diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 71c2feadb613..293393fe3713 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -3,18 +3,22 @@
 import dbm
 import io
 import functools
+import os
 import pickle
 import pickletools
+import shutil
 import struct
 import sys
+import threading
 import unittest
 import weakref
+from textwrap import dedent
 from http.cookies import SimpleCookie
 
 from test import support
 from test.support import (
     TestFailed, TESTFN, run_with_locale, no_tracing,
-    _2G, _4G, bigmemtest,
+    _2G, _4G, bigmemtest, reap_threads, forget,
     )
 
 from pickle import bytes_types
@@ -1174,6 +1178,67 @@ def test_truncated_data(self):
         for p in badpickles:
             self.check_unpickling_error(self.truncated_errors, p)
 
+    @reap_threads
+    def test_unpickle_module_race(self):
+        # https://bugs.python.org/issue34572
+        locker_module = dedent("""
+        import threading
+        barrier = threading.Barrier(2)
+        """)
+        locking_import_module = dedent("""
+        import locker
+        locker.barrier.wait()
+        class ToBeUnpickled(object):
+            pass
+        """)
+
+        os.mkdir(TESTFN)
+        self.addCleanup(shutil.rmtree, TESTFN)
+        sys.path.insert(0, TESTFN)
+        self.addCleanup(sys.path.remove, TESTFN)
+        with open(os.path.join(TESTFN, "locker.py"), "wb") as f:
+            f.write(locker_module.encode('utf-8'))
+        with open(os.path.join(TESTFN, "locking_import.py"), "wb") as f:
+            f.write(locking_import_module.encode('utf-8'))
+        self.addCleanup(forget, "locker")
+        self.addCleanup(forget, "locking_import")
+
+        import locker
+
+        pickle_bytes = (
+            b'\x80\x03clocking_import\nToBeUnpickled\nq\x00)\x81q\x01.')
+
+        # Then try to unpickle two of these simultaneously
+        # One of them will cause the module import, and we want it to block
+        # until the other one either:
+        #   - fails (before the patch for this issue)
+        #   - blocks on the import lock for the module, as it should
+        results = []
+        barrier = threading.Barrier(3)
+        def t():
+            # This ensures the threads have all started
+            # presumably barrier release is faster than thread startup
+            barrier.wait()
+            results.append(pickle.loads(pickle_bytes))
+
+        t1 = threading.Thread(target=t)
+        t2 = threading.Thread(target=t)
+        t1.start()
+        t2.start()
+
+        barrier.wait()
+        # could have delay here
+        locker.barrier.wait()
+
+        t1.join()
+        t2.join()
+
+        from locking_import import ToBeUnpickled
+        self.assertEqual(
+            [type(x) for x in results],
+            [ToBeUnpickled] * 2)
+
+
 
 class AbstractPickleTests(unittest.TestCase):
     # Subclass must define self.dumps, self.loads.
diff --git a/Misc/NEWS.d/next/Library/2018-09-05-03-02-32.bpo-34572.ayisd2.rst b/Misc/NEWS.d/next/Library/2018-09-05-03-02-32.bpo-34572.ayisd2.rst
new file mode 100644
index 000000000000..0468d96420f3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-09-05-03-02-32.bpo-34572.ayisd2.rst
@@ -0,0 +1,3 @@
+Fix C implementation of pickle.loads to use importlib's locking
+mechanisms, and thereby avoid using partially-loaded modules.
+Patch by Tim Burgess.
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index 58cd09ca61a6..2b97294e1e86 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -6636,13 +6636,13 @@ _pickle_Unpickler_find_class_impl(UnpicklerObject *self,
         }
     }
 
-    module = PyImport_GetModule(module_name);
+    /*
+     * we don't use PyImport_GetModule here, because it can return partially-
+     * initialised modules, which then cause the getattribute to fail.
+     */
+    module = PyImport_Import(module_name);
     if (module == NULL) {
-        if (PyErr_Occurred())
-            return NULL;
-        module = PyImport_Import(module_name);
-        if (module == NULL)
-            return NULL;
+        return NULL;
     }
     global = getattribute(module, global_name, self->proto >= 4);
     Py_DECREF(module);



More information about the Python-checkins mailing list