[Python-checkins] bpo-45703: Invalidate _NamespacePath cache on importlib.invalidate_ca… (GH-29384)

miss-islington webhook-mailer at python.org
Tue Nov 23 10:38:11 EST 2021


https://github.com/python/cpython/commit/ae1965ccb4b1fad63fab40fe8805d1b8247668d3
commit: ae1965ccb4b1fad63fab40fe8805d1b8247668d3
branch: main
author: Miro Hrončok <miro at hroncok.cz>
committer: miss-islington <31488909+miss-islington at users.noreply.github.com>
date: 2021-11-23T07:38:02-08:00
summary:

bpo-45703: Invalidate _NamespacePath cache on importlib.invalidate_ca… (GH-29384)



Consider the following directory structure:

    .
    └── PATH1
        └── namespace
            └── sub1
                └── __init__.py

And both PATH1 and PATH2 in sys path:

    $ PYTHONPATH=PATH1:PATH2 python3.11
    >>> import namespace
    >>> import namespace.sub1
    >>> namespace.__path__
    _NamespacePath(['.../PATH1/namespace'])
    >>> ...

While this interpreter still runs, PATH2/namespace/sub2 is created:

    .
    ├── PATH1
    │   └── namespace
    │       └── sub1
    │           └── __init__.py
    └── PATH2
        └── namespace
            └── sub2
                └── __init__.py

The newly created module cannot be imported:

    >>> ...
    >>> namespace.__path__
    _NamespacePath(['.../PATH1/namespace'])
    >>> import namespace.sub2
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    ModuleNotFoundError: No module named 'namespace.sub2'

Calling importlib.invalidate_caches() now newly allows to import it:

    >>> import importlib
    >>> importlib.invalidate_caches()
    >>> namespace.__path__
    _NamespacePath(['.../PATH1/namespace'])
    >>> import namespace.sub2
    >>> namespace.__path__
    _NamespacePath(['.../PATH1/namespace', '.../PATH2/namespace'])

This was not previously possible.

files:
A Misc/NEWS.d/next/Library/2021-11-03-13-41-49.bpo-45703.35AagL.rst
M Doc/library/importlib.rst
M Lib/importlib/_bootstrap_external.py
M Lib/test/test_importlib/test_namespace_pkgs.py

diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst
index 6b71e64bcbfa7..59c8c64acdc3b 100644
--- a/Doc/library/importlib.rst
+++ b/Doc/library/importlib.rst
@@ -145,6 +145,10 @@ Functions
 
    .. versionadded:: 3.3
 
+   .. versionchanged:: 3.10
+      Namespace packages created/installed in a different :data:`sys.path`
+      location after the same namespace was already imported are noticed.
+
 .. function:: reload(module)
 
    Reload a previously imported *module*.  The argument must be a module object,
diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py
index 85c5193407d80..6970e9f0a94d4 100644
--- a/Lib/importlib/_bootstrap_external.py
+++ b/Lib/importlib/_bootstrap_external.py
@@ -1231,10 +1231,15 @@ class _NamespacePath:
     using path_finder.  For top-level modules, the parent module's path
     is sys.path."""
 
+    # When invalidate_caches() is called, this epoch is incremented
+    # https://bugs.python.org/issue45703
+    _epoch = 0
+
     def __init__(self, name, path, path_finder):
         self._name = name
         self._path = path
         self._last_parent_path = tuple(self._get_parent_path())
+        self._last_epoch = self._epoch
         self._path_finder = path_finder
 
     def _find_parent_path_names(self):
@@ -1254,7 +1259,7 @@ def _get_parent_path(self):
     def _recalculate(self):
         # If the parent's path has changed, recalculate _path
         parent_path = tuple(self._get_parent_path()) # Make a copy
-        if parent_path != self._last_parent_path:
+        if parent_path != self._last_parent_path or self._epoch != self._last_epoch:
             spec = self._path_finder(self._name, parent_path)
             # Note that no changes are made if a loader is returned, but we
             #  do remember the new parent path
@@ -1262,6 +1267,7 @@ def _recalculate(self):
                 if spec.submodule_search_locations:
                     self._path = spec.submodule_search_locations
             self._last_parent_path = parent_path     # Save the copy
+            self._last_epoch = self._epoch
         return self._path
 
     def __iter__(self):
@@ -1355,6 +1361,9 @@ def invalidate_caches():
                 del sys.path_importer_cache[name]
             elif hasattr(finder, 'invalidate_caches'):
                 finder.invalidate_caches()
+        # Also invalidate the caches of _NamespacePaths
+        # https://bugs.python.org/issue45703
+        _NamespacePath._epoch += 1
 
     @staticmethod
     def _path_hooks(path):
diff --git a/Lib/test/test_importlib/test_namespace_pkgs.py b/Lib/test/test_importlib/test_namespace_pkgs.py
index f80283233f977..2ea41b7a4c5c3 100644
--- a/Lib/test/test_importlib/test_namespace_pkgs.py
+++ b/Lib/test/test_importlib/test_namespace_pkgs.py
@@ -4,6 +4,7 @@
 import importlib.machinery
 import os
 import sys
+import tempfile
 import unittest
 import warnings
 
@@ -130,6 +131,40 @@ def test_imports(self):
         self.assertEqual(foo.two.attr, 'portion2 foo two')
 
 
+class SeparatedNamespacePackagesCreatedWhileRunning(NamespacePackageTest):
+    paths = ['portion1']
+
+    def test_invalidate_caches(self):
+        with tempfile.TemporaryDirectory() as temp_dir:
+            # we manipulate sys.path before anything is imported to avoid
+            # accidental cache invalidation when changing it
+            sys.path.append(temp_dir)
+
+            import foo.one
+            self.assertEqual(foo.one.attr, 'portion1 foo one')
+
+            # the module does not exist, so it cannot be imported
+            with self.assertRaises(ImportError):
+                import foo.just_created
+
+            # util.create_modules() manipulates sys.path
+            # so we must create the modules manually instead
+            namespace_path = os.path.join(temp_dir, 'foo')
+            os.mkdir(namespace_path)
+            module_path = os.path.join(namespace_path, 'just_created.py')
+            with open(module_path, 'w', encoding='utf-8') as file:
+                file.write('attr = "just_created foo"')
+
+            # the module is not known, so it cannot be imported yet
+            with self.assertRaises(ImportError):
+                import foo.just_created
+
+            # but after explicit cache invalidation, it is importable
+            importlib.invalidate_caches()
+            import foo.just_created
+            self.assertEqual(foo.just_created.attr, 'just_created foo')
+
+
 class SeparatedOverlappingNamespacePackages(NamespacePackageTest):
     paths = ['portion1', 'both_portions']
 
diff --git a/Misc/NEWS.d/next/Library/2021-11-03-13-41-49.bpo-45703.35AagL.rst b/Misc/NEWS.d/next/Library/2021-11-03-13-41-49.bpo-45703.35AagL.rst
new file mode 100644
index 0000000000000..9fa9be56b8327
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-11-03-13-41-49.bpo-45703.35AagL.rst
@@ -0,0 +1,5 @@
+When a namespace package is imported before another module from the same
+namespace is created/installed in a different :data:`sys.path` location
+while the program is running, calling the
+:func:`importlib.invalidate_caches` function will now also guarantee the new
+module is noticed.



More information about the Python-checkins mailing list