[Python-checkins] cpython: Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a new importlib.invalidate_caches() function.

antoine.pitrou python-checkins at python.org
Mon Feb 20 01:55:39 CET 2012


http://hg.python.org/cpython/rev/bbaab666e6c7
changeset:   75048:bbaab666e6c7
parent:      75045:5b4b70bd2b6f
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Mon Feb 20 01:48:16 2012 +0100
summary:
  Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a new importlib.invalidate_caches() function.

importlib is now often faster than imp.find_module() at finding modules.

files:
  Doc/library/importlib.rst               |    8 +
  Lib/importlib/__init__.py               |    4 +-
  Lib/importlib/_bootstrap.py             |  111 +++++++----
  Lib/importlib/test/import_/test_path.py |    4 +-
  Lib/test/test_import.py                 |    8 +
  Lib/test/test_reprlib.py                |    2 +
  Misc/NEWS                               |    3 +
  7 files changed, 90 insertions(+), 50 deletions(-)


diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst
--- a/Doc/library/importlib.rst
+++ b/Doc/library/importlib.rst
@@ -86,6 +86,14 @@
     that was imported (e.g. ``pkg.mod``), while :func:`__import__` returns the
     top-level package or module (e.g. ``pkg``).
 
+.. function:: invalidate_caches()
+
+   Invalidate importlib's internal caches.  Calling this function may be
+   needed if some modules are installed while your program is running and
+   you expect the program to notice the changes.
+
+   .. versionadded:: 3.3
+
 
 :mod:`importlib.abc` -- Abstract base classes related to import
 ---------------------------------------------------------------
diff --git a/Lib/importlib/__init__.py b/Lib/importlib/__init__.py
--- a/Lib/importlib/__init__.py
+++ b/Lib/importlib/__init__.py
@@ -18,7 +18,7 @@
           http://www.python.org/dev/peps/pep-0328
 
 """
-__all__ = ['__import__', 'import_module']
+__all__ = ['__import__', 'import_module', 'invalidate_caches']
 
 from . import _bootstrap
 
@@ -37,7 +37,7 @@
 
 # Public API #########################################################
 
-from ._bootstrap import __import__
+from ._bootstrap import __import__, invalidate_caches
 
 
 def import_module(name, package=None):
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -21,31 +21,16 @@
 
 CASE_INSENSITIVE_PLATFORMS = 'win', 'cygwin', 'darwin'
 
-def _case_insensitive_ok(directory, check):
-    """Check if the directory contains something matching 'check' exists in the
-    directory.
 
-    If PYTHONCASEOK is a defined environment variable then skip the
-    case-sensitivity check.
-
-    """
-    if b'PYTHONCASEOK' not in _os.environ:
-        if not directory:
-            directory = '.'
-        return check in _os.listdir(directory)
+def _relax_case():
+    """True if filenames must be checked case-insensitively."""
+    if any(map(sys.platform.startswith, CASE_INSENSITIVE_PLATFORMS)):
+        def _relax_case():
+            return b'PYTHONCASEOK' in _os.environ
     else:
-        return True
-
-def _case_sensitive_ok(directory, check):
-    """Under case-sensitive filesystems always assume the case matches.
-
-    Since other code does the file existence check, that subsumes a
-    case-sensitivity check.
-
-    """
-    return True
-
-_case_ok = None
+        def _relax_case():
+            return False
+    return _relax_case
 
 
 # TODO: Expose from marshal
@@ -172,6 +157,18 @@
 
 # Finder/loader utility code ##################################################
 
+_cache_refresh = 0
+
+def invalidate_caches():
+    """Invalidate importlib's internal caches.
+
+    Calling this function may be needed if some modules are installed while
+    your program is running and you expect the program to notice the changes.
+    """
+    global _cache_refresh
+    _cache_refresh += 1
+
+
 def set_package(fxn):
     """Set __package__ on the returned module."""
     def set_package_wrapper(*args, **kwargs):
@@ -708,7 +705,7 @@
 
         """
         if path == '':
-            path = _os.getcwd()
+            path = '.'
         try:
             finder = sys.path_importer_cache[path]
         except KeyError:
@@ -760,29 +757,55 @@
                                 for suffix in detail.suffixes)
         self.packages = packages
         self.modules = modules
-        self.path = path
+        # Base (directory) path
+        self.path = path or '.'
+        self._path_mtime = -1
+        self._path_cache = set()
+        self._cache_refresh = 0
 
     def find_module(self, fullname):
         """Try to find a loader for the specified module."""
         tail_module = fullname.rpartition('.')[2]
-        base_path = _path_join(self.path, tail_module)
-        if _path_isdir(base_path) and _case_ok(self.path, tail_module):
-            for suffix, loader in self.packages:
-                init_filename = '__init__' + suffix
-                full_path = _path_join(base_path, init_filename)
-                if (_path_isfile(full_path) and
-                        _case_ok(base_path, init_filename)):
-                    return loader(fullname, full_path)
-            else:
-                msg = "Not importing directory {}: missing __init__"
-                _warnings.warn(msg.format(base_path), ImportWarning)
+        if _relax_case():
+            tail_module = tail_module.lower()
+        try:
+            mtime = _os.stat(self.path).st_mtime
+        except OSError:
+            mtime = -1
+        if mtime != self._path_mtime or _cache_refresh != self._cache_refresh:
+            self._fill_cache()
+            self._path_mtime = mtime
+            self._cache_refresh = _cache_refresh
+        cache = self._path_cache
+        if tail_module in cache:
+            base_path = _path_join(self.path, tail_module)
+            if _path_isdir(base_path):
+                for suffix, loader in self.packages:
+                    init_filename = '__init__' + suffix
+                    full_path = _path_join(base_path, init_filename)
+                    if _path_isfile(full_path):
+                        return loader(fullname, full_path)
+                else:
+                    msg = "Not importing directory {}: missing __init__"
+                    _warnings.warn(msg.format(base_path), ImportWarning)
         for suffix, loader in self.modules:
             mod_filename = tail_module + suffix
-            full_path = _path_join(self.path, mod_filename)
-            if _path_isfile(full_path) and _case_ok(self.path, mod_filename):
-                return loader(fullname, full_path)
+            if mod_filename in cache:
+                full_path = _path_join(self.path, mod_filename)
+                if _path_isfile(full_path):
+                    return loader(fullname, full_path)
         return None
 
+    def _fill_cache(self):
+        """Fill the cache of potential modules and packages for this directory."""
+        path = self.path
+        contents = _os.listdir(path)
+        if _relax_case():
+            self._path_cache = set(fn.lower() for fn in contents)
+        else:
+            self._path_cache = set(contents)
+
+
 class _SourceFinderDetails:
 
     loader = _SourceFileLoader
@@ -1060,7 +1083,7 @@
     modules, those two modules must be explicitly passed in.
 
     """
-    global _case_ok, imp, sys
+    global imp, sys
     imp = imp_module
     sys = sys_module
 
@@ -1093,12 +1116,8 @@
         raise ImportError('importlib requires posix or nt')
     setattr(self_module, '_os', os_module)
     setattr(self_module, 'path_sep', path_sep)
-
-    if any(sys_module.platform.startswith(x)
-            for x in CASE_INSENSITIVE_PLATFORMS):
-        _case_ok = _case_insensitive_ok
-    else:
-        _case_ok = _case_sensitive_ok
+    # Constants
+    setattr(self_module, '_relax_case', _relax_case())
 
 
 def _install(sys_module, imp_module):
diff --git a/Lib/importlib/test/import_/test_path.py b/Lib/importlib/test/import_/test_path.py
--- a/Lib/importlib/test/import_/test_path.py
+++ b/Lib/importlib/test/import_/test_path.py
@@ -78,11 +78,11 @@
         path = ''
         module = '<test module>'
         importer = util.mock_modules(module)
-        hook = import_util.mock_path_hook(os.getcwd(), importer=importer)
+        hook = import_util.mock_path_hook(os.curdir, importer=importer)
         with util.import_state(path=[path], path_hooks=[hook]):
             loader = machinery.PathFinder.find_module(module)
             self.assertIs(loader, importer)
-            self.assertIn(os.getcwd(), sys.path_importer_cache)
+            self.assertIn(os.curdir, sys.path_importer_cache)
 
 
 class DefaultPathFinderTests(unittest.TestCase):
diff --git a/Lib/test/test_import.py b/Lib/test/test_import.py
--- a/Lib/test/test_import.py
+++ b/Lib/test/test_import.py
@@ -2,6 +2,7 @@
 import imp
 from importlib.test.import_ import test_relative_imports
 from importlib.test.import_ import util as importlib_util
+import importlib
 import marshal
 import os
 import platform
@@ -34,6 +35,7 @@
 
     def setUp(self):
         remove_files(TESTFN)
+        importlib.invalidate_caches()
 
     def tearDown(self):
         unload(TESTFN)
@@ -107,6 +109,7 @@
                 create_empty_file(fname)
                 fn = imp.cache_from_source(fname)
                 unlink(fn)
+                importlib.invalidate_caches()
                 __import__(TESTFN)
                 if not os.path.exists(fn):
                     self.fail("__import__ did not result in creation of "
@@ -260,6 +263,7 @@
             os.remove(source)
             del sys.modules[TESTFN]
             make_legacy_pyc(source)
+            importlib.invalidate_caches()
             mod = __import__(TESTFN)
             base, ext = os.path.splitext(mod.__file__)
             self.assertIn(ext, ('.pyc', '.pyo'))
@@ -358,6 +362,7 @@
         with open(self.file_name, "w") as f:
             f.write(self.module_source)
         sys.path.insert(0, self.dir_name)
+        importlib.invalidate_caches()
 
     def tearDown(self):
         sys.path[:] = self.sys_path
@@ -552,6 +557,7 @@
         with open(self.source, 'w') as fp:
             print('# This is a test file written by test_import.py', file=fp)
         sys.path.insert(0, os.curdir)
+        importlib.invalidate_caches()
 
     def tearDown(self):
         assert sys.path[0] == os.curdir, 'Unexpected sys.path[0]'
@@ -599,6 +605,7 @@
         pyc_file = make_legacy_pyc(self.source)
         os.remove(self.source)
         unload(TESTFN)
+        importlib.invalidate_caches()
         m = __import__(TESTFN)
         self.assertEqual(m.__file__,
                          os.path.join(os.curdir, os.path.relpath(pyc_file)))
@@ -619,6 +626,7 @@
         pyc_file = make_legacy_pyc(self.source)
         os.remove(self.source)
         unload(TESTFN)
+        importlib.invalidate_caches()
         m = __import__(TESTFN)
         self.assertEqual(m.__cached__,
                          os.path.join(os.curdir, os.path.relpath(pyc_file)))
diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py
--- a/Lib/test/test_reprlib.py
+++ b/Lib/test/test_reprlib.py
@@ -6,6 +6,7 @@
 import sys
 import os
 import shutil
+import importlib
 import unittest
 
 from test.support import run_unittest, create_empty_file
@@ -212,6 +213,7 @@
         # Remember where we are
         self.here = os.getcwd()
         sys.path.insert(0, self.here)
+        importlib.invalidate_caches()
 
     def tearDown(self):
         actions = []
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -469,6 +469,9 @@
 Library
 -------
 
+- Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a
+  new importlib.invalidate_caches() function.
+
 - Issue #14001: CVE-2012-0845: xmlrpc: Fix an endless loop in
   SimpleXMLRPCServer upon malformed POST request.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list