[Python-checkins] cpython: Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a new importlib.invalidate_caches() function.
antoine.pitrou
python-checkins at python.org
Mon Feb 20 01:55:39 CET 2012
http://hg.python.org/cpython/rev/bbaab666e6c7
changeset: 75048:bbaab666e6c7
parent: 75045:5b4b70bd2b6f
user: Antoine Pitrou <solipsis at pitrou.net>
date: Mon Feb 20 01:48:16 2012 +0100
summary:
Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a new importlib.invalidate_caches() function.
importlib is now often faster than imp.find_module() at finding modules.
files:
Doc/library/importlib.rst | 8 +
Lib/importlib/__init__.py | 4 +-
Lib/importlib/_bootstrap.py | 111 +++++++----
Lib/importlib/test/import_/test_path.py | 4 +-
Lib/test/test_import.py | 8 +
Lib/test/test_reprlib.py | 2 +
Misc/NEWS | 3 +
7 files changed, 90 insertions(+), 50 deletions(-)
diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst
--- a/Doc/library/importlib.rst
+++ b/Doc/library/importlib.rst
@@ -86,6 +86,14 @@
that was imported (e.g. ``pkg.mod``), while :func:`__import__` returns the
top-level package or module (e.g. ``pkg``).
+.. function:: invalidate_caches()
+
+ Invalidate importlib's internal caches. Calling this function may be
+ needed if some modules are installed while your program is running and
+ you expect the program to notice the changes.
+
+ .. versionadded:: 3.3
+
:mod:`importlib.abc` -- Abstract base classes related to import
---------------------------------------------------------------
diff --git a/Lib/importlib/__init__.py b/Lib/importlib/__init__.py
--- a/Lib/importlib/__init__.py
+++ b/Lib/importlib/__init__.py
@@ -18,7 +18,7 @@
http://www.python.org/dev/peps/pep-0328
"""
-__all__ = ['__import__', 'import_module']
+__all__ = ['__import__', 'import_module', 'invalidate_caches']
from . import _bootstrap
@@ -37,7 +37,7 @@
# Public API #########################################################
-from ._bootstrap import __import__
+from ._bootstrap import __import__, invalidate_caches
def import_module(name, package=None):
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -21,31 +21,16 @@
CASE_INSENSITIVE_PLATFORMS = 'win', 'cygwin', 'darwin'
-def _case_insensitive_ok(directory, check):
- """Check if the directory contains something matching 'check' exists in the
- directory.
- If PYTHONCASEOK is a defined environment variable then skip the
- case-sensitivity check.
-
- """
- if b'PYTHONCASEOK' not in _os.environ:
- if not directory:
- directory = '.'
- return check in _os.listdir(directory)
+def _relax_case():
+ """True if filenames must be checked case-insensitively."""
+ if any(map(sys.platform.startswith, CASE_INSENSITIVE_PLATFORMS)):
+ def _relax_case():
+ return b'PYTHONCASEOK' in _os.environ
else:
- return True
-
-def _case_sensitive_ok(directory, check):
- """Under case-sensitive filesystems always assume the case matches.
-
- Since other code does the file existence check, that subsumes a
- case-sensitivity check.
-
- """
- return True
-
-_case_ok = None
+ def _relax_case():
+ return False
+ return _relax_case
# TODO: Expose from marshal
@@ -172,6 +157,18 @@
# Finder/loader utility code ##################################################
+_cache_refresh = 0
+
+def invalidate_caches():
+ """Invalidate importlib's internal caches.
+
+ Calling this function may be needed if some modules are installed while
+ your program is running and you expect the program to notice the changes.
+ """
+ global _cache_refresh
+ _cache_refresh += 1
+
+
def set_package(fxn):
"""Set __package__ on the returned module."""
def set_package_wrapper(*args, **kwargs):
@@ -708,7 +705,7 @@
"""
if path == '':
- path = _os.getcwd()
+ path = '.'
try:
finder = sys.path_importer_cache[path]
except KeyError:
@@ -760,29 +757,55 @@
for suffix in detail.suffixes)
self.packages = packages
self.modules = modules
- self.path = path
+ # Base (directory) path
+ self.path = path or '.'
+ self._path_mtime = -1
+ self._path_cache = set()
+ self._cache_refresh = 0
def find_module(self, fullname):
"""Try to find a loader for the specified module."""
tail_module = fullname.rpartition('.')[2]
- base_path = _path_join(self.path, tail_module)
- if _path_isdir(base_path) and _case_ok(self.path, tail_module):
- for suffix, loader in self.packages:
- init_filename = '__init__' + suffix
- full_path = _path_join(base_path, init_filename)
- if (_path_isfile(full_path) and
- _case_ok(base_path, init_filename)):
- return loader(fullname, full_path)
- else:
- msg = "Not importing directory {}: missing __init__"
- _warnings.warn(msg.format(base_path), ImportWarning)
+ if _relax_case():
+ tail_module = tail_module.lower()
+ try:
+ mtime = _os.stat(self.path).st_mtime
+ except OSError:
+ mtime = -1
+ if mtime != self._path_mtime or _cache_refresh != self._cache_refresh:
+ self._fill_cache()
+ self._path_mtime = mtime
+ self._cache_refresh = _cache_refresh
+ cache = self._path_cache
+ if tail_module in cache:
+ base_path = _path_join(self.path, tail_module)
+ if _path_isdir(base_path):
+ for suffix, loader in self.packages:
+ init_filename = '__init__' + suffix
+ full_path = _path_join(base_path, init_filename)
+ if _path_isfile(full_path):
+ return loader(fullname, full_path)
+ else:
+ msg = "Not importing directory {}: missing __init__"
+ _warnings.warn(msg.format(base_path), ImportWarning)
for suffix, loader in self.modules:
mod_filename = tail_module + suffix
- full_path = _path_join(self.path, mod_filename)
- if _path_isfile(full_path) and _case_ok(self.path, mod_filename):
- return loader(fullname, full_path)
+ if mod_filename in cache:
+ full_path = _path_join(self.path, mod_filename)
+ if _path_isfile(full_path):
+ return loader(fullname, full_path)
return None
+ def _fill_cache(self):
+ """Fill the cache of potential modules and packages for this directory."""
+ path = self.path
+ contents = _os.listdir(path)
+ if _relax_case():
+ self._path_cache = set(fn.lower() for fn in contents)
+ else:
+ self._path_cache = set(contents)
+
+
class _SourceFinderDetails:
loader = _SourceFileLoader
@@ -1060,7 +1083,7 @@
modules, those two modules must be explicitly passed in.
"""
- global _case_ok, imp, sys
+ global imp, sys
imp = imp_module
sys = sys_module
@@ -1093,12 +1116,8 @@
raise ImportError('importlib requires posix or nt')
setattr(self_module, '_os', os_module)
setattr(self_module, 'path_sep', path_sep)
-
- if any(sys_module.platform.startswith(x)
- for x in CASE_INSENSITIVE_PLATFORMS):
- _case_ok = _case_insensitive_ok
- else:
- _case_ok = _case_sensitive_ok
+ # Constants
+ setattr(self_module, '_relax_case', _relax_case())
def _install(sys_module, imp_module):
diff --git a/Lib/importlib/test/import_/test_path.py b/Lib/importlib/test/import_/test_path.py
--- a/Lib/importlib/test/import_/test_path.py
+++ b/Lib/importlib/test/import_/test_path.py
@@ -78,11 +78,11 @@
path = ''
module = '<test module>'
importer = util.mock_modules(module)
- hook = import_util.mock_path_hook(os.getcwd(), importer=importer)
+ hook = import_util.mock_path_hook(os.curdir, importer=importer)
with util.import_state(path=[path], path_hooks=[hook]):
loader = machinery.PathFinder.find_module(module)
self.assertIs(loader, importer)
- self.assertIn(os.getcwd(), sys.path_importer_cache)
+ self.assertIn(os.curdir, sys.path_importer_cache)
class DefaultPathFinderTests(unittest.TestCase):
diff --git a/Lib/test/test_import.py b/Lib/test/test_import.py
--- a/Lib/test/test_import.py
+++ b/Lib/test/test_import.py
@@ -2,6 +2,7 @@
import imp
from importlib.test.import_ import test_relative_imports
from importlib.test.import_ import util as importlib_util
+import importlib
import marshal
import os
import platform
@@ -34,6 +35,7 @@
def setUp(self):
remove_files(TESTFN)
+ importlib.invalidate_caches()
def tearDown(self):
unload(TESTFN)
@@ -107,6 +109,7 @@
create_empty_file(fname)
fn = imp.cache_from_source(fname)
unlink(fn)
+ importlib.invalidate_caches()
__import__(TESTFN)
if not os.path.exists(fn):
self.fail("__import__ did not result in creation of "
@@ -260,6 +263,7 @@
os.remove(source)
del sys.modules[TESTFN]
make_legacy_pyc(source)
+ importlib.invalidate_caches()
mod = __import__(TESTFN)
base, ext = os.path.splitext(mod.__file__)
self.assertIn(ext, ('.pyc', '.pyo'))
@@ -358,6 +362,7 @@
with open(self.file_name, "w") as f:
f.write(self.module_source)
sys.path.insert(0, self.dir_name)
+ importlib.invalidate_caches()
def tearDown(self):
sys.path[:] = self.sys_path
@@ -552,6 +557,7 @@
with open(self.source, 'w') as fp:
print('# This is a test file written by test_import.py', file=fp)
sys.path.insert(0, os.curdir)
+ importlib.invalidate_caches()
def tearDown(self):
assert sys.path[0] == os.curdir, 'Unexpected sys.path[0]'
@@ -599,6 +605,7 @@
pyc_file = make_legacy_pyc(self.source)
os.remove(self.source)
unload(TESTFN)
+ importlib.invalidate_caches()
m = __import__(TESTFN)
self.assertEqual(m.__file__,
os.path.join(os.curdir, os.path.relpath(pyc_file)))
@@ -619,6 +626,7 @@
pyc_file = make_legacy_pyc(self.source)
os.remove(self.source)
unload(TESTFN)
+ importlib.invalidate_caches()
m = __import__(TESTFN)
self.assertEqual(m.__cached__,
os.path.join(os.curdir, os.path.relpath(pyc_file)))
diff --git a/Lib/test/test_reprlib.py b/Lib/test/test_reprlib.py
--- a/Lib/test/test_reprlib.py
+++ b/Lib/test/test_reprlib.py
@@ -6,6 +6,7 @@
import sys
import os
import shutil
+import importlib
import unittest
from test.support import run_unittest, create_empty_file
@@ -212,6 +213,7 @@
# Remember where we are
self.here = os.getcwd()
sys.path.insert(0, self.here)
+ importlib.invalidate_caches()
def tearDown(self):
actions = []
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -469,6 +469,9 @@
Library
-------
+- Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a
+ new importlib.invalidate_caches() function.
+
- Issue #14001: CVE-2012-0845: xmlrpc: Fix an endless loop in
SimpleXMLRPCServer upon malformed POST request.
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list