[Python-checkins] cpython: Issue #17621: Introduce importlib.util.LazyLoader.

brett.cannon python-checkins at python.org
Fri Apr 4 19:53:47 CEST 2014


http://hg.python.org/cpython/rev/52b58618199c
changeset:   90141:52b58618199c
user:        Brett Cannon <brett at python.org>
date:        Fri Apr 04 13:53:38 2014 -0400
summary:
  Issue #17621: Introduce importlib.util.LazyLoader.

files:
  Doc/library/importlib.rst            |   35 +++
  Doc/whatsnew/3.5.rst                 |    4 +
  Lib/importlib/util.py                |   94 ++++++++++-
  Lib/test/test_importlib/test_lazy.py |  132 +++++++++++++++
  Misc/NEWS                            |    2 +
  5 files changed, 266 insertions(+), 1 deletions(-)


diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst
--- a/Doc/library/importlib.rst
+++ b/Doc/library/importlib.rst
@@ -1191,3 +1191,38 @@
    module will be file-based.
 
    .. versionadded:: 3.4
+
+.. class:: LazyLoader(loader)
+
+   A class which postpones the execution of the loader of a module until the
+   module has an attribute accessed.
+
+   This class **only** works with loaders that define
+   :meth:`importlib.abc.Loader.exec_module`, because control over what module
+   type is used for the module is required. For the same reason, the loader
+   **cannot** define :meth:`importlib.abc.Loader.create_module`. Finally,
+   modules which substitute the object placed into :attr:`sys.modules` will
+   not work as there is no way to properly replace the module references
+   throughout the interpreter safely; :exc:`ValueError` is raised if such a
+   substitution is detected.
+
+   .. note::
+      For projects where startup time is critical, this class allows for
+      potentially minimizing the cost of loading a module if it is never used.
+      For projects where startup time is not essential, use of this class is
+      **heavily** discouraged because error messages created during loading are
+      postponed and thus occur out of context.
+
+   .. versionadded:: 3.5
+
+   .. classmethod:: factory(loader)
+
+      A class method which returns a callable that creates a lazy loader. This
+      is meant to be used in situations where the loader is passed by class
+      instead of by instance.
+      ::
+
+        suffixes = importlib.machinery.SOURCE_SUFFIXES
+        loader = importlib.machinery.SourceFileLoader
+        lazy_loader = importlib.util.LazyLoader.factory(loader)
+        finder = importlib.machinery.FileFinder(path, [(lazy_loader, suffixes)])
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -149,6 +149,10 @@
   subclassing of :class:`~inspect.Signature` easier (contributed
   by Yury Selivanov and Eric Snow in :issue:`17373`).
 
+* :class:`importlib.util.LazyLoader` allows for the lazy loading of modules in
+  applications where startup time is paramount (contributed by Brett Cannon in
+  :issue:`17621`).
+
 
 Optimizations
 =============
diff --git a/Lib/importlib/util.py b/Lib/importlib/util.py
--- a/Lib/importlib/util.py
+++ b/Lib/importlib/util.py
@@ -1,5 +1,5 @@
 """Utility code for constructing importers, etc."""
-
+from . import abc
 from ._bootstrap import MAGIC_NUMBER
 from ._bootstrap import cache_from_source
 from ._bootstrap import decode_source
@@ -12,6 +12,7 @@
 from contextlib import contextmanager
 import functools
 import sys
+import types
 import warnings
 
 
@@ -200,3 +201,94 @@
             return fxn(self, module, *args, **kwargs)
 
     return module_for_loader_wrapper
+
+
+class _Module(types.ModuleType):
+
+    """Module subclass permitting __class__ swaps to and from _LazyModule."""
+
+
+class _LazyModule(types.ModuleType):
+
+    """A subclass of the module type which triggers loading upon attribute access."""
+
+    def __getattribute__(self, attr):
+        """Run the real loader on first attribute access, then return *attr*.
+
+        Raises ValueError if the load replaced this module in sys.modules.
+        """
+        # All module metadata must be garnered from __spec__ in order to avoid
+        # using mutated values.
+        # Stop triggering this method.
+        self.__class__ = _Module
+        # Get the original name to make sure no object substitution occurred
+        # in sys.modules.
+        original_name = self.__spec__.name
+        # Figure out exactly what attributes were mutated between the creation
+        # of the module and now.
+        attrs_then = self.__spec__.loader_state
+        attrs_updated = {}
+        for key, value in self.__dict__.items():
+            # Code that set the attribute may have kept a reference to the
+            # assigned object, making identity more important than equality.
+            if key not in attrs_then or value is not attrs_then[key]:
+                attrs_updated[key] = value
+        self.__spec__.loader.exec_module(self)
+        # If exec_module() was used directly there is no guarantee the module
+        # object was put into sys.modules.
+        if original_name in sys.modules:
+            if self is not sys.modules[original_name]:
+                msg = ('module object for {!r} substituted in sys.modules '
+                       'during a lazy load')
+                raise ValueError(msg.format(original_name))
+        # Update after loading since that's what would happen in an eager
+        # loading situation.
+        self.__dict__.update(attrs_updated)
+        return getattr(self, attr)
+
+    def __delattr__(self, attr):
+        """Trigger the load and then perform the deletion."""
+        # To trigger the load and raise an exception if the attribute
+        # doesn't exist.
+        self.__getattribute__(attr)
+        delattr(self, attr)
+
+
+class LazyLoader(abc.Loader):
+
+    """A loader wrapper delaying the real load until an attribute is accessed."""
+
+    @staticmethod
+    def __check_eager_loader(loader):
+        if not hasattr(loader, 'exec_module'):
+            raise TypeError('loader must define exec_module()')
+        loader_type = loader.__class__
+        # A create_module() inherited untouched from importlib.abc.Loader is
+        # fine; any other definition is rejected.
+        if (hasattr(loader_type, 'create_module') and
+                abc.Loader.create_module != loader_type.create_module):
+            raise TypeError('loader cannot define create_module()')
+
+    @classmethod
+    def factory(cls, loader):
+        """Return a callable that instantiates *loader* and wraps it lazily."""
+        cls.__check_eager_loader(loader)
+        return lambda *args, **kwargs: cls(loader(*args, **kwargs))
+
+    def __init__(self, loader):
+        self.__check_eager_loader(loader)
+        self.loader = loader
+
+    def create_module(self, spec):
+        """Return a new _Module named after *spec*."""
+        return _Module(spec.name)
+
+    def exec_module(self, module):
+        """Install the wrapped loader on *module* and switch it to _LazyModule."""
+        spec = module.__spec__
+        spec.loader = module.__loader__ = self.loader
+        # A shallow copy is enough for the snapshot: mutating any object held
+        # by the module first requires reading it off the module (e.g.
+        # module.__spec__), and that read is what triggers the load.
+        spec.loader_state = module.__dict__.copy()
+        module.__class__ = _LazyModule
diff --git a/Lib/test/test_importlib/test_lazy.py b/Lib/test/test_importlib/test_lazy.py
new file mode 100644
--- /dev/null
+++ b/Lib/test/test_importlib/test_lazy.py
@@ -0,0 +1,132 @@
+import importlib
+from importlib import abc
+from importlib import util
+import unittest
+
+from . import util as test_util
+
+
+class CollectInit:
+    """Stub loader recording its constructor arguments for later inspection."""
+
+    def __init__(self, *args, **kwargs):
+        self.args, self.kwargs = args, kwargs
+
+    def exec_module(self, module):
+        return self
+
+
+class LazyLoaderFactoryTests(unittest.TestCase):
+
+    def test_init(self):
+        # Call the factory's result the way importlib.machinery.FileFinder
+        # instantiates loaders, plus a keyword argument.
+        make_lazy = util.LazyLoader.factory(CollectInit)
+        wrapped = make_lazy('module name', 'module path', kw='kw').loader
+        expected_args = ('module name', 'module path')
+        self.assertEqual(expected_args, wrapped.args)
+        self.assertEqual({'kw': 'kw'}, wrapped.kwargs)
+
+    def test_validation(self):
+        # A loader lacking exec_module() cannot be made lazy.
+        with self.assertRaises(TypeError):
+            util.LazyLoader.factory(object)
+
+
+class TestingImporter(abc.MetaPathFinder, abc.Loader):
+    """Finder/loader for one fake module; records the module it executes."""
+    module_name = 'lazy_loader_test'
+    mutated_name = 'changed'
+    loaded = None  # Set to the module once exec_module() has run.
+    source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)
+
+    def find_spec(self, name, path, target=None):
+        if name == self.module_name:
+            return util.spec_from_loader(name, util.LazyLoader(self))
+        return None
+
+    def exec_module(self, module):
+        exec(self.source_code, module.__dict__)
+        self.loaded = module
+
+
+class LazyLoaderTests(unittest.TestCase):
+
+    def test_init(self):
+        # A loader lacking exec_module() must be rejected.
+        with self.assertRaises(TypeError):
+            util.LazyLoader(object)
+
+    def new_module(self, source_code=None):
+        """Build a lazy module by hand, bypassing the import system."""
+        loader = TestingImporter()
+        if source_code is not None:
+            loader.source_code = source_code
+        spec = util.spec_from_loader(TestingImporter.module_name,
+                                     util.LazyLoader(loader))
+        module = spec.loader.create_module(spec)
+        module.__spec__ = spec
+        module.__loader__ = spec.loader
+        spec.loader.exec_module(module)
+        # Module is now lazy.
+        self.assertIsNone(loader.loaded)
+        return module
+
+    def test_e2e(self):
+        # End-to-end test to verify the load is in fact lazy.
+        importer = TestingImporter()
+        self.assertIsNone(importer.loaded)
+        with test_util.uncache(importer.module_name):
+            with test_util.import_state(meta_path=[importer]):
+                module = importlib.import_module(importer.module_name)
+        self.assertIsNone(importer.loaded)
+        # Trigger load.
+        self.assertEqual(module.__loader__, importer)
+        self.assertIsNotNone(importer.loaded)
+        self.assertEqual(module, importer.loaded)
+
+    def test_attr_unchanged(self):
+        # An attribute mutated only as a side-effect of import is left alone.
+        module = self.new_module()
+        self.assertEqual(TestingImporter.mutated_name, module.__name__)
+
+    def test_new_attr(self):
+        # A new attribute should persist.
+        module = self.new_module()
+        module.new_attr = 42
+        self.assertEqual(42, module.new_attr)
+
+    def test_mutated_preexisting_attr(self):
+        # Changing a pre-existing attribute, e.g. __name__, should persist.
+        module = self.new_module()
+        module.__name__ = 'bogus'
+        self.assertEqual('bogus', module.__name__)
+
+    def test_mutated_attr(self):
+        # Changing an attribute the import itself defines should persist.
+        module = self.new_module()
+        module.attr = 6
+        self.assertEqual(6, module.attr)
+
+    def test_delete_eventual_attr(self):
+        # Deleting an attribute should stay deleted.
+        module = self.new_module()
+        del module.attr
+        self.assertFalse(hasattr(module, 'attr'))
+
+    def test_delete_preexisting_attr(self):
+        # Deleting an attribute present before the load should stay deleted.
+        module = self.new_module()
+        del module.__name__
+        self.assertFalse(hasattr(module, '__name__'))
+
+    def test_module_substitution_error(self):
+        # Code which replaces its own sys.modules entry must fail the load.
+        module = self.new_module('import sys; sys.modules[__name__] = 42')
+        with test_util.uncache(TestingImporter.module_name):
+            with self.assertRaises(ValueError):
+                module.__name__
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -29,6 +29,8 @@
 Library
 -------
 
+- Issue #17621: Introduce importlib.util.LazyLoader.
+
 - Issue #21076: signal module constants were turned into enums.
   Patch by Giampaolo Rodola'.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list