[Python-checkins] bpo-44246: Entry points performance improvements. (GH-26467)

jaraco webhook-mailer at python.org
Mon May 31 11:52:38 EDT 2021


https://github.com/python/cpython/commit/410b70d39d9d77384f8b8597560f6731530149ca
commit: 410b70d39d9d77384f8b8597560f6731530149ca
branch: main
author: Jason R. Coombs <jaraco at jaraco.com>
committer: jaraco <jaraco at jaraco.com>
date: 2021-05-31T11:52:29-04:00
summary:

bpo-44246: Entry points performance improvements. (GH-26467)

From importlib_metadata 4.3.1.

files:
A Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst
M Lib/importlib/metadata/__init__.py
M Lib/test/test_importlib/test_zip.py

diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py
index 94b83869a68550..2e3403e5a565cb 100644
--- a/Lib/importlib/metadata/__init__.py
+++ b/Lib/importlib/metadata/__init__.py
@@ -493,6 +493,11 @@ def name(self):
         """Return the 'Name' metadata for the distribution package."""
         return self.metadata['Name']
 
+    @property
+    def _normalized_name(self):
+        """Return a normalized version of the name."""
+        return Prepared.normalize(self.name)
+
     @property
     def version(self):
         """Return the 'Version' metadata for the distribution package."""
@@ -795,6 +800,22 @@ def read_text(self, filename):
     def locate_file(self, path):
         return self._path.parent / path
 
+    @property
+    def _normalized_name(self):
+        """
+        Performance optimization: where possible, resolve the
+        normalized name from the file system path.
+        """
+        stem = os.path.basename(str(self._path))
+        return self._name_from_stem(stem) or super()._normalized_name
+
+    def _name_from_stem(self, stem):
+        name, ext = os.path.splitext(stem)
+        if ext not in ('.dist-info', '.egg-info'):
+            return
+        name, sep, rest = stem.partition('-')
+        return name
+
 
 def distribution(distribution_name):
     """Get the ``Distribution`` instance for the named package.
@@ -849,7 +870,8 @@ def entry_points(**params) -> Union[EntryPoints, SelectableGroups]:
 
     :return: EntryPoints or SelectableGroups for all installed packages.
     """
-    unique = functools.partial(unique_everseen, key=operator.attrgetter('name'))
+    norm_name = operator.attrgetter('_normalized_name')
+    unique = functools.partial(unique_everseen, key=norm_name)
     eps = itertools.chain.from_iterable(
         dist.entry_points for dist in unique(distributions())
     )
diff --git a/Lib/test/test_importlib/test_zip.py b/Lib/test/test_importlib/test_zip.py
index 83e041385e0e8c..bf16a3b95e18c5 100644
--- a/Lib/test/test_importlib/test_zip.py
+++ b/Lib/test/test_importlib/test_zip.py
@@ -76,3 +76,7 @@ def test_files(self):
         for file in files('example'):
             path = str(file.dist.locate_file(file))
             assert '.egg/' in path, path
+
+    def test_normalized_name(self):
+        dist = distribution('example')
+        assert dist._normalized_name == 'example'
diff --git a/Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst b/Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst
new file mode 100644
index 00000000000000..727d9fd0a19d8a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-05-31-11-28-03.bpo-44246.nhmt-v.rst
@@ -0,0 +1,3 @@
+In importlib.metadata.entry_points, de-duplication of distributions no
+longer requires loading the full metadata for PathDistribution objects,
+improving entry point loading performance by ~10x.



More information about the Python-checkins mailing list