[Python-checkins] bpo-43926: Cleaner metadata with PEP 566 JSON support. (GH-25565)

jaraco webhook-mailer at python.org
Sun May 2 17:03:48 EDT 2021


https://github.com/python/cpython/commit/37e0c7850de902179b28f1378fbbc38a5ed3628c
commit: 37e0c7850de902179b28f1378fbbc38a5ed3628c
branch: master
author: Jason R. Coombs <jaraco at jaraco.com>
committer: jaraco <jaraco at jaraco.com>
date: 2021-05-02T17:03:40-04:00
summary:

bpo-43926: Cleaner metadata with PEP 566 JSON support. (GH-25565)

* bpo-43926: Cleaner metadata with PEP 566 JSON support.

* Add blurb

* Add versionchanged and versionadded declarations for changes to metadata.

* Use descriptor for PEP 566

files:
A Lib/importlib/metadata/__init__.py
A Lib/importlib/metadata/_adapters.py
A Lib/importlib/metadata/_collections.py
A Lib/importlib/metadata/_functools.py
A Lib/importlib/metadata/_itertools.py
A Lib/importlib/metadata/_meta.py
A Lib/importlib/metadata/_text.py
A Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst
D Lib/importlib/_collections.py
D Lib/importlib/_functools.py
D Lib/importlib/_itertools.py
D Lib/importlib/metadata.py
M Doc/library/importlib.metadata.rst
M Lib/test/test_importlib/fixtures.py
M Lib/test/test_importlib/test_main.py
M Lib/test/test_importlib/test_metadata_api.py

diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst
index 40e48d1beec0c7..9bedee5af28f69 100644
--- a/Doc/library/importlib.metadata.rst
+++ b/Doc/library/importlib.metadata.rst
@@ -170,6 +170,19 @@ the values are returned unparsed from the distribution metadata::
     >>> wheel_metadata['Requires-Python']  # doctest: +SKIP
     '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*'
 
+``PackageMetadata`` also presents a ``json`` attribute that returns
+all the metadata in a JSON-compatible form per :PEP:`566`::
+
+    >>> wheel_metadata.json['requires_python']
+    '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*'
+
+.. versionchanged:: 3.10
+   A ``Description`` supplied as the message payload is now included in the
+   metadata. Line continuation characters have been removed.
+
+.. versionadded:: 3.10
+   The ``json`` attribute was added.
+
 
 .. _version:
 
diff --git a/Lib/importlib/metadata.py b/Lib/importlib/metadata/__init__.py
similarity index 97%
rename from Lib/importlib/metadata.py
rename to Lib/importlib/metadata/__init__.py
index 7a427eb3b2870a..142162196fa9b3 100644
--- a/Lib/importlib/metadata.py
+++ b/Lib/importlib/metadata/__init__.py
@@ -14,6 +14,7 @@
 import posixpath
 import collections
 
+from . import _adapters, _meta
 from ._collections import FreezableDefaultDict, Pair
 from ._functools import method_cache
 from ._itertools import unique_everseen
@@ -22,7 +23,7 @@
 from importlib import import_module
 from importlib.abc import MetaPathFinder
 from itertools import starmap
-from typing import Any, List, Mapping, Optional, Protocol, TypeVar, Union
+from typing import List, Mapping, Optional, Union
 
 
 __all__ = [
@@ -385,25 +386,6 @@ def __repr__(self):
         return '<FileHash mode: {} value: {}>'.format(self.mode, self.value)
 
 
-_T = TypeVar("_T")
-
-
-class PackageMetadata(Protocol):
-    def __len__(self) -> int:
-        ...  # pragma: no cover
-
-    def __contains__(self, item: str) -> bool:
-        ...  # pragma: no cover
-
-    def __getitem__(self, key: str) -> str:
-        ...  # pragma: no cover
-
-    def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]:
-        """
-        Return all values associated with a possibly multi-valued key.
-        """
-
-
 class Distribution:
     """A Python distribution package."""
 
@@ -488,7 +470,7 @@ def _local(cls, root='.'):
         return PathDistribution(zipfile.Path(meta.build_as_zip(builder)))
 
     @property
-    def metadata(self) -> PackageMetadata:
+    def metadata(self) -> _meta.PackageMetadata:
         """Return the parsed metadata for this Distribution.
 
         The returned object will have keys that name the various bits of
@@ -502,7 +484,7 @@ def metadata(self) -> PackageMetadata:
             # (which points to the egg-info file) attribute unchanged.
             or self.read_text('')
         )
-        return email.message_from_string(text)
+        return _adapters.Message(email.message_from_string(text))
 
     @property
     def name(self):
@@ -829,7 +811,7 @@ def distributions(**kwargs):
     return Distribution.discover(**kwargs)
 
 
-def metadata(distribution_name) -> PackageMetadata:
+def metadata(distribution_name) -> _meta.PackageMetadata:
     """Get the metadata for the named package.
 
     :param distribution_name: The name of the distribution package to query.
diff --git a/Lib/importlib/metadata/_adapters.py b/Lib/importlib/metadata/_adapters.py
new file mode 100644
index 00000000000000..ab086180fc35fc
--- /dev/null
+++ b/Lib/importlib/metadata/_adapters.py
@@ -0,0 +1,67 @@
+import re
+import textwrap
+import email.message
+
+from ._text import FoldedCase
+
+
+class Message(email.message.Message):
+    multiple_use_keys = set(
+        map(
+            FoldedCase,
+            [
+                'Classifier',
+                'Obsoletes-Dist',
+                'Platform',
+                'Project-URL',
+                'Provides-Dist',
+                'Provides-Extra',
+                'Requires-Dist',
+                'Requires-External',
+                'Supported-Platform',
+            ],
+        )
+    )
+    """
+    Keys that may be indicated multiple times per PEP 566.
+    """
+
+    def __new__(cls, orig: email.message.Message):
+        res = super().__new__(cls)
+        vars(res).update(vars(orig))
+        return res
+
+    def __init__(self, *args, **kwargs):
+        self._headers = self._repair_headers()
+
+    # suppress spurious error from mypy
+    def __iter__(self):
+        return super().__iter__()
+
+    def _repair_headers(self):
+        def redent(value):
+            "Correct for RFC822 indentation"
+            if not value or '\n' not in value:
+                return value
+            return textwrap.dedent(' ' * 8 + value)
+
+        headers = [(key, redent(value)) for key, value in vars(self)['_headers']]
+        if self._payload:
+            headers.append(('Description', self.get_payload()))
+        return headers
+
+    @property
+    def json(self):
+        """
+        Convert PackageMetadata to a JSON-compatible format
+        per PEP 566.
+        """
+
+        def transform(key):
+            value = self.get_all(key) if key in self.multiple_use_keys else self[key]
+            if key == 'Keywords':
+                value = re.split(r'\s+', value)
+            tk = key.lower().replace('-', '_')
+            return tk, value
+
+        return dict(map(transform, map(FoldedCase, self)))
diff --git a/Lib/importlib/_collections.py b/Lib/importlib/metadata/_collections.py
similarity index 100%
rename from Lib/importlib/_collections.py
rename to Lib/importlib/metadata/_collections.py
diff --git a/Lib/importlib/_functools.py b/Lib/importlib/metadata/_functools.py
similarity index 100%
rename from Lib/importlib/_functools.py
rename to Lib/importlib/metadata/_functools.py
diff --git a/Lib/importlib/_itertools.py b/Lib/importlib/metadata/_itertools.py
similarity index 100%
rename from Lib/importlib/_itertools.py
rename to Lib/importlib/metadata/_itertools.py
diff --git a/Lib/importlib/metadata/_meta.py b/Lib/importlib/metadata/_meta.py
new file mode 100644
index 00000000000000..04d9a0235368ed
--- /dev/null
+++ b/Lib/importlib/metadata/_meta.py
@@ -0,0 +1,29 @@
+from typing import Any, Dict, Iterator, List, Protocol, TypeVar, Union
+
+
+_T = TypeVar("_T")
+
+
+class PackageMetadata(Protocol):
+    def __len__(self) -> int:
+        ...  # pragma: no cover
+
+    def __contains__(self, item: str) -> bool:
+        ...  # pragma: no cover
+
+    def __getitem__(self, key: str) -> str:
+        ...  # pragma: no cover
+
+    def __iter__(self) -> Iterator[str]:
+        ...  # pragma: no cover
+
+    def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]:
+        """
+        Return all values associated with a possibly multi-valued key.
+        """
+
+    @property
+    def json(self) -> Dict[str, Union[str, List[str]]]:
+        """
+        A JSON-compatible form of the metadata.
+        """
diff --git a/Lib/importlib/metadata/_text.py b/Lib/importlib/metadata/_text.py
new file mode 100644
index 00000000000000..766979d93c1694
--- /dev/null
+++ b/Lib/importlib/metadata/_text.py
@@ -0,0 +1,99 @@
+import re
+
+from ._functools import method_cache
+
+
+# from jaraco.text 3.5
+class FoldedCase(str):
+    """
+    A case insensitive string class; behaves just like str
+    except compares equal when the only variation is case.
+
+    >>> s = FoldedCase('hello world')
+
+    >>> s == 'Hello World'
+    True
+
+    >>> 'Hello World' == s
+    True
+
+    >>> s != 'Hello World'
+    False
+
+    >>> s.index('O')
+    4
+
+    >>> s.split('O')
+    ['hell', ' w', 'rld']
+
+    >>> sorted(map(FoldedCase, ['GAMMA', 'alpha', 'Beta']))
+    ['alpha', 'Beta', 'GAMMA']
+
+    Sequence membership is straightforward.
+
+    >>> "Hello World" in [s]
+    True
+    >>> s in ["Hello World"]
+    True
+
+    You may test for set inclusion, but candidate and elements
+    must both be folded.
+
+    >>> FoldedCase("Hello World") in {s}
+    True
+    >>> s in {FoldedCase("Hello World")}
+    True
+
+    String inclusion works as long as the FoldedCase object
+    is on the right.
+
+    >>> "hello" in FoldedCase("Hello World")
+    True
+
+    But not if the FoldedCase object is on the left:
+
+    >>> FoldedCase('hello') in 'Hello World'
+    False
+
+    In that case, use in_:
+
+    >>> FoldedCase('hello').in_('Hello World')
+    True
+
+    >>> FoldedCase('hello') > FoldedCase('Hello')
+    False
+    """
+
+    def __lt__(self, other):
+        return self.lower() < other.lower()
+
+    def __gt__(self, other):
+        return self.lower() > other.lower()
+
+    def __eq__(self, other):
+        return self.lower() == other.lower()
+
+    def __ne__(self, other):
+        return self.lower() != other.lower()
+
+    def __hash__(self):
+        return hash(self.lower())
+
+    def __contains__(self, other):
+        return super(FoldedCase, self).lower().__contains__(other.lower())
+
+    def in_(self, other):
+        "Does self appear in other?"
+        return self in FoldedCase(other)
+
+    # cache lower since it's likely to be called frequently.
+    @method_cache
+    def lower(self):
+        return super(FoldedCase, self).lower()
+
+    def index(self, sub):
+        return self.lower().index(sub.lower())
+
+    def split(self, splitter=' ', maxsplit=0):
+        pattern = re.compile(re.escape(splitter), re.I)
+        return pattern.split(self, maxsplit)
diff --git a/Lib/test/test_importlib/fixtures.py b/Lib/test/test_importlib/fixtures.py
index 1ae70c70f10a55..12ed07d33744a5 100644
--- a/Lib/test/test_importlib/fixtures.py
+++ b/Lib/test/test_importlib/fixtures.py
@@ -1,5 +1,6 @@
 import os
 import sys
+import copy
 import shutil
 import pathlib
 import tempfile
@@ -108,6 +109,16 @@ def setUp(self):
         super(DistInfoPkg, self).setUp()
         build_files(DistInfoPkg.files, self.site_dir)
 
+    def make_uppercase(self):
+        """
+        Rewrite metadata with everything uppercase.
+        """
+        shutil.rmtree(self.site_dir / "distinfo_pkg-1.0.0.dist-info")
+        files = copy.deepcopy(DistInfoPkg.files)
+        info = files["distinfo_pkg-1.0.0.dist-info"]
+        info["METADATA"] = info["METADATA"].upper()
+        build_files(files, self.site_dir)
+
 
 class DistInfoPkgWithDot(OnSysPath, SiteDir):
     files: FilesDef = {
diff --git a/Lib/test/test_importlib/test_main.py b/Lib/test/test_importlib/test_main.py
index 08069c9a5de77d..52cb63712a5cb0 100644
--- a/Lib/test/test_importlib/test_main.py
+++ b/Lib/test/test_importlib/test_main.py
@@ -125,7 +125,7 @@ def pkg_with_non_ascii_description(site_dir):
         metadata_dir.mkdir()
         metadata = metadata_dir / 'METADATA'
         with metadata.open('w', encoding='utf-8') as fp:
-            fp.write('Description: pôrˈtend\n')
+            fp.write('Description: pôrˈtend')
         return 'portend'
 
     @staticmethod
@@ -145,7 +145,7 @@ def pkg_with_non_ascii_description_egg_info(site_dir):
 
                 pôrˈtend
                 """
-                ).lstrip()
+                ).strip()
             )
         return 'portend'
 
@@ -157,7 +157,7 @@ def test_metadata_loads(self):
     def test_metadata_loads_egg_info(self):
         pkg_name = self.pkg_with_non_ascii_description_egg_info(self.site_dir)
         meta = metadata(pkg_name)
-        assert meta.get_payload() == 'pôrˈtend\n'
+        assert meta['Description'] == 'pôrˈtend'
 
 
 class DiscoveryTests(fixtures.EggInfoPkg, fixtures.DistInfoPkg, unittest.TestCase):
diff --git a/Lib/test/test_importlib/test_metadata_api.py b/Lib/test/test_importlib/test_metadata_api.py
index 657c16603f668a..825edc10f121a2 100644
--- a/Lib/test/test_importlib/test_metadata_api.py
+++ b/Lib/test/test_importlib/test_metadata_api.py
@@ -231,6 +231,29 @@ def test_more_complex_deps_requires_text(self):
 
         assert deps == expected
 
+    def test_as_json(self):
+        md = metadata('distinfo-pkg').json
+        assert 'name' in md
+        assert md['keywords'] == ['sample', 'package']
+        desc = md['description']
+        assert desc.startswith('Once upon a time\nThere was')
+        assert len(md['requires_dist']) == 2
+
+    def test_as_json_egg_info(self):
+        md = metadata('egginfo-pkg').json
+        assert 'name' in md
+        assert md['keywords'] == ['sample', 'package']
+        desc = md['description']
+        assert desc.startswith('Once upon a time\nThere was')
+        assert len(md['classifier']) == 2
+
+    def test_as_json_odd_case(self):
+        self.make_uppercase()
+        md = metadata('distinfo-pkg').json
+        assert 'name' in md
+        assert len(md['requires_dist']) == 2
+        assert md['keywords'] == ['SAMPLE', 'PACKAGE']
+
 
 class LegacyDots(fixtures.DistInfoPkgWithDotLegacy, unittest.TestCase):
     def test_name_normalization(self):
diff --git a/Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst b/Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst
new file mode 100644
index 00000000000000..45f29a84cd599f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-04-23-17-48-55.bpo-43926.HMUlGU.rst
@@ -0,0 +1,4 @@
+In ``importlib.metadata``, provide a uniform interface to ``Description``,
+allow for any field to be encoded with multiline values, remove continuation
+lines from multiline values, and add a ``.json`` property for easy access to
+the PEP 566 JSON-compatible form. Sync with ``importlib_metadata 4.0``.



More information about the Python-checkins mailing list