[Python-checkins] bpo-42043: Add support for zipfile.Path subclasses (#22716)

jaraco webhook-mailer at python.org
Sun Oct 25 14:45:13 EDT 2020


https://github.com/python/cpython/commit/d1a0a960ee493262fb95a0f5b795b5b6d75cecb8
commit: d1a0a960ee493262fb95a0f5b795b5b6d75cecb8
branch: master
author: Jason R. Coombs <jaraco at jaraco.com>
committer: jaraco <jaraco at jaraco.com>
date: 2020-10-25T14:45:05-04:00
summary:

bpo-42043: Add support for zipfile.Path subclasses (#22716)

* bpo-42043: Add support for zipfile.Path inheritance as introduced in zipp 3.2.0.

* Add blurb.

files:
A Misc/NEWS.d/next/Library/2020-10-15-17-20-37.bpo-42043.OS0p_v.rst
M Lib/test/test_zipfile.py
M Lib/zipfile.py

diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 3bb9ce995c2a1..b3c24213f3474 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -13,6 +13,7 @@
 import unittest
 import unittest.mock as mock
 import zipfile
+import functools
 
 
 from tempfile import TemporaryFile
@@ -2836,6 +2837,20 @@ def build_alpharep_fixture():
     return zf
 
 
+def pass_alpharep(meth):
+    """
+    Given a method, wrap it in a for loop that invokes method
+    with each subtest.
+    """
+
+    @functools.wraps(meth)
+    def wrapper(self):
+        for alpharep in self.zipfile_alpharep():
+            meth(self, alpharep=alpharep)
+
+    return wrapper
+
+
 class TestPath(unittest.TestCase):
     def setUp(self):
         self.fixtures = contextlib.ExitStack()
@@ -2847,47 +2862,58 @@ def zipfile_alpharep(self):
         with self.subTest():
             yield add_dirs(build_alpharep_fixture())
 
-    def zipfile_ondisk(self):
+    def zipfile_ondisk(self, alpharep):
         tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir()))
-        for alpharep in self.zipfile_alpharep():
-            buffer = alpharep.fp
-            alpharep.close()
-            path = tmpdir / alpharep.filename
-            with path.open("wb") as strm:
-                strm.write(buffer.getvalue())
-            yield path
-
-    def test_iterdir_and_types(self):
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            assert root.is_dir()
-            a, b, g = root.iterdir()
-            assert a.is_file()
-            assert b.is_dir()
-            assert g.is_dir()
-            c, f, d = b.iterdir()
-            assert c.is_file() and f.is_file()
-            e, = d.iterdir()
-            assert e.is_file()
-            h, = g.iterdir()
-            i, = h.iterdir()
-            assert i.is_file()
-
-    def test_subdir_is_dir(self):
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            assert (root / 'b').is_dir()
-            assert (root / 'b/').is_dir()
-            assert (root / 'g').is_dir()
-            assert (root / 'g/').is_dir()
-
-    def test_open(self):
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            a, b, g = root.iterdir()
-            with a.open() as strm:
-                data = strm.read()
-            assert data == "content of a"
+        buffer = alpharep.fp
+        alpharep.close()
+        path = tmpdir / alpharep.filename
+        with path.open("wb") as strm:
+            strm.write(buffer.getvalue())
+        return path
+
+    @pass_alpharep
+    def test_iterdir_and_types(self, alpharep):
+        root = zipfile.Path(alpharep)
+        assert root.is_dir()
+        a, b, g = root.iterdir()
+        assert a.is_file()
+        assert b.is_dir()
+        assert g.is_dir()
+        c, f, d = b.iterdir()
+        assert c.is_file() and f.is_file()
+        (e,) = d.iterdir()
+        assert e.is_file()
+        (h,) = g.iterdir()
+        (i,) = h.iterdir()
+        assert i.is_file()
+
+    @pass_alpharep
+    def test_is_file_missing(self, alpharep):
+        root = zipfile.Path(alpharep)
+        assert not root.joinpath('missing.txt').is_file()
+
+    @pass_alpharep
+    def test_iterdir_on_file(self, alpharep):
+        root = zipfile.Path(alpharep)
+        a, b, g = root.iterdir()
+        with self.assertRaises(ValueError):
+            a.iterdir()
+
+    @pass_alpharep
+    def test_subdir_is_dir(self, alpharep):
+        root = zipfile.Path(alpharep)
+        assert (root / 'b').is_dir()
+        assert (root / 'b/').is_dir()
+        assert (root / 'g').is_dir()
+        assert (root / 'g/').is_dir()
+
+    @pass_alpharep
+    def test_open(self, alpharep):
+        root = zipfile.Path(alpharep)
+        a, b, g = root.iterdir()
+        with a.open() as strm:
+            data = strm.read()
+        assert data == "content of a"
 
     def test_open_write(self):
         """
@@ -2908,6 +2934,14 @@ def test_open_extant_directory(self):
         with self.assertRaises(IsADirectoryError):
             zf.joinpath('b').open()
 
+    @pass_alpharep
+    def test_open_binary_invalid_args(self, alpharep):
+        root = zipfile.Path(alpharep)
+        with self.assertRaises(ValueError):
+            root.joinpath('a.txt').open('rb', encoding='utf-8')
+        with self.assertRaises(ValueError):
+            root.joinpath('a.txt').open('rb', 'utf-8')
+
     def test_open_missing_directory(self):
         """
         Attempting to open a missing directory raises FileNotFoundError.
@@ -2916,75 +2950,87 @@ def test_open_missing_directory(self):
         with self.assertRaises(FileNotFoundError):
             zf.joinpath('z').open()
 
-    def test_read(self):
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            a, b, g = root.iterdir()
-            assert a.read_text() == "content of a"
-            assert a.read_bytes() == b"content of a"
-
-    def test_joinpath(self):
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            a = root.joinpath("a")
-            assert a.is_file()
-            e = root.joinpath("b").joinpath("d").joinpath("e.txt")
-            assert e.read_text() == "content of e"
-
-    def test_traverse_truediv(self):
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            a = root / "a"
-            assert a.is_file()
-            e = root / "b" / "d" / "e.txt"
-            assert e.read_text() == "content of e"
+    @pass_alpharep
+    def test_read(self, alpharep):
+        root = zipfile.Path(alpharep)
+        a, b, g = root.iterdir()
+        assert a.read_text() == "content of a"
+        assert a.read_bytes() == b"content of a"
+
+    @pass_alpharep
+    def test_joinpath(self, alpharep):
+        root = zipfile.Path(alpharep)
+        a = root.joinpath("a.txt")
+        assert a.is_file()
+        e = root.joinpath("b").joinpath("d").joinpath("e.txt")
+        assert e.read_text() == "content of e"
+
+    @pass_alpharep
+    def test_traverse_truediv(self, alpharep):
+        root = zipfile.Path(alpharep)
+        a = root / "a.txt"
+        assert a.is_file()
+        e = root / "b" / "d" / "e.txt"
+        assert e.read_text() == "content of e"
+
+    @pass_alpharep
+    def test_traverse_simplediv(self, alpharep):
+        """
+        Disable the __future__.division when testing traversal.
+        """
+        code = compile(
+            source="zipfile.Path(alpharep) / 'a'",
+            filename="(test)",
+            mode="eval",
+            dont_inherit=True,
+        )
+        eval(code)
 
-    def test_pathlike_construction(self):
+    @pass_alpharep
+    def test_pathlike_construction(self, alpharep):
         """
         zipfile.Path should be constructable from a path-like object
         """
-        for zipfile_ondisk in self.zipfile_ondisk():
-            pathlike = pathlib.Path(str(zipfile_ondisk))
-            zipfile.Path(pathlike)
-
-    def test_traverse_pathlike(self):
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            root / pathlib.Path("a")
-
-    def test_parent(self):
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            assert (root / 'a').parent.at == ''
-            assert (root / 'a' / 'b').parent.at == 'a/'
-
-    def test_dir_parent(self):
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            assert (root / 'b').parent.at == ''
-            assert (root / 'b/').parent.at == ''
-
-    def test_missing_dir_parent(self):
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            assert (root / 'missing dir/').parent.at == ''
-
-    def test_mutability(self):
+        zipfile_ondisk = self.zipfile_ondisk(alpharep)
+        pathlike = pathlib.Path(str(zipfile_ondisk))
+        zipfile.Path(pathlike)
+
+    @pass_alpharep
+    def test_traverse_pathlike(self, alpharep):
+        root = zipfile.Path(alpharep)
+        root / pathlib.Path("a")
+
+    @pass_alpharep
+    def test_parent(self, alpharep):
+        root = zipfile.Path(alpharep)
+        assert (root / 'a').parent.at == ''
+        assert (root / 'a' / 'b').parent.at == 'a/'
+
+    @pass_alpharep
+    def test_dir_parent(self, alpharep):
+        root = zipfile.Path(alpharep)
+        assert (root / 'b').parent.at == ''
+        assert (root / 'b/').parent.at == ''
+
+    @pass_alpharep
+    def test_missing_dir_parent(self, alpharep):
+        root = zipfile.Path(alpharep)
+        assert (root / 'missing dir/').parent.at == ''
+
+    @pass_alpharep
+    def test_mutability(self, alpharep):
         """
         If the underlying zipfile is changed, the Path object should
         reflect that change.
         """
-        for alpharep in self.zipfile_alpharep():
-            root = zipfile.Path(alpharep)
-            a, b, g = root.iterdir()
-            alpharep.writestr('foo.txt', 'foo')
-            alpharep.writestr('bar/baz.txt', 'baz')
-            assert any(
-                child.name == 'foo.txt'
-                for child in root.iterdir())
-            assert (root / 'foo.txt').read_text() == 'foo'
-            baz, = (root / 'bar').iterdir()
-            assert baz.read_text() == 'baz'
+        root = zipfile.Path(alpharep)
+        a, b, g = root.iterdir()
+        alpharep.writestr('foo.txt', 'foo')
+        alpharep.writestr('bar/baz.txt', 'baz')
+        assert any(child.name == 'foo.txt' for child in root.iterdir())
+        assert (root / 'foo.txt').read_text() == 'foo'
+        (baz,) = (root / 'bar').iterdir()
+        assert baz.read_text() == 'baz'
 
     HUGE_ZIPFILE_NUM_ENTRIES = 2 ** 13
 
@@ -3013,11 +3059,65 @@ def test_implied_dirs_performance(self):
         data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
         zipfile.CompleteDirs._implied_dirs(data)
 
-    def test_read_does_not_close(self):
-        for alpharep in self.zipfile_ondisk():
-            with zipfile.ZipFile(alpharep) as file:
-                for rep in range(2):
-                    zipfile.Path(file, 'a.txt').read_text()
+    @pass_alpharep
+    def test_read_does_not_close(self, alpharep):
+        alpharep = self.zipfile_ondisk(alpharep)
+        with zipfile.ZipFile(alpharep) as file:
+            for rep in range(2):
+                zipfile.Path(file, 'a.txt').read_text()
+
+    @pass_alpharep
+    def test_subclass(self, alpharep):
+        class Subclass(zipfile.Path):
+            pass
+
+        root = Subclass(alpharep)
+        assert isinstance(root / 'b', Subclass)
+
+    @pass_alpharep
+    def test_filename(self, alpharep):
+        root = zipfile.Path(alpharep)
+        assert root.filename == pathlib.Path('alpharep.zip')
+
+    @pass_alpharep
+    def test_root_name(self, alpharep):
+        """
+        The name of the root should be the name of the zipfile
+        """
+        root = zipfile.Path(alpharep)
+        assert root.name == 'alpharep.zip' == root.filename.name
+
+    @pass_alpharep
+    def test_root_parent(self, alpharep):
+        root = zipfile.Path(alpharep)
+        assert root.parent == pathlib.Path('.')
+        root.root.filename = 'foo/bar.zip'
+        assert root.parent == pathlib.Path('foo')
+
+    @pass_alpharep
+    def test_root_unnamed(self, alpharep):
+        """
+        It is an error to attempt to get the name
+        or parent of an unnamed zipfile.
+        """
+        alpharep.filename = None
+        root = zipfile.Path(alpharep)
+        with self.assertRaises(TypeError):
+            root.name
+        with self.assertRaises(TypeError):
+            root.parent
+
+        # .name and .parent should still work on subs
+        sub = root / "b"
+        assert sub.name == "b"
+        assert sub.parent
+
+    @pass_alpharep
+    def test_inheritance(self, alpharep):
+        cls = type('PathChild', (zipfile.Path,), {})
+        for alpharep in self.zipfile_alpharep():
+            file = cls(alpharep).joinpath('some dir').parent
+            assert isinstance(file, cls)
 
 
 if __name__ == "__main__":
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index da3e40e5dbd41..e1a50a3eb51d9 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -16,6 +16,7 @@
 import threading
 import time
 import contextlib
+import pathlib
 
 try:
     import zlib # We may need its compression method
@@ -2210,6 +2211,7 @@ class FastLookup(CompleteDirs):
     ZipFile subclass to ensure implicit
     dirs exist and are resolved rapidly.
     """
+
     def namelist(self):
         with contextlib.suppress(AttributeError):
             return self.__names
@@ -2241,7 +2243,7 @@ class Path:
     >>> zf.writestr('a.txt', 'content of a')
     >>> zf.writestr('b/c.txt', 'content of c')
     >>> zf.writestr('b/d/e.txt', 'content of e')
-    >>> zf.filename = 'abcde.zip'
+    >>> zf.filename = 'mem/abcde.zip'
 
     Path accepts the zipfile object itself or a filename
 
@@ -2253,9 +2255,9 @@ class Path:
 
     >>> a, b = root.iterdir()
     >>> a
-    Path('abcde.zip', 'a.txt')
+    Path('mem/abcde.zip', 'a.txt')
     >>> b
-    Path('abcde.zip', 'b/')
+    Path('mem/abcde.zip', 'b/')
 
     name property:
 
@@ -2266,7 +2268,7 @@ class Path:
 
     >>> c = b / 'c.txt'
     >>> c
-    Path('abcde.zip', 'b/c.txt')
+    Path('mem/abcde.zip', 'b/c.txt')
     >>> c.name
     'c.txt'
 
@@ -2284,8 +2286,21 @@ class Path:
 
     Coercion to string:
 
-    >>> str(c)
-    'abcde.zip/b/c.txt'
+    >>> import os
+    >>> str(c).replace(os.sep, posixpath.sep)
+    'mem/abcde.zip/b/c.txt'
+
+    At the root, ``name``, ``filename``, and ``parent``
+    resolve to the zipfile. Note these attributes are not
+    valid and will raise a ``ValueError`` if the zipfile
+    has no filename.
+
+    >>> root.name
+    'abcde.zip'
+    >>> str(root.filename).replace(os.sep, posixpath.sep)
+    'mem/abcde.zip'
+    >>> str(root.parent)
+    'mem'
     """
 
     __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
@@ -2323,7 +2338,11 @@ def open(self, mode='r', *args, pwd=None, **kwargs):
 
     @property
     def name(self):
-        return posixpath.basename(self.at.rstrip("/"))
+        return pathlib.Path(self.at).name or self.filename.name
+
+    @property
+    def filename(self):
+        return pathlib.Path(self.root.filename).joinpath(self.at)
 
     def read_text(self, *args, **kwargs):
         with self.open('r', *args, **kwargs) as strm:
@@ -2337,13 +2356,13 @@ def _is_child(self, path):
         return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
 
     def _next(self, at):
-        return Path(self.root, at)
+        return self.__class__(self.root, at)
 
     def is_dir(self):
         return not self.at or self.at.endswith("/")
 
     def is_file(self):
-        return not self.is_dir()
+        return self.exists() and not self.is_dir()
 
     def exists(self):
         return self.at in self.root._name_set()
@@ -2368,6 +2387,8 @@ def joinpath(self, add):
 
     @property
     def parent(self):
+        if not self.at:
+            return self.filename.parent
         parent_at = posixpath.dirname(self.at.rstrip('/'))
         if parent_at:
             parent_at += '/'
diff --git a/Misc/NEWS.d/next/Library/2020-10-15-17-20-37.bpo-42043.OS0p_v.rst b/Misc/NEWS.d/next/Library/2020-10-15-17-20-37.bpo-42043.OS0p_v.rst
new file mode 100644
index 0000000000000..b6b296956c35d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-10-15-17-20-37.bpo-42043.OS0p_v.rst
@@ -0,0 +1,4 @@
+Add support for ``zipfile.Path`` inheritance. ``zipfile.Path.is_file()`` now
+returns False for non-existent names. ``zipfile.Path`` objects now expose a
+``.filename`` attribute and rely on that to resolve ``.name`` and
+``.parent`` when the ``Path`` object is at the root of the zipfile.



More information about the Python-checkins mailing list