[Python-checkins] bpo-36832: add zipfile.Path (#13153)

Barry Warsaw webhook-mailer at python.org
Wed May 8 09:45:11 EDT 2019


https://github.com/python/cpython/commit/b2758ff9553d8bebe4e9dd1cb3996212473810e3
commit: b2758ff9553d8bebe4e9dd1cb3996212473810e3
branch: master
author: Jason R. Coombs <jaraco at jaraco.com>
committer: Barry Warsaw <barry at python.org>
date: 2019-05-08T09:45:05-04:00
summary:

bpo-36832: add zipfile.Path (#13153)

* bpo-36832: add zipfile.Path

* bpo-36832: add documentation for zipfile.Path

* 📜🤖 Added by blurb_it.

* Remove module reference from blurb.

* Sort the imports

* Update docstrings and docs per recommendations.

* Rely on test.support.temp_dir

* Signal that 'root' is the parameter.

* Correct spelling of 'mod'

* Convert docstring to comment for brevity.

* Fix more errors in the docs

files:
A Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst
M Doc/library/zipfile.rst
M Lib/test/test_zipfile.py
M Lib/zipfile.py

diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index 4e9edff27014..9db9697105d6 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -52,6 +52,15 @@ The module defines the following items:
    :ref:`zipfile-objects` for constructor details.
 
 
+.. class:: Path
+   :noindex:
+
+   A pathlib-compatible wrapper for zip files. See section
+   :ref:`path-objects` for details.
+
+   .. versionadded:: 3.8
+
+
 .. class:: PyZipFile
    :noindex:
 
@@ -456,6 +465,64 @@ The following data attributes are also available:
    truncated.
 
 
+.. _path-objects:
+
+Path Objects
+------------
+
+.. class:: Path(root, at='')
+
+   Construct a Path object from a ``root`` zipfile (which may be a
+   :class:`ZipFile` instance or ``file`` suitable for passing to
+   the :class:`ZipFile` constructor).
+
+   ``at`` specifies the location of this Path within the zipfile,
+   e.g. 'dir/file.txt', 'dir/', or ''. Defaults to the empty string,
+   indicating the root.
+
+Path objects expose the following features of :class:`pathlib.Path`
+objects:
+
+Path objects are traversable using the ``/`` operator.
+
+.. attribute:: Path.name
+
+   The final path component.
+
+.. method:: Path.open(*, **)
+
+   Invoke :meth:`ZipFile.open` on the current path. Accepts
+   the same arguments as :meth:`ZipFile.open`.
+
+.. method:: Path.iterdir()
+
+   Enumerate the children of the current directory.
+
+.. method:: Path.is_dir()
+
+   Return ``True`` if the current context references a directory.
+
+.. method:: Path.is_file()
+
+   Return ``True`` if the current context references a file.
+
+.. method:: Path.exists()
+
+   Return ``True`` if the current context references a file or
+   directory in the zip file.
+
+.. method:: Path.read_text(*, **)
+
+   Read the current file as unicode text. Positional and
+   keyword arguments are passed through to
+   :class:`io.TextIOWrapper` (except ``buffer``, which is
+   implied by the context).
+
+.. method:: Path.read_bytes()
+
+   Read the current file as bytes.
+
+
 .. _pyzipfile-objects:
 
 PyZipFile Objects
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 14e1e08c5bfd..538d4ee55dfb 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -1,13 +1,15 @@
 import contextlib
+import importlib.util
 import io
 import os
-import importlib.util
 import pathlib
 import posixpath
-import time
+import shutil
 import struct
-import zipfile
+import tempfile
+import time
 import unittest
+import zipfile
 
 
 from tempfile import TemporaryFile
@@ -2392,5 +2394,113 @@ def test_extract_command(self):
                             with open(path, 'rb') as f:
                                 self.assertEqual(f.read(), zf.read(zi))
 
+
+# Poor man's technique to consume a (smallish) iterable.
+consume = tuple
+
+
+def add_dirs(zipfile):
+    """
+    Given a writable zipfile, inject directory entries for
+    any directories implied by the presence of children.
+    """
+    names = zipfile.namelist()
+    consume(
+        zipfile.writestr(name + "/", b"")
+        for name in map(posixpath.dirname, names)
+        if name and name + "/" not in names
+    )
+    return zipfile
+
+
+def build_abcde_files():
+    """
+    Create a zip file with this structure:
+
+    .
+    ├── a.txt
+    └── b
+        ├── c.txt
+        └── d
+            └── e.txt
+    """
+    data = io.BytesIO()
+    zf = zipfile.ZipFile(data, "w")
+    zf.writestr("a.txt", b"content of a")
+    zf.writestr("b/c.txt", b"content of c")
+    zf.writestr("b/d/e.txt", b"content of e")
+    zf.filename = "abcde.zip"
+    return zf
+
+
+class TestPath(unittest.TestCase):
+    def setUp(self):
+        self.fixtures = contextlib.ExitStack()
+        self.addCleanup(self.fixtures.close)
+
+    def zipfile_abcde(self):
+        with self.subTest():
+            yield build_abcde_files()
+        with self.subTest():
+            yield add_dirs(build_abcde_files())
+
+    def zipfile_ondisk(self):
+        tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir()))
+        for zipfile_abcde in self.zipfile_abcde():
+            buffer = zipfile_abcde.fp
+            zipfile_abcde.close()
+            path = tmpdir / zipfile_abcde.filename
+            with path.open("wb") as strm:
+                strm.write(buffer.getvalue())
+            yield path
+
+    def test_iterdir_istype(self):
+        for zipfile_abcde in self.zipfile_abcde():
+            root = zipfile.Path(zipfile_abcde)
+            assert root.is_dir()
+            a, b = root.iterdir()
+            assert a.is_file()
+            assert b.is_dir()
+            c, d = b.iterdir()
+            assert c.is_file()
+            e, = d.iterdir()
+            assert e.is_file()
+
+    def test_open(self):
+        for zipfile_abcde in self.zipfile_abcde():
+            root = zipfile.Path(zipfile_abcde)
+            a, b = root.iterdir()
+            with a.open() as strm:
+                data = strm.read()
+            assert data == b"content of a"
+
+    def test_read(self):
+        for zipfile_abcde in self.zipfile_abcde():
+            root = zipfile.Path(zipfile_abcde)
+            a, b = root.iterdir()
+            assert a.read_text() == "content of a"
+            assert a.read_bytes() == b"content of a"
+
+    def test_traverse_truediv(self):
+        for zipfile_abcde in self.zipfile_abcde():
+            root = zipfile.Path(zipfile_abcde)
+            a = root / "a"
+            assert a.is_file()
+            e = root / "b" / "d" / "e.txt"
+            assert e.read_text() == "content of e"
+
+    def test_pathlike_construction(self):
+        """
+        zipfile.Path should be constructable from a path-like object
+        """
+        for zipfile_ondisk in self.zipfile_ondisk():
+            pathlike = pathlib.Path(str(zipfile_ondisk))
+            zipfile.Path(pathlike)
+
+    def test_traverse_pathlike(self):
+        for zipfile_abcde in self.zipfile_abcde():
+            root = zipfile.Path(zipfile_abcde)
+            root / pathlib.Path("a")
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 2dc016472117..62475c701f50 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -3,16 +3,18 @@
 
 XXX references to utf-8 need further investigation.
 """
+import binascii
+import functools
+import importlib.util
 import io
 import os
-import importlib.util
-import sys
-import time
-import stat
+import posixpath
 import shutil
+import stat
 import struct
-import binascii
+import sys
 import threading
+import time
 
 try:
     import zlib # We may need its compression method
@@ -2102,6 +2104,138 @@ def _compile(file, optimize=-1):
         return (fname, archivename)
 
 
+class Path:
+    """
+    A pathlib-compatible interface for zip files.
+
+    Consider a zip file with this structure::
+
+        .
+        ├── a.txt
+        └── b
+            ├── c.txt
+            └── d
+                └── e.txt
+
+    >>> data = io.BytesIO()
+    >>> zf = ZipFile(data, 'w')
+    >>> zf.writestr('a.txt', 'content of a')
+    >>> zf.writestr('b/c.txt', 'content of c')
+    >>> zf.writestr('b/d/e.txt', 'content of e')
+    >>> zf.filename = 'abcde.zip'
+
+    Path accepts the zipfile object itself or a filename
+
+    >>> root = Path(zf)
+
+    From there, several path operations are available.
+
+    Directory iteration (including the zip file itself):
+
+    >>> a, b = root.iterdir()
+    >>> a
+    Path('abcde.zip', 'a.txt')
+    >>> b
+    Path('abcde.zip', 'b/')
+
+    name property:
+
+    >>> b.name
+    'b'
+
+    join with divide operator:
+
+    >>> c = b / 'c.txt'
+    >>> c
+    Path('abcde.zip', 'b/c.txt')
+    >>> c.name
+    'c.txt'
+
+    Read text:
+
+    >>> c.read_text()
+    'content of c'
+
+    existence:
+
+    >>> c.exists()
+    True
+    >>> (b / 'missing.txt').exists()
+    False
+
+    Coercion to string:
+
+    >>> str(c)
+    'abcde.zip/b/c.txt'
+    """
+
+    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
+
+    def __init__(self, root, at=""):
+        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
+        self.at = at
+
+    @property
+    def open(self):
+        return functools.partial(self.root.open, self.at)
+
+    @property
+    def name(self):
+        return posixpath.basename(self.at.rstrip("/"))
+
+    def read_text(self, *args, **kwargs):
+        with self.open() as strm:
+            return io.TextIOWrapper(strm, *args, **kwargs).read()
+
+    def read_bytes(self):
+        with self.open() as strm:
+            return strm.read()
+
+    def _is_child(self, path):
+        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
+
+    def _next(self, at):
+        return Path(self.root, at)
+
+    def is_dir(self):
+        return not self.at or self.at.endswith("/")
+
+    def is_file(self):
+        return not self.is_dir()
+
+    def exists(self):
+        return self.at in self._names()
+
+    def iterdir(self):
+        if not self.is_dir():
+            raise ValueError("Can't listdir a file")
+        subs = map(self._next, self._names())
+        return filter(self._is_child, subs)
+
+    def __str__(self):
+        return posixpath.join(self.root.filename, self.at)
+
+    def __repr__(self):
+        return self.__repr.format(self=self)
+
+    def __truediv__(self, add):
+        next = posixpath.join(self.at, add)
+        next_dir = posixpath.join(self.at, add, "")
+        names = self._names()
+        return self._next(next_dir if next not in names and next_dir in names else next)
+
+    @staticmethod
+    def _add_implied_dirs(names):
+        return names + [
+            name + "/"
+            for name in map(posixpath.dirname, names)
+            if name and name + "/" not in names
+        ]
+
+    def _names(self):
+        return self._add_implied_dirs(self.root.namelist())
+
+
 def main(args=None):
     import argparse
 
diff --git a/Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst b/Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst
new file mode 100644
index 000000000000..23577d9b5a82
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst
@@ -0,0 +1 @@
+Introducing ``zipfile.Path``, a pathlib-compatible wrapper for traversing zip files.



More information about the Python-checkins mailing list