[Python-checkins] bpo-36832: add zipfile.Path (#13153)
Barry Warsaw
webhook-mailer at python.org
Wed May 8 09:45:11 EDT 2019
https://github.com/python/cpython/commit/b2758ff9553d8bebe4e9dd1cb3996212473810e3
commit: b2758ff9553d8bebe4e9dd1cb3996212473810e3
branch: master
author: Jason R. Coombs <jaraco at jaraco.com>
committer: Barry Warsaw <barry at python.org>
date: 2019-05-08T09:45:05-04:00
summary:
bpo-36832: add zipfile.Path (#13153)
* bpo-36832: add zipfile.Path
* bpo-36832: add documentation for zipfile.Path
* 📜🤖 Added by blurb_it.
* Remove module reference from blurb.
* Sort the imports
* Update docstrings and docs per recommendations.
* Rely on test.support.temp_dir
* Signal that 'root' is the parameter.
* Correct spelling of 'mod'
* Convert docstring to comment for brevity.
* Fix more errors in the docs
files:
A Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst
M Doc/library/zipfile.rst
M Lib/test/test_zipfile.py
M Lib/zipfile.py
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index 4e9edff27014..9db9697105d6 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -52,6 +52,15 @@ The module defines the following items:
:ref:`zipfile-objects` for constructor details.
+.. class:: Path
+ :noindex:
+
+ A pathlib-compatible wrapper for zip files. See section
+ :ref:`path-objects` for details.
+
+ .. versionadded:: 3.8
+
+
.. class:: PyZipFile
:noindex:
@@ -456,6 +465,64 @@ The following data attributes are also available:
truncated.
+.. _path-objects:
+
+Path Objects
+------------
+
+.. class:: Path(root, at='')
+
+ Construct a Path object from a ``root`` zipfile (which may be a
+ :class:`ZipFile` instance or ``file`` suitable for passing to
+ the :class:`ZipFile` constructor).
+
+ ``at`` specifies the location of this Path within the zipfile,
+ e.g. 'dir/file.txt', 'dir/', or ''. Defaults to the empty string,
+ indicating the root.
+
+Path objects expose the following features of :class:`pathlib.Path`
+objects:
+
+Path objects are traversable using the ``/`` operator.
+
+.. attribute:: Path.name
+
+ The final path component.
+
+.. method:: Path.open(*, **)
+
+ Invoke :meth:`ZipFile.open` on the current path. Accepts
+ the same arguments as :meth:`ZipFile.open`.
+
+.. method:: Path.iterdir()
+
+ Enumerate the children of the current directory.
+
+.. method:: Path.is_dir()
+
+ Return ``True`` if the current context references a directory.
+
+.. method:: Path.is_file()
+
+ Return ``True`` if the current context references a file.
+
+.. method:: Path.exists()
+
+ Return ``True`` if the current context references a file or
+ directory in the zip file.
+
+.. method:: Path.read_text(*, **)
+
+ Read the current file as unicode text. Positional and
+ keyword arguments are passed through to
+ :class:`io.TextIOWrapper` (except ``buffer``, which is
+ implied by the context).
+
+.. method:: Path.read_bytes()
+
+ Read the current file as bytes.
+
+
.. _pyzipfile-objects:
PyZipFile Objects
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 14e1e08c5bfd..538d4ee55dfb 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -1,13 +1,15 @@
import contextlib
+import importlib.util
import io
import os
-import importlib.util
import pathlib
import posixpath
-import time
+import shutil
import struct
-import zipfile
+import tempfile
+import time
import unittest
+import zipfile
from tempfile import TemporaryFile
@@ -2392,5 +2394,113 @@ def test_extract_command(self):
with open(path, 'rb') as f:
self.assertEqual(f.read(), zf.read(zi))
+
+# Poor man's technique to consume a (smallish) iterable.
+consume = tuple
+
+
+def add_dirs(zipfile):
+ """
+ Given a writable zipfile, inject directory entries for
+ any directories implied by the presence of children.
+ """
+ names = zipfile.namelist()
+ consume(
+ zipfile.writestr(name + "/", b"")
+ for name in map(posixpath.dirname, names)
+ if name and name + "/" not in names
+ )
+ return zipfile
+
+
+def build_abcde_files():
+ """
+ Create a zip file with this structure:
+
+ .
+ ├── a.txt
+ └── b
+ ├── c.txt
+ └── d
+ └── e.txt
+ """
+ data = io.BytesIO()
+ zf = zipfile.ZipFile(data, "w")
+ zf.writestr("a.txt", b"content of a")
+ zf.writestr("b/c.txt", b"content of c")
+ zf.writestr("b/d/e.txt", b"content of e")
+ zf.filename = "abcde.zip"
+ return zf
+
+
+class TestPath(unittest.TestCase):
+ def setUp(self):
+ self.fixtures = contextlib.ExitStack()
+ self.addCleanup(self.fixtures.close)
+
+ def zipfile_abcde(self):
+ with self.subTest():
+ yield build_abcde_files()
+ with self.subTest():
+ yield add_dirs(build_abcde_files())
+
+ def zipfile_ondisk(self):
+ tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir()))
+ for zipfile_abcde in self.zipfile_abcde():
+ buffer = zipfile_abcde.fp
+ zipfile_abcde.close()
+ path = tmpdir / zipfile_abcde.filename
+ with path.open("wb") as strm:
+ strm.write(buffer.getvalue())
+ yield path
+
+ def test_iterdir_istype(self):
+ for zipfile_abcde in self.zipfile_abcde():
+ root = zipfile.Path(zipfile_abcde)
+ assert root.is_dir()
+ a, b = root.iterdir()
+ assert a.is_file()
+ assert b.is_dir()
+ c, d = b.iterdir()
+ assert c.is_file()
+ e, = d.iterdir()
+ assert e.is_file()
+
+ def test_open(self):
+ for zipfile_abcde in self.zipfile_abcde():
+ root = zipfile.Path(zipfile_abcde)
+ a, b = root.iterdir()
+ with a.open() as strm:
+ data = strm.read()
+ assert data == b"content of a"
+
+ def test_read(self):
+ for zipfile_abcde in self.zipfile_abcde():
+ root = zipfile.Path(zipfile_abcde)
+ a, b = root.iterdir()
+ assert a.read_text() == "content of a"
+ assert a.read_bytes() == b"content of a"
+
+ def test_traverse_truediv(self):
+ for zipfile_abcde in self.zipfile_abcde():
+ root = zipfile.Path(zipfile_abcde)
+ a = root / "a"
+ assert a.is_file()
+ e = root / "b" / "d" / "e.txt"
+ assert e.read_text() == "content of e"
+
+ def test_pathlike_construction(self):
+ """
+ zipfile.Path should be constructable from a path-like object
+ """
+ for zipfile_ondisk in self.zipfile_ondisk():
+ pathlike = pathlib.Path(str(zipfile_ondisk))
+ zipfile.Path(pathlike)
+
+ def test_traverse_pathlike(self):
+ for zipfile_abcde in self.zipfile_abcde():
+ root = zipfile.Path(zipfile_abcde)
+ root / pathlib.Path("a")
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 2dc016472117..62475c701f50 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -3,16 +3,18 @@
XXX references to utf-8 need further investigation.
"""
+import binascii
+import functools
+import importlib.util
import io
import os
-import importlib.util
-import sys
-import time
-import stat
+import posixpath
import shutil
+import stat
import struct
-import binascii
+import sys
import threading
+import time
try:
import zlib # We may need its compression method
@@ -2102,6 +2104,138 @@ def _compile(file, optimize=-1):
return (fname, archivename)
+class Path:
+ """
+ A pathlib-compatible interface for zip files.
+
+ Consider a zip file with this structure::
+
+ .
+ ├── a.txt
+ └── b
+ ├── c.txt
+ └── d
+ └── e.txt
+
+ >>> data = io.BytesIO()
+ >>> zf = ZipFile(data, 'w')
+ >>> zf.writestr('a.txt', 'content of a')
+ >>> zf.writestr('b/c.txt', 'content of c')
+ >>> zf.writestr('b/d/e.txt', 'content of e')
+ >>> zf.filename = 'abcde.zip'
+
+ Path accepts the zipfile object itself or a filename
+
+ >>> root = Path(zf)
+
+ From there, several path operations are available.
+
+ Directory iteration (including the zip file itself):
+
+ >>> a, b = root.iterdir()
+ >>> a
+ Path('abcde.zip', 'a.txt')
+ >>> b
+ Path('abcde.zip', 'b/')
+
+ name property:
+
+ >>> b.name
+ 'b'
+
+ join with divide operator:
+
+ >>> c = b / 'c.txt'
+ >>> c
+ Path('abcde.zip', 'b/c.txt')
+ >>> c.name
+ 'c.txt'
+
+ Read text:
+
+ >>> c.read_text()
+ 'content of c'
+
+ existence:
+
+ >>> c.exists()
+ True
+ >>> (b / 'missing.txt').exists()
+ False
+
+ Coercion to string:
+
+ >>> str(c)
+ 'abcde.zip/b/c.txt'
+ """
+
+ __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
+
+ def __init__(self, root, at=""):
+ self.root = root if isinstance(root, ZipFile) else ZipFile(root)
+ self.at = at
+
+ @property
+ def open(self):
+ return functools.partial(self.root.open, self.at)
+
+ @property
+ def name(self):
+ return posixpath.basename(self.at.rstrip("/"))
+
+ def read_text(self, *args, **kwargs):
+ with self.open() as strm:
+ return io.TextIOWrapper(strm, *args, **kwargs).read()
+
+ def read_bytes(self):
+ with self.open() as strm:
+ return strm.read()
+
+ def _is_child(self, path):
+ return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
+
+ def _next(self, at):
+ return Path(self.root, at)
+
+ def is_dir(self):
+ return not self.at or self.at.endswith("/")
+
+ def is_file(self):
+ return not self.is_dir()
+
+ def exists(self):
+ return self.at in self._names()
+
+ def iterdir(self):
+ if not self.is_dir():
+ raise ValueError("Can't listdir a file")
+ subs = map(self._next, self._names())
+ return filter(self._is_child, subs)
+
+ def __str__(self):
+ return posixpath.join(self.root.filename, self.at)
+
+ def __repr__(self):
+ return self.__repr.format(self=self)
+
+ def __truediv__(self, add):
+ next = posixpath.join(self.at, add)
+ next_dir = posixpath.join(self.at, add, "")
+ names = self._names()
+ return self._next(next_dir if next not in names and next_dir in names else next)
+
+ @staticmethod
+ def _add_implied_dirs(names):
+ return names + [
+ name + "/"
+ for name in map(posixpath.dirname, names)
+ if name and name + "/" not in names
+ ]
+
+ def _names(self):
+ return self._add_implied_dirs(self.root.namelist())
+
+
def main(args=None):
import argparse
diff --git a/Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst b/Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst
new file mode 100644
index 000000000000..23577d9b5a82
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst
@@ -0,0 +1 @@
+Introducing ``zipfile.Path``, a pathlib-compatible wrapper for traversing zip files.
More information about the Python-checkins
mailing list