[Python-checkins] bpo-30693: zip+tarfile: sort directory listing (#2263)

Victor Stinner webhook-mailer at python.org
Wed Jan 31 05:17:17 EST 2018


https://github.com/python/cpython/commit/84521047e413d7d1150aaa1c333580b683b3f4b1
commit: 84521047e413d7d1150aaa1c333580b683b3f4b1
branch: master
author: Bernhard M. Wiedemann <githubbmw at lsmod.de>
committer: Victor Stinner <victor.stinner at gmail.com>
date: 2018-01-31T11:17:10+01:00
summary:

bpo-30693: zip+tarfile: sort directory listing (#2263)

tarfile and zipfile now sort directory listings when adding directories
recursively, so that the generated tar and zip archives are more reproducible.

See also https://reproducible-builds.org/docs/stable-inputs/ for more on that topic.

files:
A Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ7.rst
A Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ8.rst
M Doc/library/tarfile.rst
M Doc/library/zipfile.rst
M Lib/tarfile.py
M Lib/test/test_tarfile.py
M Lib/zipfile.py

diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst
index 2450716a1d91..9cd07158e7f6 100644
--- a/Doc/library/tarfile.rst
+++ b/Doc/library/tarfile.rst
@@ -451,7 +451,8 @@ be finalized; only the internally used file object will be closed. See the
    (directory, fifo, symbolic link, etc.). If given, *arcname* specifies an
    alternative name for the file in the archive. Directories are added
    recursively by default. This can be avoided by setting *recursive* to
-   :const:`False`.  If *filter* is given, it
+   :const:`False`. Recursion adds entries in sorted order.
+   If *filter* is given, it
    should be a function that takes a :class:`TarInfo` object argument and
    returns the changed :class:`TarInfo` object. If it instead returns
    :const:`None` the :class:`TarInfo` object will be excluded from the
@@ -460,6 +461,9 @@ be finalized; only the internally used file object will be closed. See the
    .. versionchanged:: 3.2
       Added the *filter* parameter.
 
+   .. versionchanged:: 3.7
+      Recursion adds entries in sorted order.
+
 
 .. method:: TarFile.addfile(tarinfo, fileobj=None)
 
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index 7c9a8c802254..c0f2a89a3a17 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -491,7 +491,7 @@ The :class:`PyZipFile` constructor takes the same parameters as the
       :file:`\*.pyc` are added at the top level.  If the directory is a
       package directory, then all :file:`\*.pyc` are added under the package
       name as a file path, and if any subdirectories are package directories,
-      all of these are added recursively.
+      all of these are added recursively in sorted order.
 
       *basename* is intended for internal use only.
 
@@ -524,6 +524,9 @@ The :class:`PyZipFile` constructor takes the same parameters as the
       .. versionchanged:: 3.6.2
          The *pathname* parameter accepts a :term:`path-like object`.
 
+      .. versionchanged:: 3.7
+         Recursion sorts directory entries.
+
 
 .. _zipinfo-objects:
 
diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 0b8d31f85cf3..a24ee42abf82 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -1943,7 +1943,7 @@ def add(self, name, arcname=None, recursive=True, *, filter=None):
         elif tarinfo.isdir():
             self.addfile(tarinfo)
             if recursive:
-                for f in os.listdir(name):
+                for f in sorted(os.listdir(name)):
                     self.add(os.path.join(name, f), os.path.join(arcname, f),
                             recursive, filter=filter)
 
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index 179cbc6dfffc..8ef4294921b2 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -1129,6 +1129,30 @@ def test_directory_size(self):
         finally:
             support.rmdir(path)
 
+    # mock the following:
+    #  os.listdir: so we know that files are in the wrong order
+    @unittest.mock.patch('os.listdir')
+    def test_ordered_recursion(self, mock_listdir):
+        path = os.path.join(TEMPDIR, "directory")
+        os.mkdir(path)
+        open(os.path.join(path, "1"), "a").close()
+        open(os.path.join(path, "2"), "a").close()
+        mock_listdir.return_value = ["2", "1"]
+        try:
+            tar = tarfile.open(tmpname, self.mode)
+            try:
+                tar.add(path)
+                paths = []
+                for m in tar.getmembers():
+                    paths.append(os.path.split(m.name)[-1])
+                self.assertEqual(paths, ["directory", "1", "2"]);
+            finally:
+                tar.close()
+        finally:
+            support.unlink(os.path.join(path, "1"))
+            support.unlink(os.path.join(path, "2"))
+            support.rmdir(path)
+
     def test_gettarinfo_pathlike_name(self):
         with tarfile.open(tmpname, self.mode) as tar:
             path = pathlib.Path(TEMPDIR) / "file"
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 5df7b1bf75b9..b90b60f72e2b 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1940,7 +1940,7 @@ def writepy(self, pathname, basename="", filterfunc=None):
                 if self.debug:
                     print("Adding", arcname)
                 self.write(fname, arcname)
-                dirlist = os.listdir(pathname)
+                dirlist = sorted(os.listdir(pathname))
                 dirlist.remove("__init__.py")
                 # Add all *.py files and package subdirectories
                 for filename in dirlist:
@@ -1965,7 +1965,7 @@ def writepy(self, pathname, basename="", filterfunc=None):
                 # This is NOT a package directory, add its files at top level
                 if self.debug:
                     print("Adding files from directory", pathname)
-                for filename in os.listdir(pathname):
+                for filename in sorted(os.listdir(pathname)):
                     path = os.path.join(pathname, filename)
                     root, ext = os.path.splitext(filename)
                     if ext == ".py":
@@ -2116,7 +2116,7 @@ def addToZip(zf, path, zippath):
             elif os.path.isdir(path):
                 if zippath:
                     zf.write(path, zippath)
-                for nm in os.listdir(path):
+                for nm in sorted(os.listdir(path)):
                     addToZip(zf,
                              os.path.join(path, nm), os.path.join(zippath, nm))
             # else: ignore
diff --git a/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ7.rst b/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ7.rst
new file mode 100644
index 000000000000..9c895c53de12
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ7.rst
@@ -0,0 +1 @@
+The ZipFile class now recurses directories in a reproducible way.
diff --git a/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ8.rst b/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ8.rst
new file mode 100644
index 000000000000..a622e7ed6e5d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-11-27-15-09-49.bpo-30693.yC4mJ8.rst
@@ -0,0 +1 @@
+The TarFile class now recurses directories in a reproducible way.



More information about the Python-checkins mailing list