[Python-checkins] bpo-34341: Fix appending to ZIP archives with the ZIP64 extension. (GH-8683)

Serhiy Storchaka webhook-mailer at python.org
Mon Sep 17 08:36:45 EDT 2018


https://github.com/python/cpython/commit/9bdb7be482aef8f60daa1d36606568a132dcb616
commit: 9bdb7be482aef8f60daa1d36606568a132dcb616
branch: master
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2018-09-17T15:36:40+03:00
summary:

bpo-34341: Fix appending to ZIP archives with the ZIP64 extension. (GH-8683)

files:
A Misc/NEWS.d/next/Library/2018-08-06-11-01-18.bpo-34341.E0b9p2.rst
M Lib/test/test_zipfile.py
M Lib/zipfile.py

diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
index 4c6f57c07590..7b8922f4e054 100644
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -770,6 +770,20 @@ def test_absolute_arcnames(self):
         with zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_STORED) as zipfp:
             self.assertEqual(zipfp.namelist(), ["absolute"])
 
+    def test_append(self):
+        # Test that appending to the Zip64 archive doesn't change
+        # extra fields of existing entries.
+        with zipfile.ZipFile(TESTFN2, "w", allowZip64=True) as zipfp:
+            zipfp.writestr("strfile", self.data)
+        with zipfile.ZipFile(TESTFN2, "r", allowZip64=True) as zipfp:
+            zinfo = zipfp.getinfo("strfile")
+            extra = zinfo.extra
+        with zipfile.ZipFile(TESTFN2, "a", allowZip64=True) as zipfp:
+            zipfp.writestr("strfile2", self.data)
+        with zipfile.ZipFile(TESTFN2, "r", allowZip64=True) as zipfp:
+            zinfo = zipfp.getinfo("strfile")
+            self.assertEqual(zinfo.extra, extra)
+
 @requires_zlib
 class DeflateTestZip64InSmallFiles(AbstractTestZip64InSmallFiles,
                                    unittest.TestCase):
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 7f237783773d..89df90b25209 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -159,6 +159,27 @@ class LargeZipFile(Exception):
 _CD64_DIRECTORY_SIZE = 8
 _CD64_OFFSET_START_CENTDIR = 9
 
+_EXTRA_FIELD_STRUCT = struct.Struct('<HH')
+
+def _strip_extra(extra, xids):
+    # Remove Extra Fields with specified IDs.
+    unpack = _EXTRA_FIELD_STRUCT.unpack
+    modified = False
+    buffer = []
+    start = i = 0
+    while i + 4 <= len(extra):
+        xid, xlen = unpack(extra[i : i + 4])
+        j = i + 4 + xlen
+        if xid in xids:
+            if i != start:
+                buffer.append(extra[start : i])
+            start = j
+            modified = True
+        i = j
+    if not modified:
+        return extra
+    return b''.join(buffer)
+
 def _check_zipfile(fp):
     try:
         if _EndRecData(fp):
@@ -1819,6 +1840,7 @@ def _write_end_record(self):
             min_version = 0
             if extra:
                 # Append a ZIP64 field to the extra's
+                extra_data = _strip_extra(extra_data, (1,))
                 extra_data = struct.pack(
                     '<HH' + 'Q'*len(extra),
                     1, 8*len(extra), *extra) + extra_data
diff --git a/Misc/NEWS.d/next/Library/2018-08-06-11-01-18.bpo-34341.E0b9p2.rst b/Misc/NEWS.d/next/Library/2018-08-06-11-01-18.bpo-34341.E0b9p2.rst
new file mode 100644
index 000000000000..46a9cde19c40
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-08-06-11-01-18.bpo-34341.E0b9p2.rst
@@ -0,0 +1,2 @@
+Appending to the ZIP archive with the ZIP64 extension no longer grows the
+size of extra fields of existing entries.



More information about the Python-checkins mailing list