[Python-checkins] cpython (merge 3.6 -> default): Issue #26293: Fixed writing ZIP files that starts not from the start of the

serhiy.storchaka python-checkins at python.org
Fri Oct 7 15:25:22 EDT 2016


https://hg.python.org/cpython/rev/64a19fe3a16a
changeset:   104352:64a19fe3a16a
parent:      104349:def217aaad2f
parent:      104351:b06e75ae1981
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Fri Oct 07 22:25:05 2016 +0300
summary:
  Issue #26293: Fixed writing ZIP files that starts not from the start of the
file.  Offsets in ZIP file now are relative to the start of the archive in
conforming to the specification.

files:
  Lib/test/test_zipfile.py |  43 ++++++++++++++++++++++++++++
  Lib/zipfile.py           |  30 +++++++++---------
  Misc/NEWS                |   4 ++
  3 files changed, 62 insertions(+), 15 deletions(-)


diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -420,6 +420,49 @@
             f.seek(len(data))
             with zipfile.ZipFile(f, "r") as zipfp:
                 self.assertEqual(zipfp.namelist(), [TESTFN])
+                self.assertEqual(zipfp.read(TESTFN), self.data)
+        with open(TESTFN2, 'rb') as f:
+            self.assertEqual(f.read(len(data)), data)
+            zipfiledata = f.read()
+        with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN])
+            self.assertEqual(zipfp.read(TESTFN), self.data)
+
+    def test_read_concatenated_zip_file(self):
+        with io.BytesIO() as bio:
+            with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp:
+                zipfp.write(TESTFN, TESTFN)
+            zipfiledata = bio.getvalue()
+        data = b'I am not a ZipFile!'*10
+        with open(TESTFN2, 'wb') as f:
+            f.write(data)
+            f.write(zipfiledata)
+
+        with zipfile.ZipFile(TESTFN2) as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN])
+            self.assertEqual(zipfp.read(TESTFN), self.data)
+
+    def test_append_to_concatenated_zip_file(self):
+        with io.BytesIO() as bio:
+            with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp:
+                zipfp.write(TESTFN, TESTFN)
+            zipfiledata = bio.getvalue()
+        data = b'I am not a ZipFile!'*1000000
+        with open(TESTFN2, 'wb') as f:
+            f.write(data)
+            f.write(zipfiledata)
+
+        with zipfile.ZipFile(TESTFN2, 'a') as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN])
+            zipfp.writestr('strfile', self.data)
+
+        with open(TESTFN2, 'rb') as f:
+            self.assertEqual(f.read(len(data)), data)
+            zipfiledata = f.read()
+        with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN, 'strfile'])
+            self.assertEqual(zipfp.read(TESTFN), self.data)
+            self.assertEqual(zipfp.read('strfile'), self.data)
 
     def test_ignores_newline_at_end(self):
         with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp:
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1103,10 +1103,10 @@
                 # even if no files are added to the archive
                 self._didModify = True
                 try:
-                    self.start_dir = self.fp.tell()
+                    self.start_dir = self._start_disk = self.fp.tell()
                 except (AttributeError, OSError):
                     self.fp = _Tellable(self.fp)
-                    self.start_dir = 0
+                    self.start_dir = self._start_disk = 0
                     self._seekable = False
                 else:
                     # Some file-like objects can provide tell() but not seek()
@@ -1127,7 +1127,7 @@
                     # set the modified flag so central directory gets written
                     # even if no files are added to the archive
                     self._didModify = True
-                    self.start_dir = self.fp.tell()
+                    self.start_dir = self._start_disk = self.fp.tell()
             else:
                 raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
         except:
@@ -1171,17 +1171,18 @@
         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
         self._comment = endrec[_ECD_COMMENT]    # archive comment
 
-        # "concat" is zero, unless zip was concatenated to another file
-        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
+        # self._start_disk:  Position of the start of ZIP archive
+        # It is zero, unless ZIP was concatenated to another file
+        self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
             # If Zip64 extension structures are present, account for them
-            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
+            self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
 
         if self.debug > 2:
-            inferred = concat + offset_cd
-            print("given, inferred, offset", offset_cd, inferred, concat)
+            inferred = self._start_disk + offset_cd
+            print("given, inferred, offset", offset_cd, inferred, self._start_disk)
         # self.start_dir:  Position of start of central directory
-        self.start_dir = offset_cd + concat
+        self.start_dir = offset_cd + self._start_disk
         fp.seek(self.start_dir, 0)
         data = fp.read(size_cd)
         fp = io.BytesIO(data)
@@ -1221,7 +1222,7 @@
                             t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
 
             x._decodeExtra()
-            x.header_offset = x.header_offset + concat
+            x.header_offset = x.header_offset + self._start_disk
             self.filelist.append(x)
             self.NameToInfo[x.filename] = x
 
@@ -1685,11 +1686,10 @@
                 file_size = zinfo.file_size
                 compress_size = zinfo.compress_size
 
-            if zinfo.header_offset > ZIP64_LIMIT:
-                extra.append(zinfo.header_offset)
+            header_offset = zinfo.header_offset - self._start_disk
+            if header_offset > ZIP64_LIMIT:
+                extra.append(header_offset)
                 header_offset = 0xffffffff
-            else:
-                header_offset = zinfo.header_offset
 
             extra_data = zinfo.extra
             min_version = 0
@@ -1736,7 +1736,7 @@
         # Write end-of-zip-archive record
         centDirCount = len(self.filelist)
         centDirSize = pos2 - self.start_dir
-        centDirOffset = self.start_dir
+        centDirOffset = self.start_dir - self._start_disk
         requires_zip64 = None
         if centDirCount > ZIP_FILECOUNT_LIMIT:
             requires_zip64 = "Files count"
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -64,6 +64,10 @@
 Library
 -------
 
+- Issue #26293: Fixed writing ZIP files that starts not from the start of the
+  file.  Offsets in ZIP file now are relative to the start of the archive in
+  conforming to the specification.
+
 - Issue #28380: unittest.mock Mock autospec functions now properly support
   assert_called, assert_not_called, and assert_called_once.
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list