[Python-checkins] cpython (2.7): Issue #26293: Fixed writing ZIP files that starts not from the start of the

serhiy.storchaka python-checkins at python.org
Fri Oct 7 16:13:12 EDT 2016


https://hg.python.org/cpython/rev/9a99a88301ef
changeset:   104353:9a99a88301ef
branch:      2.7
parent:      104346:47d5bf5a846f
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Fri Oct 07 23:12:53 2016 +0300
summary:
  Issue #26293: Fixed writing ZIP files that starts not from the start of the
file.  Offsets in ZIP file now are relative to the start of the archive in
conforming to the specification.

files:
  Lib/test/test_zipfile.py |  43 ++++++++++++++++++++++++++++
  Lib/zipfile.py           |  28 +++++++++--------
  Misc/NEWS                |   4 ++
  3 files changed, 62 insertions(+), 13 deletions(-)


diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -344,6 +344,49 @@
             f.seek(len(data))
             with zipfile.ZipFile(f, "r") as zipfp:
                 self.assertEqual(zipfp.namelist(), [TESTFN])
+                self.assertEqual(zipfp.read(TESTFN), self.data)
+        with open(TESTFN2, 'rb') as f:
+            self.assertEqual(f.read(len(data)), data)
+            zipfiledata = f.read()
+        with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN])
+            self.assertEqual(zipfp.read(TESTFN), self.data)
+
+    def test_read_concatenated_zip_file(self):
+        with io.BytesIO() as bio:
+            with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp:
+                zipfp.write(TESTFN, TESTFN)
+            zipfiledata = bio.getvalue()
+        data = b'I am not a ZipFile!'*10
+        with open(TESTFN2, 'wb') as f:
+            f.write(data)
+            f.write(zipfiledata)
+
+        with zipfile.ZipFile(TESTFN2) as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN])
+            self.assertEqual(zipfp.read(TESTFN), self.data)
+
+    def test_append_to_concatenated_zip_file(self):
+        with io.BytesIO() as bio:
+            with zipfile.ZipFile(bio, 'w', zipfile.ZIP_STORED) as zipfp:
+                zipfp.write(TESTFN, TESTFN)
+            zipfiledata = bio.getvalue()
+        data = b'I am not a ZipFile!'*1000000
+        with open(TESTFN2, 'wb') as f:
+            f.write(data)
+            f.write(zipfiledata)
+
+        with zipfile.ZipFile(TESTFN2, 'a') as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN])
+            zipfp.writestr('strfile', self.data)
+
+        with open(TESTFN2, 'rb') as f:
+            self.assertEqual(f.read(len(data)), data)
+            zipfiledata = f.read()
+        with io.BytesIO(zipfiledata) as bio, zipfile.ZipFile(bio) as zipfp:
+            self.assertEqual(zipfp.namelist(), [TESTFN, 'strfile'])
+            self.assertEqual(zipfp.read(TESTFN), self.data)
+            self.assertEqual(zipfp.read('strfile'), self.data)
 
     def test_ignores_newline_at_end(self):
         with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp:
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -772,6 +772,7 @@
                 # set the modified flag so central directory gets written
                 # even if no files are added to the archive
                 self._didModify = True
+                self._start_disk = self.fp.tell()
             elif key == 'a':
                 try:
                     # See if file is a zip file
@@ -785,6 +786,7 @@
                     # set the modified flag so central directory gets written
                     # even if no files are added to the archive
                     self._didModify = True
+                    self._start_disk = self.fp.tell()
             else:
                 raise RuntimeError('Mode must be "r", "w" or "a"')
         except:
@@ -815,17 +817,18 @@
         offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
         self._comment = endrec[_ECD_COMMENT]    # archive comment
 
-        # "concat" is zero, unless zip was concatenated to another file
-        concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
+        # self._start_disk:  Position of the start of ZIP archive
+        # It is zero, unless ZIP was concatenated to another file
+        self._start_disk = endrec[_ECD_LOCATION] - size_cd - offset_cd
         if endrec[_ECD_SIGNATURE] == stringEndArchive64:
             # If Zip64 extension structures are present, account for them
-            concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
+            self._start_disk -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
 
         if self.debug > 2:
-            inferred = concat + offset_cd
-            print "given, inferred, offset", offset_cd, inferred, concat
+            inferred = self._start_disk + offset_cd
+            print "given, inferred, offset", offset_cd, inferred, self._start_disk
         # self.start_dir:  Position of start of central directory
-        self.start_dir = offset_cd + concat
+        self.start_dir = offset_cd + self._start_disk
         fp.seek(self.start_dir, 0)
         data = fp.read(size_cd)
         fp = cStringIO.StringIO(data)
@@ -855,7 +858,7 @@
                                      t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
 
             x._decodeExtra()
-            x.header_offset = x.header_offset + concat
+            x.header_offset = x.header_offset + self._start_disk
             x.filename = x._decodeFilename()
             self.filelist.append(x)
             self.NameToInfo[x.filename] = x
@@ -1198,7 +1201,7 @@
                 raise RuntimeError('Compressed size larger than uncompressed size')
         # Seek backwards and write file header (which will now include
         # correct CRC and file sizes)
-        position = self.fp.tell()       # Preserve current position in file
+        position = self.fp.tell() # Preserve current position in file
         self.fp.seek(zinfo.header_offset, 0)
         self.fp.write(zinfo.FileHeader(zip64))
         self.fp.seek(position, 0)
@@ -1284,11 +1287,10 @@
                         file_size = zinfo.file_size
                         compress_size = zinfo.compress_size
 
-                    if zinfo.header_offset > ZIP64_LIMIT:
-                        extra.append(zinfo.header_offset)
+                    header_offset = zinfo.header_offset - self._start_disk
+                    if header_offset > ZIP64_LIMIT:
+                        extra.append(header_offset)
                         header_offset = 0xffffffffL
-                    else:
-                        header_offset = zinfo.header_offset
 
                     extra_data = zinfo.extra
                     if extra:
@@ -1332,7 +1334,7 @@
                 # Write end-of-zip-archive record
                 centDirCount = len(self.filelist)
                 centDirSize = pos2 - pos1
-                centDirOffset = pos1
+                centDirOffset = pos1 - self._start_disk
                 requires_zip64 = None
                 if centDirCount > ZIP_FILECOUNT_LIMIT:
                     requires_zip64 = "Files count"
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -49,6 +49,10 @@
 Library
 -------
 
+- Issue #26293: Fixed writing ZIP files that starts not from the start of the
+  file.  Offsets in ZIP file now are relative to the start of the archive in
+  conforming to the specification.
+
 - Fix possible integer overflows and crashes in the mmap module with unusual
   usage patterns.
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list