[Python-checkins] cpython: Issue #23252: Added support for writing ZIP files to unseekable streams.

serhiy.storchaka python-checkins at python.org
Mon Mar 23 00:10:34 CET 2015


https://hg.python.org/cpython/rev/19f36a2a34ec
changeset:   95139:19f36a2a34ec
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Mon Mar 23 01:09:35 2015 +0200
summary:
  Issue #23252:  Added support for writing ZIP files to unseekable streams.

files:
  Doc/library/zipfile.rst  |   5 +-
  Doc/whatsnew/3.5.rst     |   6 ++
  Lib/test/test_zipfile.py |  62 +++++++++++++++++----
  Lib/zipfile.py           |  80 ++++++++++++++++++++-------
  Misc/NEWS                |   2 +
  5 files changed, 120 insertions(+), 35 deletions(-)


diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -140,6 +140,7 @@
    ZIP file, then a new ZIP archive is appended to the file.  This is meant for
    adding a ZIP archive to another file (such as :file:`python.exe`).  If
    *mode* is ``a`` and the file does not exist at all, it is created.
+   If *mode* is ``r`` or ``a``, the file should be seekable.
    *compression* is the ZIP compression method to use when writing the archive,
    and should be :const:`ZIP_STORED`, :const:`ZIP_DEFLATED`,
    :const:`ZIP_BZIP2` or :const:`ZIP_LZMA`; unrecognized
@@ -171,6 +172,9 @@
    .. versionchanged:: 3.4
       ZIP64 extensions are enabled by default.
 
+   .. versionchanged:: 3.5
+      Added support for writing to unseekable streams.
+
 
 .. method:: ZipFile.close()
 
@@ -328,7 +332,6 @@
       If ``arcname`` (or ``filename``, if ``arcname`` is  not given) contains a null
       byte, the name of the file in the archive will be truncated at the null byte.
 
-
 .. method:: ZipFile.writestr(zinfo_or_arcname, bytes[, compress_type])
 
    Write the string *bytes* to the archive; *zinfo_or_arcname* is either the file
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -448,6 +448,12 @@
   :func:`~faulthandler.dump_traceback_later` functions now accept file
   descriptors.  (Contributed by Wei Wu in :issue:`23566`.)
 
+zipfile
+-------
+
+* Added support for writing ZIP files to unseekable streams.
+  (Contributed by Serhiy Storchaka in :issue:`23252`.)
+
 
 Optimizations
 =============
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -1685,25 +1685,63 @@
         self.offset = 0
 
     def write(self, data):
-        self.offset += self.fp.write(data)
+        n = self.fp.write(data)
+        self.offset += n
+        return n
 
     def tell(self):
         return self.offset
 
     def flush(self):
-        pass
+        self.fp.flush()
+
+class Unseekable:
+    def __init__(self, fp):
+        self.fp = fp
+
+    def write(self, data):
+        return self.fp.write(data)
+
+    def flush(self):
+        self.fp.flush()
 
 class UnseekableTests(unittest.TestCase):
-    def test_writestr_tellable(self):
-        f = io.BytesIO()
-        with zipfile.ZipFile(Tellable(f), 'w', zipfile.ZIP_STORED) as zipfp:
-            zipfp.writestr('ones', b'111')
-            zipfp.writestr('twos', b'222')
-        with zipfile.ZipFile(f, mode='r') as zipf:
-            with zipf.open('ones') as zopen:
-                self.assertEqual(zopen.read(), b'111')
-            with zipf.open('twos') as zopen:
-                self.assertEqual(zopen.read(), b'222')
+    def test_writestr(self):
+        for wrapper in (lambda f: f), Tellable, Unseekable:
+            with self.subTest(wrapper=wrapper):
+                f = io.BytesIO()
+                f.write(b'abc')
+                bf = io.BufferedWriter(f)
+                with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipfp:
+                    zipfp.writestr('ones', b'111')
+                    zipfp.writestr('twos', b'222')
+                self.assertEqual(f.getvalue()[:5], b'abcPK')
+                with zipfile.ZipFile(f, mode='r') as zipf:
+                    with zipf.open('ones') as zopen:
+                        self.assertEqual(zopen.read(), b'111')
+                    with zipf.open('twos') as zopen:
+                        self.assertEqual(zopen.read(), b'222')
+
+    def test_write(self):
+        for wrapper in (lambda f: f), Tellable, Unseekable:
+            with self.subTest(wrapper=wrapper):
+                f = io.BytesIO()
+                f.write(b'abc')
+                bf = io.BufferedWriter(f)
+                with zipfile.ZipFile(wrapper(bf), 'w', zipfile.ZIP_STORED) as zipfp:
+                    self.addCleanup(unlink, TESTFN)
+                    with open(TESTFN, 'wb') as f2:
+                        f2.write(b'111')
+                    zipfp.write(TESTFN, 'ones')
+                    with open(TESTFN, 'wb') as f2:
+                        f2.write(b'222')
+                    zipfp.write(TESTFN, 'twos')
+                self.assertEqual(f.getvalue()[:5], b'abcPK')
+                with zipfile.ZipFile(f, mode='r') as zipf:
+                    with zipf.open('ones') as zopen:
+                        self.assertEqual(zopen.read(), b'111')
+                    with zipf.open('twos') as zopen:
+                        self.assertEqual(zopen.read(), b'222')
 
 
 @requires_zlib
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -667,6 +667,26 @@
             self._file = None
             self._close(fileobj)
 
+# Provide the tell() method for an unseekable stream
+class _Tellable:
+    def __init__(self, fp):
+        self.fp = fp
+        self.offset = 0
+
+    def write(self, data):
+        n = self.fp.write(data)
+        self.offset += n
+        return n
+
+    def tell(self):
+        return self.offset
+
+    def flush(self):
+        self.fp.flush()
+
+    def close(self):
+        self.fp.close()
+
 
 class ZipExtFile(io.BufferedIOBase):
     """File-like object for reading an archive member.
@@ -994,6 +1014,7 @@
             self.filename = getattr(file, 'name', None)
         self._fileRefCnt = 1
         self._lock = threading.RLock()
+        self._seekable = True
 
         try:
             if mode == 'r':
@@ -1002,13 +1023,24 @@
                 # set the modified flag so central directory gets written
                 # even if no files are added to the archive
                 self._didModify = True
-                self.start_dir = self.fp.tell()
+                try:
+                    self.start_dir = self.fp.tell()
+                except (AttributeError, OSError):
+                    self.fp = _Tellable(self.fp)
+                    self.start_dir = 0
+                    self._seekable = False
+                else:
+                    # Some file-like objects can provide tell() but not seek()
+                    try:
+                        self.fp.seek(self.start_dir)
+                    except (AttributeError, OSError):
+                        self._seekable = False
             elif mode == 'a':
                 try:
                     # See if file is a zip file
                     self._RealGetContents()
                     # seek to start of directory and overwrite
-                    self.fp.seek(self.start_dir, 0)
+                    self.fp.seek(self.start_dir)
                 except BadZipFile:
                     # file is not a zip file, just append
                     self.fp.seek(0, 2)
@@ -1415,7 +1447,8 @@
         zinfo.file_size = st.st_size
         zinfo.flag_bits = 0x00
         with self._lock:
-            self.fp.seek(self.start_dir, 0)
+            if self._seekable:
+                self.fp.seek(self.start_dir)
             zinfo.header_offset = self.fp.tell()    # Start of header bytes
             if zinfo.compress_type == ZIP_LZMA:
                 # Compressed data includes an end-of-stream (EOS) marker
@@ -1436,6 +1469,8 @@
                 return
 
             cmpr = _get_compressor(zinfo.compress_type)
+            if not self._seekable:
+                zinfo.flag_bits |= 0x08
             with open(filename, "rb") as fp:
                 # Must overwrite CRC and sizes with correct data later
                 zinfo.CRC = CRC = 0
@@ -1464,17 +1499,24 @@
                 zinfo.compress_size = file_size
             zinfo.CRC = CRC
             zinfo.file_size = file_size
-            if not zip64 and self._allowZip64:
-                if file_size > ZIP64_LIMIT:
-                    raise RuntimeError('File size has increased during compressing')
-                if compress_size > ZIP64_LIMIT:
-                    raise RuntimeError('Compressed size larger than uncompressed size')
-            # Seek backwards and write file header (which will now include
-            # correct CRC and file sizes)
-            self.start_dir = self.fp.tell()       # Preserve current position in file
-            self.fp.seek(zinfo.header_offset, 0)
-            self.fp.write(zinfo.FileHeader(zip64))
-            self.fp.seek(self.start_dir, 0)
+            if zinfo.flag_bits & 0x08:
+                # Write CRC and file sizes after the file data
+                fmt = '<LQQ' if zip64 else '<LLL'
+                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
+                                          zinfo.file_size))
+                self.start_dir = self.fp.tell()
+            else:
+                if not zip64 and self._allowZip64:
+                    if file_size > ZIP64_LIMIT:
+                        raise RuntimeError('File size has increased during compressing')
+                    if compress_size > ZIP64_LIMIT:
+                        raise RuntimeError('Compressed size larger than uncompressed size')
+                # Seek backwards and write file header (which will now include
+                # correct CRC and file sizes)
+                self.start_dir = self.fp.tell() # Preserve current position in file
+                self.fp.seek(zinfo.header_offset)
+                self.fp.write(zinfo.FileHeader(zip64))
+                self.fp.seek(self.start_dir)
             self.filelist.append(zinfo)
             self.NameToInfo[zinfo.filename] = zinfo
 
@@ -1504,11 +1546,8 @@
 
         zinfo.file_size = len(data)            # Uncompressed size
         with self._lock:
-            try:
+            if self._seekable:
                 self.fp.seek(self.start_dir)
-            except (AttributeError, io.UnsupportedOperation):
-                # Some file-like objects can provide tell() but not seek()
-                pass
             zinfo.header_offset = self.fp.tell()    # Start of header data
             if compress_type is not None:
                 zinfo.compress_type = compress_type
@@ -1557,11 +1596,8 @@
         try:
             if self.mode in ("w", "a") and self._didModify: # write ending records
                 with self._lock:
-                    try:
+                    if self._seekable:
                         self.fp.seek(self.start_dir)
-                    except (AttributeError, io.UnsupportedOperation):
-                        # Some file-like objects can provide tell() but not seek()
-                        pass
                     self._write_end_record()
         finally:
             fp = self.fp
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -23,6 +23,8 @@
 Library
 -------
 
+- Issue #23252:  Added support for writing ZIP files to unseekable streams.
+
 - Issue #21526: Tkinter now supports new boolean type in Tcl 8.5.
 
 - Issue #23647: Increase impalib's MAXLINE to accommodate modern mailbox sizes.

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list