[Python-checkins] cpython (merge 3.4 -> default): Issue #14099: ZipFile.open() no longer reopen the underlying file. Objects

serhiy.storchaka python-checkins at python.org
Wed Dec 3 08:18:40 CET 2014


https://hg.python.org/cpython/rev/334c01aa7f93
changeset:   93711:334c01aa7f93
parent:      93706:fd80195b920f
parent:      93710:e5bb3044402b
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Wed Dec 03 09:14:36 2014 +0200
summary:
  Issue #14099: ZipFile.open() no longer reopen the underlying file.  Objects
returned by ZipFile.open() can now operate independently of the ZipFile even
if the ZipFile was created by passing in a file-like object as the first
argument to the constructor.

files:
  Doc/library/zipfile.rst  |   10 +-
  Lib/test/test_zipfile.py |  121 +++++++++++++++++++++-----
  Lib/zipfile.py           |  103 ++++++++++++++--------
  Misc/NEWS                |    5 +
  4 files changed, 166 insertions(+), 73 deletions(-)


diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -219,14 +219,8 @@
 
    .. note::
 
-      If the ZipFile was created by passing in a file-like object as the  first
-      argument to the constructor, then the object returned by :meth:`.open` shares the
-      ZipFile's file pointer.  Under these  circumstances, the object returned by
-      :meth:`.open` should not  be used after any additional operations are performed
-      on the  ZipFile object.  If the ZipFile was created by passing in a string (the
-      filename) as the first argument to the constructor, then  :meth:`.open` will
-      create a new file object that will be held by the ZipExtFile, allowing it to
-      operate independently of the  ZipFile.
+      Objects returned by :meth:`.open` can operate independently of the
+      ZipFile.
 
    .. note::
 
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -1,3 +1,4 @@
+import contextlib
 import io
 import os
 import sys
@@ -25,6 +26,9 @@
                    ('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'),
                    ('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')]
 
+def getrandbytes(size):
+    return getrandbits(8 * size).to_bytes(size, 'little')
+
 def get_files(test):
     yield TESTFN2
     with TemporaryFile() as f:
@@ -289,7 +293,7 @@
         # than requested.
         for test_size in (1, 4095, 4096, 4097, 16384):
             file_size = test_size + 1
-            junk = getrandbits(8 * file_size).to_bytes(file_size, 'little')
+            junk = getrandbytes(file_size)
             with zipfile.ZipFile(io.BytesIO(), "w", self.compression) as zipf:
                 zipf.writestr('foo', junk)
                 with zipf.open('foo', 'r') as fp:
@@ -1666,46 +1670,111 @@
 
 @requires_zlib
 class TestsWithMultipleOpens(unittest.TestCase):
-    def setUp(self):
+    @classmethod
+    def setUpClass(cls):
+        cls.data1 = b'111' + getrandbytes(10000)
+        cls.data2 = b'222' + getrandbytes(10000)
+
+    def make_test_archive(self, f):
         # Create the ZIP archive
-        with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_DEFLATED) as zipfp:
-            zipfp.writestr('ones', '1'*FIXEDTEST_SIZE)
-            zipfp.writestr('twos', '2'*FIXEDTEST_SIZE)
+        with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipfp:
+            zipfp.writestr('ones', self.data1)
+            zipfp.writestr('twos', self.data2)
 
     def test_same_file(self):
         # Verify that (when the ZipFile is in control of creating file objects)
         # multiple open() calls can be made without interfering with each other.
-        with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
-            with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:
-                data1 = zopen1.read(500)
-                data2 = zopen2.read(500)
-                data1 += zopen1.read(500)
-                data2 += zopen2.read(500)
-            self.assertEqual(data1, data2)
+        for f in get_files(self):
+            self.make_test_archive(f)
+            with zipfile.ZipFile(f, mode="r") as zipf:
+                with zipf.open('ones') as zopen1, zipf.open('ones') as zopen2:
+                    data1 = zopen1.read(500)
+                    data2 = zopen2.read(500)
+                    data1 += zopen1.read()
+                    data2 += zopen2.read()
+                self.assertEqual(data1, data2)
+                self.assertEqual(data1, self.data1)
 
     def test_different_file(self):
         # Verify that (when the ZipFile is in control of creating file objects)
         # multiple open() calls can be made without interfering with each other.
-        with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
-            with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
-                data1 = zopen1.read(500)
-                data2 = zopen2.read(500)
-                data1 += zopen1.read(500)
-                data2 += zopen2.read(500)
-            self.assertEqual(data1, b'1'*FIXEDTEST_SIZE)
-            self.assertEqual(data2, b'2'*FIXEDTEST_SIZE)
+        for f in get_files(self):
+            self.make_test_archive(f)
+            with zipfile.ZipFile(f, mode="r") as zipf:
+                with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
+                    data1 = zopen1.read(500)
+                    data2 = zopen2.read(500)
+                    data1 += zopen1.read()
+                    data2 += zopen2.read()
+                self.assertEqual(data1, self.data1)
+                self.assertEqual(data2, self.data2)
 
     def test_interleaved(self):
         # Verify that (when the ZipFile is in control of creating file objects)
         # multiple open() calls can be made without interfering with each other.
-        with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
-            with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
+        for f in get_files(self):
+            self.make_test_archive(f)
+            with zipfile.ZipFile(f, mode="r") as zipf:
+                with zipf.open('ones') as zopen1, zipf.open('twos') as zopen2:
+                    data1 = zopen1.read(500)
+                    data2 = zopen2.read(500)
+                    data1 += zopen1.read()
+                    data2 += zopen2.read()
+                self.assertEqual(data1, self.data1)
+                self.assertEqual(data2, self.data2)
+
+    def test_read_after_close(self):
+        for f in get_files(self):
+            self.make_test_archive(f)
+            with contextlib.ExitStack() as stack:
+                with zipfile.ZipFile(f, 'r') as zipf:
+                    zopen1 = stack.enter_context(zipf.open('ones'))
+                    zopen2 = stack.enter_context(zipf.open('twos'))
                 data1 = zopen1.read(500)
                 data2 = zopen2.read(500)
-                data1 += zopen1.read(500)
-                data2 += zopen2.read(500)
-            self.assertEqual(data1, b'1'*FIXEDTEST_SIZE)
-            self.assertEqual(data2, b'2'*FIXEDTEST_SIZE)
+                data1 += zopen1.read()
+                data2 += zopen2.read()
+            self.assertEqual(data1, self.data1)
+            self.assertEqual(data2, self.data2)
+
+    def test_read_after_write(self):
+        for f in get_files(self):
+            with zipfile.ZipFile(f, 'w', zipfile.ZIP_DEFLATED) as zipf:
+                zipf.writestr('ones', self.data1)
+                zipf.writestr('twos', self.data2)
+                with zipf.open('ones') as zopen1:
+                    data1 = zopen1.read(500)
+            self.assertEqual(data1, self.data1[:500])
+            with zipfile.ZipFile(f, 'r') as zipf:
+                data1 = zipf.read('ones')
+                data2 = zipf.read('twos')
+            self.assertEqual(data1, self.data1)
+            self.assertEqual(data2, self.data2)
+
+    def test_write_after_read(self):
+        for f in get_files(self):
+            with zipfile.ZipFile(f, "w", zipfile.ZIP_DEFLATED) as zipf:
+                zipf.writestr('ones', self.data1)
+                with zipf.open('ones') as zopen1:
+                    zopen1.read(500)
+                    zipf.writestr('twos', self.data2)
+            with zipfile.ZipFile(f, 'r') as zipf:
+                data1 = zipf.read('ones')
+                data2 = zipf.read('twos')
+            self.assertEqual(data1, self.data1)
+            self.assertEqual(data2, self.data2)
+
+    def test_many_opens(self):
+        # Verify that read() and open() promptly close the file descriptor,
+        # and don't rely on the garbage collector to free resources.
+        self.make_test_archive(TESTFN2)
+        with zipfile.ZipFile(TESTFN2, mode="r") as zipf:
+            for x in range(100):
+                zipf.read('ones')
+                with zipf.open('ones') as zopen1:
+                    pass
+        with open(os.devnull) as f:
+            self.assertLess(f.fileno(), 100)
 
     def tearDown(self):
         unlink(TESTFN2)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -646,6 +646,25 @@
             raise NotImplementedError("compression type %d" % (compress_type,))
 
 
+class _SharedFile:
+    def __init__(self, file, pos, close):
+        self._file = file
+        self._pos = pos
+        self._close = close
+
+    def read(self, n=-1):
+        self._file.seek(self._pos)
+        data = self._file.read(n)
+        self._pos = self._file.tell()
+        return data
+
+    def close(self):
+        if self._file is not None:
+            fileobj = self._file
+            self._file = None
+            self._close(fileobj)
+
+
 class ZipExtFile(io.BufferedIOBase):
     """File-like object for reading an archive member.
        Is returned by ZipFile.open().
@@ -945,7 +964,7 @@
         self.NameToInfo = {}    # Find file info given name
         self.filelist = []      # List of ZipInfo instances for archive
         self.compression = compression  # Method of compression
-        self.mode = key = mode.replace('b', '')[0]
+        self.mode = mode
         self.pwd = None
         self._comment = b''
 
@@ -954,28 +973,33 @@
             # No, it's a filename
             self._filePassed = 0
             self.filename = file
-            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
-            try:
-                self.fp = io.open(file, modeDict[mode])
-            except OSError:
-                if mode == 'a':
-                    mode = key = 'w'
-                    self.fp = io.open(file, modeDict[mode])
-                else:
+            modeDict = {'r' : 'rb', 'w': 'w+b', 'a' : 'r+b',
+                        'r+b': 'w+b', 'w+b': 'wb'}
+            filemode = modeDict[mode]
+            while True:
+                try:
+                    self.fp = io.open(file, filemode)
+                except OSError:
+                    if filemode in modeDict:
+                        filemode = modeDict[filemode]
+                        continue
                     raise
+                break
         else:
             self._filePassed = 1
             self.fp = file
             self.filename = getattr(file, 'name', None)
+        self._fileRefCnt = 1
 
         try:
-            if key == 'r':
+            if mode == 'r':
                 self._RealGetContents()
-            elif key == 'w':
+            elif mode == 'w':
                 # set the modified flag so central directory gets written
                 # even if no files are added to the archive
                 self._didModify = True
-            elif key == 'a':
+                self.start_dir = 0
+            elif mode == 'a':
                 try:
                     # See if file is a zip file
                     self._RealGetContents()
@@ -988,13 +1012,13 @@
                     # set the modified flag so central directory gets written
                     # even if no files are added to the archive
                     self._didModify = True
+                    self.start_dir = self.fp.tell()
             else:
                 raise RuntimeError('Mode must be "r", "w" or "a"')
         except:
             fp = self.fp
             self.fp = None
-            if not self._filePassed:
-                fp.close()
+            self._fpclose(fp)
             raise
 
     def __enter__(self):
@@ -1181,23 +1205,17 @@
             raise RuntimeError(
                 "Attempt to read ZIP archive that was already closed")
 
-        # Only open a new file for instances where we were not
-        # given a file object in the constructor
-        if self._filePassed:
-            zef_file = self.fp
+        # Make sure we have an info object
+        if isinstance(name, ZipInfo):
+            # 'name' is already an info object
+            zinfo = name
         else:
-            zef_file = io.open(self.filename, 'rb')
+            # Get info object for name
+            zinfo = self.getinfo(name)
 
+        self._fileRefCnt += 1
+        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
         try:
-            # Make sure we have an info object
-            if isinstance(name, ZipInfo):
-                # 'name' is already an info object
-                zinfo = name
-            else:
-                # Get info object for name
-                zinfo = self.getinfo(name)
-            zef_file.seek(zinfo.header_offset, 0)
-
             # Skip the file header:
             fheader = zef_file.read(sizeFileHeader)
             if len(fheader) != sizeFileHeader:
@@ -1256,11 +1274,9 @@
                 if h[11] != check_byte:
                     raise RuntimeError("Bad password for file", name)
 
-            return ZipExtFile(zef_file, mode, zinfo, zd,
-                              close_fileobj=not self._filePassed)
+            return ZipExtFile(zef_file, mode, zinfo, zd, True)
         except:
-            if not self._filePassed:
-                zef_file.close()
+            zef_file.close()
             raise
 
     def extract(self, member, path=None, pwd=None):
@@ -1394,6 +1410,7 @@
 
         zinfo.file_size = st.st_size
         zinfo.flag_bits = 0x00
+        self.fp.seek(self.start_dir, 0)
         zinfo.header_offset = self.fp.tell()    # Start of header bytes
         if zinfo.compress_type == ZIP_LZMA:
             # Compressed data includes an end-of-stream (EOS) marker
@@ -1410,6 +1427,7 @@
             self.filelist.append(zinfo)
             self.NameToInfo[zinfo.filename] = zinfo
             self.fp.write(zinfo.FileHeader(False))
+            self.start_dir = self.fp.tell()
             return
 
         cmpr = _get_compressor(zinfo.compress_type)
@@ -1448,10 +1466,10 @@
                 raise RuntimeError('Compressed size larger than uncompressed size')
         # Seek backwards and write file header (which will now include
         # correct CRC and file sizes)
-        position = self.fp.tell()       # Preserve current position in file
+        self.start_dir = self.fp.tell()       # Preserve current position in file
         self.fp.seek(zinfo.header_offset, 0)
         self.fp.write(zinfo.FileHeader(zip64))
-        self.fp.seek(position, 0)
+        self.fp.seek(self.start_dir, 0)
         self.filelist.append(zinfo)
         self.NameToInfo[zinfo.filename] = zinfo
 
@@ -1480,6 +1498,7 @@
                 "Attempt to write to ZIP archive that was already closed")
 
         zinfo.file_size = len(data)            # Uncompressed size
+        self.fp.seek(self.start_dir, 0)
         zinfo.header_offset = self.fp.tell()    # Start of header data
         if compress_type is not None:
             zinfo.compress_type = compress_type
@@ -1508,6 +1527,7 @@
             self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
                                       zinfo.file_size))
         self.fp.flush()
+        self.start_dir = self.fp.tell()
         self.filelist.append(zinfo)
         self.NameToInfo[zinfo.filename] = zinfo
 
@@ -1523,7 +1543,7 @@
 
         try:
             if self.mode in ("w", "a") and self._didModify: # write ending records
-                pos1 = self.fp.tell()
+                self.fp.seek(self.start_dir, 0)
                 for zinfo in self.filelist:         # write central directory
                     dt = zinfo.date_time
                     dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
@@ -1589,8 +1609,8 @@
                 pos2 = self.fp.tell()
                 # Write end-of-zip-archive record
                 centDirCount = len(self.filelist)
-                centDirSize = pos2 - pos1
-                centDirOffset = pos1
+                centDirSize = pos2 - self.start_dir
+                centDirOffset = self.start_dir
                 requires_zip64 = None
                 if centDirCount > ZIP_FILECOUNT_LIMIT:
                     requires_zip64 = "Files count"
@@ -1626,8 +1646,13 @@
         finally:
             fp = self.fp
             self.fp = None
-            if not self._filePassed:
-                fp.close()
+            self._fpclose(fp)
+
+    def _fpclose(self, fp):
+        assert self._fileRefCnt > 0
+        self._fileRefCnt -= 1
+        if not self._fileRefCnt and not self._filePassed:
+            fp.close()
 
 
 class PyZipFile(ZipFile):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -194,6 +194,11 @@
 Library
 -------
 
+- Issue #14099: ZipFile.open() no longer reopen the underlying file.  Objects
+  returned by ZipFile.open() can now operate independently of the ZipFile even
+  if the ZipFile was created by passing in a file-like object as the first
+  argument to the constructor.
+
 - Issue #22966: Fix __pycache__ pyc file name clobber when pyc_compile is
   asked to compile a source file containing multiple dots in the source file
   name.

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list