[Python-checkins] cpython: Issue #14099: Writing to ZipFile and reading multiple ZipExtFiles is threadsafe now.

serhiy.storchaka python-checkins at python.org
Mon Jan 26 13:02:20 CET 2015


https://hg.python.org/cpython/rev/4973ccd46e32
changeset:   94314:4973ccd46e32
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Mon Jan 26 13:53:38 2015 +0200
summary:
  Issue #14099: Writing to ZipFile and reading multiple ZipExtFiles is
threadsafe now.

files:
  Lib/zipfile.py |  401 +++++++++++++++++++-----------------
  Misc/NEWS      |    3 +
  2 files changed, 209 insertions(+), 195 deletions(-)


diff --git a/Lib/zipfile.py b/Lib/zipfile.py
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -13,6 +13,7 @@
 import shutil
 import struct
 import binascii
+import threading
 
 
 try:
@@ -647,16 +648,18 @@
 
 
 class _SharedFile:
-    def __init__(self, file, pos, close):
+    def __init__(self, file, pos, close, lock):
         self._file = file
         self._pos = pos
         self._close = close
+        self._lock = lock
 
     def read(self, n=-1):
-        self._file.seek(self._pos)
-        data = self._file.read(n)
-        self._pos = self._file.tell()
-        return data
+        with self._lock:
+            self._file.seek(self._pos)
+            data = self._file.read(n)
+            self._pos = self._file.tell()
+            return data
 
     def close(self):
         if self._file is not None:
@@ -990,6 +993,7 @@
             self.fp = file
             self.filename = getattr(file, 'name', None)
         self._fileRefCnt = 1
+        self._lock = threading.RLock()
 
         try:
             if mode == 'r':
@@ -1214,7 +1218,7 @@
             zinfo = self.getinfo(name)
 
         self._fileRefCnt += 1
-        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose)
+        zef_file = _SharedFile(self.fp, zinfo.header_offset, self._fpclose, self._lock)
         try:
             # Skip the file header:
             fheader = zef_file.read(sizeFileHeader)
@@ -1410,68 +1414,69 @@
 
         zinfo.file_size = st.st_size
         zinfo.flag_bits = 0x00
-        self.fp.seek(self.start_dir, 0)
-        zinfo.header_offset = self.fp.tell()    # Start of header bytes
-        if zinfo.compress_type == ZIP_LZMA:
-            # Compressed data includes an end-of-stream (EOS) marker
-            zinfo.flag_bits |= 0x02
+        with self._lock:
+            self.fp.seek(self.start_dir, 0)
+            zinfo.header_offset = self.fp.tell()    # Start of header bytes
+            if zinfo.compress_type == ZIP_LZMA:
+                # Compressed data includes an end-of-stream (EOS) marker
+                zinfo.flag_bits |= 0x02
 
-        self._writecheck(zinfo)
-        self._didModify = True
+            self._writecheck(zinfo)
+            self._didModify = True
 
-        if isdir:
-            zinfo.file_size = 0
-            zinfo.compress_size = 0
-            zinfo.CRC = 0
-            zinfo.external_attr |= 0x10  # MS-DOS directory flag
+            if isdir:
+                zinfo.file_size = 0
+                zinfo.compress_size = 0
+                zinfo.CRC = 0
+                zinfo.external_attr |= 0x10  # MS-DOS directory flag
+                self.filelist.append(zinfo)
+                self.NameToInfo[zinfo.filename] = zinfo
+                self.fp.write(zinfo.FileHeader(False))
+                self.start_dir = self.fp.tell()
+                return
+
+            cmpr = _get_compressor(zinfo.compress_type)
+            with open(filename, "rb") as fp:
+                # Must overwrite CRC and sizes with correct data later
+                zinfo.CRC = CRC = 0
+                zinfo.compress_size = compress_size = 0
+                # Compressed size can be larger than uncompressed size
+                zip64 = self._allowZip64 and \
+                    zinfo.file_size * 1.05 > ZIP64_LIMIT
+                self.fp.write(zinfo.FileHeader(zip64))
+                file_size = 0
+                while 1:
+                    buf = fp.read(1024 * 8)
+                    if not buf:
+                        break
+                    file_size = file_size + len(buf)
+                    CRC = crc32(buf, CRC) & 0xffffffff
+                    if cmpr:
+                        buf = cmpr.compress(buf)
+                        compress_size = compress_size + len(buf)
+                    self.fp.write(buf)
+            if cmpr:
+                buf = cmpr.flush()
+                compress_size = compress_size + len(buf)
+                self.fp.write(buf)
+                zinfo.compress_size = compress_size
+            else:
+                zinfo.compress_size = file_size
+            zinfo.CRC = CRC
+            zinfo.file_size = file_size
+            if not zip64 and self._allowZip64:
+                if file_size > ZIP64_LIMIT:
+                    raise RuntimeError('File size has increased during compressing')
+                if compress_size > ZIP64_LIMIT:
+                    raise RuntimeError('Compressed size larger than uncompressed size')
+            # Seek backwards and write file header (which will now include
+            # correct CRC and file sizes)
+            self.start_dir = self.fp.tell()       # Preserve current position in file
+            self.fp.seek(zinfo.header_offset, 0)
+            self.fp.write(zinfo.FileHeader(zip64))
+            self.fp.seek(self.start_dir, 0)
             self.filelist.append(zinfo)
             self.NameToInfo[zinfo.filename] = zinfo
-            self.fp.write(zinfo.FileHeader(False))
-            self.start_dir = self.fp.tell()
-            return
-
-        cmpr = _get_compressor(zinfo.compress_type)
-        with open(filename, "rb") as fp:
-            # Must overwrite CRC and sizes with correct data later
-            zinfo.CRC = CRC = 0
-            zinfo.compress_size = compress_size = 0
-            # Compressed size can be larger than uncompressed size
-            zip64 = self._allowZip64 and \
-                zinfo.file_size * 1.05 > ZIP64_LIMIT
-            self.fp.write(zinfo.FileHeader(zip64))
-            file_size = 0
-            while 1:
-                buf = fp.read(1024 * 8)
-                if not buf:
-                    break
-                file_size = file_size + len(buf)
-                CRC = crc32(buf, CRC) & 0xffffffff
-                if cmpr:
-                    buf = cmpr.compress(buf)
-                    compress_size = compress_size + len(buf)
-                self.fp.write(buf)
-        if cmpr:
-            buf = cmpr.flush()
-            compress_size = compress_size + len(buf)
-            self.fp.write(buf)
-            zinfo.compress_size = compress_size
-        else:
-            zinfo.compress_size = file_size
-        zinfo.CRC = CRC
-        zinfo.file_size = file_size
-        if not zip64 and self._allowZip64:
-            if file_size > ZIP64_LIMIT:
-                raise RuntimeError('File size has increased during compressing')
-            if compress_size > ZIP64_LIMIT:
-                raise RuntimeError('Compressed size larger than uncompressed size')
-        # Seek backwards and write file header (which will now include
-        # correct CRC and file sizes)
-        self.start_dir = self.fp.tell()       # Preserve current position in file
-        self.fp.seek(zinfo.header_offset, 0)
-        self.fp.write(zinfo.FileHeader(zip64))
-        self.fp.seek(self.start_dir, 0)
-        self.filelist.append(zinfo)
-        self.NameToInfo[zinfo.filename] = zinfo
 
     def writestr(self, zinfo_or_arcname, data, compress_type=None):
         """Write a file into the archive.  The contents is 'data', which
@@ -1498,38 +1503,39 @@
                 "Attempt to write to ZIP archive that was already closed")
 
         zinfo.file_size = len(data)            # Uncompressed size
-        self.fp.seek(self.start_dir, 0)
-        zinfo.header_offset = self.fp.tell()    # Start of header data
-        if compress_type is not None:
-            zinfo.compress_type = compress_type
-        if zinfo.compress_type == ZIP_LZMA:
-            # Compressed data includes an end-of-stream (EOS) marker
-            zinfo.flag_bits |= 0x02
+        with self._lock:
+            self.fp.seek(self.start_dir, 0)
+            zinfo.header_offset = self.fp.tell()    # Start of header data
+            if compress_type is not None:
+                zinfo.compress_type = compress_type
+            if zinfo.compress_type == ZIP_LZMA:
+                # Compressed data includes an end-of-stream (EOS) marker
+                zinfo.flag_bits |= 0x02
 
-        self._writecheck(zinfo)
-        self._didModify = True
-        zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
-        co = _get_compressor(zinfo.compress_type)
-        if co:
-            data = co.compress(data) + co.flush()
-            zinfo.compress_size = len(data)    # Compressed size
-        else:
-            zinfo.compress_size = zinfo.file_size
-        zip64 = zinfo.file_size > ZIP64_LIMIT or \
-            zinfo.compress_size > ZIP64_LIMIT
-        if zip64 and not self._allowZip64:
-            raise LargeZipFile("Filesize would require ZIP64 extensions")
-        self.fp.write(zinfo.FileHeader(zip64))
-        self.fp.write(data)
-        if zinfo.flag_bits & 0x08:
-            # Write CRC and file sizes after the file data
-            fmt = '<LQQ' if zip64 else '<LLL'
-            self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
-                                      zinfo.file_size))
-        self.fp.flush()
-        self.start_dir = self.fp.tell()
-        self.filelist.append(zinfo)
-        self.NameToInfo[zinfo.filename] = zinfo
+            self._writecheck(zinfo)
+            self._didModify = True
+            zinfo.CRC = crc32(data) & 0xffffffff       # CRC-32 checksum
+            co = _get_compressor(zinfo.compress_type)
+            if co:
+                data = co.compress(data) + co.flush()
+                zinfo.compress_size = len(data)    # Compressed size
+            else:
+                zinfo.compress_size = zinfo.file_size
+            zip64 = zinfo.file_size > ZIP64_LIMIT or \
+                zinfo.compress_size > ZIP64_LIMIT
+            if zip64 and not self._allowZip64:
+                raise LargeZipFile("Filesize would require ZIP64 extensions")
+            self.fp.write(zinfo.FileHeader(zip64))
+            self.fp.write(data)
+            if zinfo.flag_bits & 0x08:
+                # Write CRC and file sizes after the file data
+                fmt = '<LQQ' if zip64 else '<LLL'
+                self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
+                                          zinfo.file_size))
+            self.fp.flush()
+            self.start_dir = self.fp.tell()
+            self.filelist.append(zinfo)
+            self.NameToInfo[zinfo.filename] = zinfo
 
     def __del__(self):
         """Call the "close()" method in case the user forgot."""
@@ -1543,111 +1549,116 @@
 
         try:
             if self.mode in ("w", "a") and self._didModify: # write ending records
-                self.fp.seek(self.start_dir, 0)
-                for zinfo in self.filelist:         # write central directory
-                    dt = zinfo.date_time
-                    dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
-                    dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
-                    extra = []
-                    if zinfo.file_size > ZIP64_LIMIT \
-                       or zinfo.compress_size > ZIP64_LIMIT:
-                        extra.append(zinfo.file_size)
-                        extra.append(zinfo.compress_size)
-                        file_size = 0xffffffff
-                        compress_size = 0xffffffff
-                    else:
-                        file_size = zinfo.file_size
-                        compress_size = zinfo.compress_size
-
-                    if zinfo.header_offset > ZIP64_LIMIT:
-                        extra.append(zinfo.header_offset)
-                        header_offset = 0xffffffff
-                    else:
-                        header_offset = zinfo.header_offset
-
-                    extra_data = zinfo.extra
-                    min_version = 0
-                    if extra:
-                        # Append a ZIP64 field to the extra's
-                        extra_data = struct.pack(
-                            '<HH' + 'Q'*len(extra),
-                            1, 8*len(extra), *extra) + extra_data
-
-                        min_version = ZIP64_VERSION
-
-                    if zinfo.compress_type == ZIP_BZIP2:
-                        min_version = max(BZIP2_VERSION, min_version)
-                    elif zinfo.compress_type == ZIP_LZMA:
-                        min_version = max(LZMA_VERSION, min_version)
-
-                    extract_version = max(min_version, zinfo.extract_version)
-                    create_version = max(min_version, zinfo.create_version)
-                    try:
-                        filename, flag_bits = zinfo._encodeFilenameFlags()
-                        centdir = struct.pack(structCentralDir,
-                                              stringCentralDir, create_version,
-                                              zinfo.create_system, extract_version, zinfo.reserved,
-                                              flag_bits, zinfo.compress_type, dostime, dosdate,
-                                              zinfo.CRC, compress_size, file_size,
-                                              len(filename), len(extra_data), len(zinfo.comment),
-                                              0, zinfo.internal_attr, zinfo.external_attr,
-                                              header_offset)
-                    except DeprecationWarning:
-                        print((structCentralDir, stringCentralDir, create_version,
-                               zinfo.create_system, extract_version, zinfo.reserved,
-                               zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
-                               zinfo.CRC, compress_size, file_size,
-                               len(zinfo.filename), len(extra_data), len(zinfo.comment),
-                               0, zinfo.internal_attr, zinfo.external_attr,
-                               header_offset), file=sys.stderr)
-                        raise
-                    self.fp.write(centdir)
-                    self.fp.write(filename)
-                    self.fp.write(extra_data)
-                    self.fp.write(zinfo.comment)
-
-                pos2 = self.fp.tell()
-                # Write end-of-zip-archive record
-                centDirCount = len(self.filelist)
-                centDirSize = pos2 - self.start_dir
-                centDirOffset = self.start_dir
-                requires_zip64 = None
-                if centDirCount > ZIP_FILECOUNT_LIMIT:
-                    requires_zip64 = "Files count"
-                elif centDirOffset > ZIP64_LIMIT:
-                    requires_zip64 = "Central directory offset"
-                elif centDirSize > ZIP64_LIMIT:
-                    requires_zip64 = "Central directory size"
-                if requires_zip64:
-                    # Need to write the ZIP64 end-of-archive records
-                    if not self._allowZip64:
-                        raise LargeZipFile(requires_zip64 +
-                                           " would require ZIP64 extensions")
-                    zip64endrec = struct.pack(
-                        structEndArchive64, stringEndArchive64,
-                        44, 45, 45, 0, 0, centDirCount, centDirCount,
-                        centDirSize, centDirOffset)
-                    self.fp.write(zip64endrec)
-
-                    zip64locrec = struct.pack(
-                        structEndArchive64Locator,
-                        stringEndArchive64Locator, 0, pos2, 1)
-                    self.fp.write(zip64locrec)
-                    centDirCount = min(centDirCount, 0xFFFF)
-                    centDirSize = min(centDirSize, 0xFFFFFFFF)
-                    centDirOffset = min(centDirOffset, 0xFFFFFFFF)
-
-                endrec = struct.pack(structEndArchive, stringEndArchive,
-                                     0, 0, centDirCount, centDirCount,
-                                     centDirSize, centDirOffset, len(self._comment))
-                self.fp.write(endrec)
-                self.fp.write(self._comment)
-                self.fp.flush()
+                with self._lock:
+                    self.fp.seek(self.start_dir, 0)
+                    self._write_end_record()
         finally:
             fp = self.fp
             self.fp = None
             self._fpclose(fp)
 
+    def _write_end_record(self):
+        self.fp.seek(self.start_dir, 0)
+        for zinfo in self.filelist:         # write central directory
+            dt = zinfo.date_time
+            dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
+            dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
+            extra = []
+            if zinfo.file_size > ZIP64_LIMIT \
+               or zinfo.compress_size > ZIP64_LIMIT:
+                extra.append(zinfo.file_size)
+                extra.append(zinfo.compress_size)
+                file_size = 0xffffffff
+                compress_size = 0xffffffff
+            else:
+                file_size = zinfo.file_size
+                compress_size = zinfo.compress_size
+
+            if zinfo.header_offset > ZIP64_LIMIT:
+                extra.append(zinfo.header_offset)
+                header_offset = 0xffffffff
+            else:
+                header_offset = zinfo.header_offset
+
+            extra_data = zinfo.extra
+            min_version = 0
+            if extra:
+                # Append a ZIP64 field to the extra's
+                extra_data = struct.pack(
+                    '<HH' + 'Q'*len(extra),
+                    1, 8*len(extra), *extra) + extra_data
+
+                min_version = ZIP64_VERSION
+
+            if zinfo.compress_type == ZIP_BZIP2:
+                min_version = max(BZIP2_VERSION, min_version)
+            elif zinfo.compress_type == ZIP_LZMA:
+                min_version = max(LZMA_VERSION, min_version)
+
+            extract_version = max(min_version, zinfo.extract_version)
+            create_version = max(min_version, zinfo.create_version)
+            try:
+                filename, flag_bits = zinfo._encodeFilenameFlags()
+                centdir = struct.pack(structCentralDir,
+                                      stringCentralDir, create_version,
+                                      zinfo.create_system, extract_version, zinfo.reserved,
+                                      flag_bits, zinfo.compress_type, dostime, dosdate,
+                                      zinfo.CRC, compress_size, file_size,
+                                      len(filename), len(extra_data), len(zinfo.comment),
+                                      0, zinfo.internal_attr, zinfo.external_attr,
+                                      header_offset)
+            except DeprecationWarning:
+                print((structCentralDir, stringCentralDir, create_version,
+                       zinfo.create_system, extract_version, zinfo.reserved,
+                       zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
+                       zinfo.CRC, compress_size, file_size,
+                       len(zinfo.filename), len(extra_data), len(zinfo.comment),
+                       0, zinfo.internal_attr, zinfo.external_attr,
+                       header_offset), file=sys.stderr)
+                raise
+            self.fp.write(centdir)
+            self.fp.write(filename)
+            self.fp.write(extra_data)
+            self.fp.write(zinfo.comment)
+
+        pos2 = self.fp.tell()
+        # Write end-of-zip-archive record
+        centDirCount = len(self.filelist)
+        centDirSize = pos2 - self.start_dir
+        centDirOffset = self.start_dir
+        requires_zip64 = None
+        if centDirCount > ZIP_FILECOUNT_LIMIT:
+            requires_zip64 = "Files count"
+        elif centDirOffset > ZIP64_LIMIT:
+            requires_zip64 = "Central directory offset"
+        elif centDirSize > ZIP64_LIMIT:
+            requires_zip64 = "Central directory size"
+        if requires_zip64:
+            # Need to write the ZIP64 end-of-archive records
+            if not self._allowZip64:
+                raise LargeZipFile(requires_zip64 +
+                                   " would require ZIP64 extensions")
+            zip64endrec = struct.pack(
+                structEndArchive64, stringEndArchive64,
+                44, 45, 45, 0, 0, centDirCount, centDirCount,
+                centDirSize, centDirOffset)
+            self.fp.write(zip64endrec)
+
+            zip64locrec = struct.pack(
+                structEndArchive64Locator,
+                stringEndArchive64Locator, 0, pos2, 1)
+            self.fp.write(zip64locrec)
+            centDirCount = min(centDirCount, 0xFFFF)
+            centDirSize = min(centDirSize, 0xFFFFFFFF)
+            centDirOffset = min(centDirOffset, 0xFFFFFFFF)
+
+        endrec = struct.pack(structEndArchive, stringEndArchive,
+                             0, 0, centDirCount, centDirCount,
+                             centDirSize, centDirOffset, len(self._comment))
+        self.fp.write(endrec)
+        self.fp.write(self._comment)
+        self.fp.flush()
+
     def _fpclose(self, fp):
         assert self._fileRefCnt > 0
         self._fileRefCnt -= 1
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -218,6 +218,9 @@
 Library
 -------
 
+- Issue #14099: Writing to ZipFile and reading multiple ZipExtFiles is
+  threadsafe now.
+
 - Issue #19361: JSON decoder now raises JSONDecodeError instead of ValueError.
 
 - Issue #18518: timeit now rejects statements which can't be compiled outside

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list