[Python-checkins] r68319 - in python/trunk: Doc/library/gzip.rst Lib/gzip.py Lib/test/test_gzip.py Misc/NEWS

antoine.pitrou python-checkins at python.org
Sun Jan 4 22:29:24 CET 2009


Author: antoine.pitrou
Date: Sun Jan  4 22:29:23 2009
New Revision: 68319

Log:
Issue #4272: Add an optional argument to the GzipFile constructor to override the timestamp in the gzip stream.



Modified:
   python/trunk/Doc/library/gzip.rst
   python/trunk/Lib/gzip.py
   python/trunk/Lib/test/test_gzip.py
   python/trunk/Misc/NEWS

Modified: python/trunk/Doc/library/gzip.rst
==============================================================================
--- python/trunk/Doc/library/gzip.rst	(original)
+++ python/trunk/Doc/library/gzip.rst	Sun Jan  4 22:29:23 2009
@@ -24,7 +24,7 @@
 The module defines the following items:
 
 
-.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj]]]])
+.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj[, mtime]]]]])
 
    Constructor for the :class:`GzipFile` class, which simulates most of the methods
    of a file object, with the exception of the :meth:`readinto` and
@@ -52,6 +52,15 @@
    level of compression; ``1`` is fastest and produces the least compression, and
    ``9`` is slowest and produces the most compression.  The default is ``9``.
 
+   The *mtime* argument is an optional numeric timestamp to be written to
+   the stream when compressing.  All :program:`gzip` compressed streams are
+   required to contain a timestamp.  If omitted or ``None``, the current
+   time is used.  This module ignores the timestamp when decompressing;
+   however, some programs, such as :program:`gunzip`, make use of it.
+   The format of the timestamp is the same as that of the return value of
+   ``time.time()`` and of the ``st_mtime`` member of the object returned
+   by ``os.stat()``.
+
    Calling a :class:`GzipFile` object's :meth:`close` method does not close
    *fileobj*, since you might wish to append more material after the compressed
    data.  This also allows you to pass a :class:`StringIO` object opened for

Modified: python/trunk/Lib/gzip.py
==============================================================================
--- python/trunk/Lib/gzip.py	(original)
+++ python/trunk/Lib/gzip.py	Sun Jan  4 22:29:23 2009
@@ -42,7 +42,7 @@
     max_read_chunk = 10 * 1024 * 1024   # 10Mb
 
     def __init__(self, filename=None, mode=None,
-                 compresslevel=9, fileobj=None):
+                 compresslevel=9, fileobj=None, mtime=None):
         """Constructor for the GzipFile class.
 
         At least one of fileobj and filename must be given a
@@ -69,6 +69,15 @@
         level of compression; 1 is fastest and produces the least compression,
         and 9 is slowest and produces the most compression.  The default is 9.
 
+        The mtime argument is an optional numeric timestamp to be written
+        to the stream when compressing.  All gzip compressed streams
+        are required to contain a timestamp.  If omitted or None, the
+        current time is used.  This module ignores the timestamp when
+        decompressing; however, some programs, such as gunzip, make use
+        of it.  The format of the timestamp is the same as that of the
+        return value of time.time() and of the st_mtime member of the
+        object returned by os.stat().
+
         """
 
         # guarantee the file is opened in binary mode on platforms
@@ -107,6 +116,7 @@
 
         self.fileobj = fileobj
         self.offset = 0
+        self.mtime = mtime
 
         if self.mode == WRITE:
             self._write_gzip_header()
@@ -140,7 +150,10 @@
         if fname:
             flags = FNAME
         self.fileobj.write(chr(flags))
-        write32u(self.fileobj, long(time.time()))
+        mtime = self.mtime
+        if mtime is None:
+            mtime = time.time()
+        write32u(self.fileobj, long(mtime))
         self.fileobj.write('\002')
         self.fileobj.write('\377')
         if fname:
@@ -158,10 +171,10 @@
         if method != 8:
             raise IOError, 'Unknown compression method'
         flag = ord( self.fileobj.read(1) )
-        # modtime = self.fileobj.read(4)
+        self.mtime = read32(self.fileobj)
         # extraflag = self.fileobj.read(1)
         # os = self.fileobj.read(1)
-        self.fileobj.read(6)
+        self.fileobj.read(2)
 
         if flag & FEXTRA:
             # Read & discard the extra field, if present

Modified: python/trunk/Lib/test/test_gzip.py
==============================================================================
--- python/trunk/Lib/test/test_gzip.py	(original)
+++ python/trunk/Lib/test/test_gzip.py	Sun Jan  4 22:29:23 2009
@@ -6,6 +6,7 @@
 from test import test_support
 import os
 import gzip
+import struct
 
 
 data1 = """  int length=DEFAULTALLOC, err = Z_OK;
@@ -160,6 +161,67 @@
             self.assertEqual(f.name, self.filename)
             f.close()
 
+    def test_mtime(self):
+        mtime = 123456789
+        fWrite = gzip.GzipFile(self.filename, 'w', mtime = mtime)
+        fWrite.write(data1)
+        fWrite.close()
+
+        fRead = gzip.GzipFile(self.filename)
+        dataRead = fRead.read()
+        self.assertEqual(dataRead, data1)
+        self.assert_(hasattr(fRead, 'mtime'))
+        self.assertEqual(fRead.mtime, mtime)
+        fRead.close()
+
+    def test_metadata(self):
+        mtime = 123456789
+
+        fWrite = gzip.GzipFile(self.filename, 'w', mtime = mtime)
+        fWrite.write(data1)
+        fWrite.close()
+
+        fRead = open(self.filename, 'rb')
+
+        # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html
+
+        idBytes = fRead.read(2)
+        self.assertEqual(idBytes, '\x1f\x8b') # gzip ID
+
+        cmByte = fRead.read(1)
+        self.assertEqual(cmByte, '\x08') # deflate
+
+        flagsByte = fRead.read(1)
+        self.assertEqual(flagsByte, '\x08') # only the FNAME flag is set
+
+        mtimeBytes = fRead.read(4)
+        self.assertEqual(mtimeBytes, struct.pack('<i', mtime)) # little-endian
+
+        xflByte = fRead.read(1)
+        self.assertEqual(xflByte, '\x02') # maximum compression
+
+        osByte = fRead.read(1)
+        self.assertEqual(osByte, '\xff') # OS "unknown" (OS-independent)
+
+        # Since the FNAME flag is set, the zero-terminated filename follows.
+        # RFC 1952 specifies that this is the name of the input file, if any.
+        # However, the gzip module defaults to storing the name of the output
+        # file in this field.
+        nameBytes = fRead.read(len(self.filename) + 1)
+        self.assertEqual(nameBytes, self.filename + '\x00')
+
+        # Since no other flags were set, the header ends here.
+        # Rather than process the compressed data, let's seek to the trailer.
+        fRead.seek(os.stat(self.filename).st_size - 8)
+
+        crc32Bytes = fRead.read(4) # CRC32 of uncompressed data [data1]
+        self.assertEqual(crc32Bytes, '\xaf\xd7d\x83')
+
+        isizeBytes = fRead.read(4)
+        self.assertEqual(isizeBytes, struct.pack('<i', len(data1)))
+
+        fRead.close()
+
 def test_main(verbose=None):
     test_support.run_unittest(TestGzip)
 

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Sun Jan  4 22:29:23 2009
@@ -108,6 +108,11 @@
 Library
 -------
 
+- Issue #4272: Add an optional argument to the GzipFile constructor to override
+  the timestamp in the gzip stream. The default value remains the current time.
+  The information can be used by e.g. gunzip when decompressing. Patch by
+  Jacques Frechet.
+
 - Restore Python 2.3 compatibility for decimal.py.
 
 - Issue #1702551: distutils sdist was not excluding VCS directories under 


More information about the Python-checkins mailing list