[Python-checkins] r53765 - in python/trunk: Doc/lib/libzipfile.tex Lib/test/test_zipfile.py Lib/zipfile.py Misc/NEWS

martin.v.loewis python-checkins at python.org
Tue Feb 13 10:49:40 CET 2007


Author: martin.v.loewis
Date: Tue Feb 13 10:49:38 2007
New Revision: 53765

Modified:
   python/trunk/Doc/lib/libzipfile.tex
   python/trunk/Lib/test/test_zipfile.py
   python/trunk/Lib/zipfile.py
   python/trunk/Misc/NEWS
Log:
Patch #698833: Support file decryption in zipfile.


Modified: python/trunk/Doc/lib/libzipfile.tex
==============================================================================
--- python/trunk/Doc/lib/libzipfile.tex	(original)
+++ python/trunk/Doc/lib/libzipfile.tex	Tue Feb 13 10:49:38 2007
@@ -17,8 +17,10 @@
 {PKZIP Application Note}.
 
 This module does not currently handle ZIP files which have appended
-comments, or multi-disk ZIP files. It can handle ZIP files that use the 
-ZIP64 extensions (that is ZIP files that are more than 4 GByte in size).
+comments, or multi-disk ZIP files. It can handle ZIP files that use
+the ZIP64 extensions (that is ZIP files that are more than 4 GByte in
+size).  It supports decryption of encrypted files in ZIP archives, but
+it cannot currently create an encrypted file.  
 
 The available attributes of this module are:
 
@@ -138,9 +140,18 @@
   Print a table of contents for the archive to \code{sys.stdout}.
 \end{methoddesc}
 
-\begin{methoddesc}{read}{name}
+\begin{methoddesc}{setpassword}{pwd}
+  Set \var{pwd} as default password to extract encrypted files.
+  \versionadded{2.6}
+\end{methoddesc}
+
+\begin{methoddesc}{read}{name\optional{, pwd}}
   Return the bytes of the file in the archive.  The archive must be
-  open for read or append.
+  open for read or append. \var{pwd} is the password used for encrypted 
+  files and, if specified, it will override the default password set with
+  \method{setpassword()}.
+
+  \versionchanged[\var{pwd} was added]{2.6}
 \end{methoddesc}
 
 \begin{methoddesc}{testzip}{}

Modified: python/trunk/Lib/test/test_zipfile.py
==============================================================================
--- python/trunk/Lib/test/test_zipfile.py	(original)
+++ python/trunk/Lib/test/test_zipfile.py	Tue Feb 13 10:49:38 2007
@@ -349,8 +349,49 @@
         # and report that the first file in the archive was corrupt.
         self.assertRaises(RuntimeError, zipf.testzip)
 
+
+class DecryptionTests(unittest.TestCase):
+    # This test checks that ZIP decryption works. Since the library does not
+    # support encryption at the moment, we use a pre-generated encrypted
+    # ZIP file
+
+    data = (
+    'PK\x03\x04\x14\x00\x01\x00\x00\x00n\x92i.#y\xef?&\x00\x00\x00\x1a\x00'
+    '\x00\x00\x08\x00\x00\x00test.txt\xfa\x10\xa0gly|\xfa-\xc5\xc0=\xf9y'
+    '\x18\xe0\xa8r\xb3Z}Lg\xbc\xae\xf9|\x9b\x19\xe4\x8b\xba\xbb)\x8c\xb0\xdbl'
+    'PK\x01\x02\x14\x00\x14\x00\x01\x00\x00\x00n\x92i.#y\xef?&\x00\x00\x00'
+    '\x1a\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x01\x00 \x00\xb6\x81'
+    '\x00\x00\x00\x00test.txtPK\x05\x06\x00\x00\x00\x00\x01\x00\x01\x006\x00'
+    '\x00\x00L\x00\x00\x00\x00\x00' )
+
+    plain = 'zipfile.py encryption test'
+
+    def setUp(self):
+        fp = open(TESTFN, "wb")
+        fp.write(self.data)
+        fp.close()
+        self.zip = zipfile.ZipFile(TESTFN, "r")
+
+    def tearDown(self):
+        self.zip.close()
+        os.unlink(TESTFN)
+
+    def testNoPassword(self):
+        # Reading the encrypted file without a password
+        # must raise a RuntimeError
+        self.assertRaises(RuntimeError, self.zip.read, "test.txt")
+
+    def testBadPassword(self):
+        self.zip.setpassword("perl")
+        self.assertRaises(RuntimeError, self.zip.read, "test.txt")
+            
+    def testGoodPassword(self):
+        self.zip.setpassword("python")
+        self.assertEquals(self.zip.read("test.txt"), self.plain)
+
 def test_main():
-    run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests, PyZipFileTests)
+    run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests, 
+                 PyZipFileTests, DecryptionTests)
     #run_unittest(TestZip64InSmallFiles)
 
 if __name__ == "__main__":

Modified: python/trunk/Lib/zipfile.py
==============================================================================
--- python/trunk/Lib/zipfile.py	(original)
+++ python/trunk/Lib/zipfile.py	Tue Feb 13 10:49:38 2007
@@ -296,6 +296,65 @@
             extra = extra[ln+4:]
 
 
+class _ZipDecrypter:
+    """Class to handle decryption of files stored within a ZIP archive.
+
+    ZIP supports a password-based form of encryption. Even though known
+    plaintext attacks have been found against it, it is still useful
+    for low-level security.
+
+    Usage:
+        zd = _ZipDecrypter(mypwd)
+        plain_char = zd(cypher_char)
+        plain_text = map(zd, cypher_text)
+    """
+
+    def _GenerateCRCTable():
+        """Generate a CRC-32 table.
+
+        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
+        internal keys. We noticed that a direct implementation is faster than
+        relying on binascii.crc32().
+        """
+        poly = 0xedb88320
+        table = [0] * 256
+        for i in range(256):
+            crc = i
+            for j in range(8):
+                if crc & 1:
+                    crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
+                else:
+                    crc = ((crc >> 1) & 0x7FFFFFFF)
+            table[i] = crc
+        return table
+    crctable = _GenerateCRCTable()
+
+    def _crc32(self, ch, crc):
+        """Compute the CRC32 primitive on one byte."""
+        return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
+
+    def __init__(self, pwd):
+        self.key0 = 305419896
+        self.key1 = 591751049
+        self.key2 = 878082192
+        for p in pwd:
+            self._UpdateKeys(p)
+
+    def _UpdateKeys(self, c):
+        self.key0 = self._crc32(c, self.key0)
+        self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
+        self.key1 = (self.key1 * 134775813 + 1) & 4294967295
+        self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
+
+    def __call__(self, c):
+        """Decrypt a single character."""
+        c = ord(c)
+        k = self.key2 | 2
+        c = c ^ (((k * (k^1)) >> 8) & 255)
+        c = chr(c)
+        self._UpdateKeys(c)
+        return c
+
 class ZipFile:
     """ Class with methods to open, read, write, close, list zip files.
 
@@ -330,6 +389,7 @@
         self.filelist = []      # List of ZipInfo instances for archive
         self.compression = compression  # Method of compression
         self.mode = key = mode.replace('b', '')[0]
+        self.pwd = None
 
         # Check if we were passed a file-like object
         if isinstance(file, basestring):
@@ -461,7 +521,11 @@
         """Return the instance of ZipInfo given 'name'."""
         return self.NameToInfo[name]
 
-    def read(self, name):
+    def setpassword(self, pwd):
+        """Set default password for encrypted files."""
+        self.pwd = pwd
+
+    def read(self, name, pwd=None):
         """Return file bytes (as a string) for name."""
         if self.mode not in ("r", "a"):
             raise RuntimeError, 'read() requires mode "r" or "a"'
@@ -469,6 +533,13 @@
             raise RuntimeError, \
                   "Attempt to read ZIP archive that was already closed"
         zinfo = self.getinfo(name)
+        is_encrypted = zinfo.flag_bits & 0x1
+        if is_encrypted:
+            if not pwd:
+                pwd = self.pwd
+            if not pwd:
+                raise RuntimeError, "File %s is encrypted, " \
+                      "password required for extraction" % name
         filepos = self.fp.tell()
 
         self.fp.seek(zinfo.header_offset, 0)
@@ -489,6 +560,18 @@
                           zinfo.orig_filename, fname)
 
         bytes = self.fp.read(zinfo.compress_size)
+        # Go with decryption
+        if is_encrypted:
+            zd = _ZipDecrypter(pwd)
+            # The first 12 bytes of the cypher stream are an encryption header
+            #  used to strengthen the algorithm. The first 11 bytes are
+            #  completely random, while the 12th contains the MSB of the CRC,
+            #  and is used to check the correctness of the password.
+            h = map(zd, bytes[0:12])
+            if ord(h[11]) != ((zinfo.CRC>>24)&255):
+                raise RuntimeError, "Bad password for file %s" % name
+            bytes = "".join(map(zd, bytes[12:]))
+        # Go with decompression
         self.fp.seek(filepos, 0)
         if zinfo.compress_type == ZIP_STORED:
             pass

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Tue Feb 13 10:49:38 2007
@@ -128,6 +128,8 @@
 Library
 -------
 
+- Patch #698833: Support file decryption in zipfile.
+
 - Patch #685268: Consider a package's __path__ in imputil.
 
 - Patch 1463026: Support default namespace in XMLGenerator.


More information about the Python-checkins mailing list