[pypy-svn] r55216 - in pypy/dist/pypy/rlib: . test

fijal at codespeak.net fijal at codespeak.net
Sun May 25 21:41:00 CEST 2008


Author: fijal
Date: Sun May 25 21:40:58 2008
New Revision: 55216

Added:
   pypy/dist/pypy/rlib/rzipfile.py   (contents, props changed)
   pypy/dist/pypy/rlib/test/test_rzipfile.py   (contents, props changed)
Log:
Enough zipfile support at interp-level to pass a simple test.
This will be used by zipimport.


Added: pypy/dist/pypy/rlib/rzipfile.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/rlib/rzipfile.py	Sun May 25 21:40:58 2008
@@ -0,0 +1,242 @@
+
+from zipfile import ZIP_STORED, ZIP_DEFLATED
+from pypy.rlib.streamio import open_file_as_stream
+from pypy.rlib.rstruct import runpack
+import os
+from pypy.rlib import rzlib
+from pypy.rlib.rarithmetic import r_uint, intmask
+
+# XXX hack to get crc32 to work
+from pypy.lib.binascii import crc_32_tab
+
+# Copy of crc_32_tab with every entry wrapped in r_uint; crc32() below
+# indexes this table with the low byte of the running checksum.
+rcrc_32_tab = [r_uint(i) for i in crc_32_tab]
+
+def crc32(s, crc=0):
+    result = 0
+    crc = ~r_uint(crc) & 0xffffffffL
+    for c in s:
+        crc = rcrc_32_tab[(crc ^ r_uint(ord(c))) & 0xffL] ^ (crc >> 8)
+        #/* Note:  (crc >> 8) MUST zero fill on left
+
+        result = crc ^ 0xffffffffL
+    
+    if result > 2**31:
+        result = ((result + 2**31) % r_uint(2**32)) - 2**31
+
+    return result
+
+# Parts of the code below are copied from the standard library's zipfile module.
+
+class BadZipfile(Exception):
+    pass
+
+# Here are some struct module formats for reading headers.
+# Each format is paired with the 4-byte magic string that opens the record.
+structEndArchive = "<4s4H2lH"     # 8 items, end of archive, 22 bytes
+stringEndArchive = "PK\005\006"   # magic number for end of archive record
+structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
+stringCentralDir = "PK\001\002"   # magic number for central directory
+structFileHeader = "<4s2B4HlLL2H"  # 12 items, file header record, 30 bytes
+stringFileHeader = "PK\003\004"   # magic number for file header
+
+# indexes of entries in the central directory structure
+# (positions in the tuple unpacked with structCentralDir)
+_CD_SIGNATURE = 0
+_CD_CREATE_VERSION = 1
+_CD_CREATE_SYSTEM = 2
+_CD_EXTRACT_VERSION = 3
+_CD_EXTRACT_SYSTEM = 4                  # is this meaningful?
+                                        # (RZipFile._GetContents stores this
+                                        # slot as RZipInfo.reserved)
+_CD_FLAG_BITS = 5
+_CD_COMPRESS_TYPE = 6
+_CD_TIME = 7
+_CD_DATE = 8
+_CD_CRC = 9
+_CD_COMPRESSED_SIZE = 10
+_CD_UNCOMPRESSED_SIZE = 11
+_CD_FILENAME_LENGTH = 12
+_CD_EXTRA_FIELD_LENGTH = 13
+_CD_COMMENT_LENGTH = 14
+_CD_DISK_NUMBER_START = 15
+_CD_INTERNAL_FILE_ATTRIBUTES = 16
+_CD_EXTERNAL_FILE_ATTRIBUTES = 17
+_CD_LOCAL_HEADER_OFFSET = 18
+
+# indexes of entries in the local file header structure
+# (positions in the tuple unpacked with structFileHeader)
+_FH_SIGNATURE = 0
+_FH_EXTRACT_VERSION = 1
+_FH_EXTRACT_SYSTEM = 2                  # is this meaningful?
+_FH_GENERAL_PURPOSE_FLAG_BITS = 3
+_FH_COMPRESSION_METHOD = 4
+_FH_LAST_MOD_TIME = 5
+_FH_LAST_MOD_DATE = 6
+_FH_CRC = 7
+_FH_COMPRESSED_SIZE = 8
+_FH_UNCOMPRESSED_SIZE = 9
+_FH_FILENAME_LENGTH = 10
+_FH_EXTRA_FIELD_LENGTH = 11
+
+class EndRecStruct(object):
+    def __init__(self, stuff, comment, filesize):
+        self.stuff = stuff
+        self.comment = comment
+        self.filesize = filesize
+
+def _EndRecData(fpin):
+    """Return data from the "End of Central Directory" record, or None.
+
+    The data is a list of the nine items in the ZIP "End of central dir"
+    record followed by a tenth item, the file seek offset of this record."""
+    fpin.seek(-22, 2)               # Assume no archive comment.
+    filesize = fpin.tell() + 22     # Get file size
+    data = fpin.readall()
+    start = len(data)-2
+    assert start > 0
+    if data[0:4] == stringEndArchive and data[start:] == "\000\000":
+        endrec = runpack(structEndArchive, data)
+        return EndRecStruct(endrec, "", filesize - 22)
+    # Search the last END_BLOCK bytes of the file for the record signature.
+    # The comment is appended to the ZIP file and has a 16 bit length.
+    # So the comment may be up to 64K long.  We limit the search for the
+    # signature to a few Kbytes at the end of the file for efficiency.
+    # also, the signature must not appear in the comment.
+    END_BLOCK = min(filesize, 1024 * 4)
+    fpin.seek(filesize - END_BLOCK, 0)
+    data = fpin.readall()
+    start = data.rfind(stringEndArchive)
+    if start >= 0:     # Correct signature string was found
+        endrec = runpack(structEndArchive, data[start:start+22])
+        comment = data[start+22:]
+        if endrec[7] == len(comment):     # Comment length checks out
+            # Append the archive comment and start offset
+            return EndRecStruct(endrec, comment, filesize - END_BLOCK + start)
+    return      # Error, return None
+
+class RZipInfo(object):
+    def __init__(self, filename, date_time=(1980,1,1,0,0,0)):
+        self.orig_filename = filename
+        null_byte = filename.find(chr(0))
+        if null_byte >= 0:
+            filename = filename[0:null_byte]
+# This is used to ensure paths in generated ZIP files always use
+# forward slashes as the directory separator, as required by the
+# ZIP format specification.
+        if os.sep != "/":
+            filename = filename.replace(os.sep, "/")
+        self.filename = filename        # Normalized file name
+        self.date_time = date_time      # year, month, day, hour, min, sec
+        # Standard values:
+        self.compress_type = ZIP_STORED # Type of compression for the file
+        self.comment = ""               # Comment for each file
+        self.extra = ""                 # ZIP extra data
+        self.create_system = 0          # System which created ZIP archive
+        self.create_version = 20        # Version which created ZIP archive
+        self.extract_version = 20       # Version needed to extract archive
+        self.reserved = 0               # Must be zero
+        self.flag_bits = 0              # ZIP flag bits
+        self.volume = 0                 # Volume number of file header
+        self.internal_attr = 0          # Internal attributes
+        self.external_attr = 0          # External file attributes
+        # Other attributes are set by class ZipFile:
+        # header_offset         Byte offset to the file header
+        # file_offset           Byte offset to the start of the file data
+        # CRC                   CRC-32 of the uncompressed file
+        # compress_size         Size of the compressed file
+        # file_size             Size of the uncompressed file
+
+class RZipFile(object):
+    def __init__(self, zipname, mode='r', compression=ZIP_STORED):
+        if mode != 'r':
+            raise TypeError("Read only support by now")
+        self.compression = compression
+        self.mode = mode
+        self.filelist = []
+        self.NameToInfo = {}
+        fp = open_file_as_stream(zipname, mode, 1024)
+        self._GetContents(fp)
+        self.fp = fp
+
+    def _GetContents(self, fp):
+        endrec = _EndRecData(fp)
+        if not endrec:
+            raise BadZipfile, "File is not a zip file"
+        size_cd = endrec.stuff[5]             # bytes in central directory
+        offset_cd = endrec.stuff[6]   # offset of central directory
+        self.comment = endrec.comment
+        x = endrec.filesize - size_cd
+        concat = x - offset_cd
+        self.start_dir = offset_cd + concat
+        fp.seek(self.start_dir, 0)
+        total = 0
+        while total < size_cd:
+            centdir = fp.read(46)
+            total = total + 46
+            if centdir[0:4] != stringCentralDir:
+                raise BadZipfile, "Bad magic number for central directory"
+            centdir = runpack(structCentralDir, centdir)
+            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
+            # Create ZipInfo instance to store file information
+            x = RZipInfo(filename)
+            x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
+            x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
+            total = (total + centdir[_CD_FILENAME_LENGTH]
+                     + centdir[_CD_EXTRA_FIELD_LENGTH]
+                     + centdir[_CD_COMMENT_LENGTH])
+            x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
+            # file_offset must be computed below...
+            (x.create_version, x.create_system, x.extract_version, x.reserved,
+                x.flag_bits, x.compress_type, t, d,
+                x.CRC, x.compress_size, x.file_size) = centdir[1:12]
+            x.volume, x.internal_attr, x.external_attr = centdir[15:18]
+            # Convert date/time code to (year, month, day, hour, min, sec)
+            x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
+                                     t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
+            self.filelist.append(x)
+            self.NameToInfo[x.filename] = x
+        for data in self.filelist:
+            fp.seek(data.header_offset, 0)
+            fheader = fp.read(30)
+            if fheader[0:4] != stringFileHeader:
+                raise BadZipfile, "Bad magic number for file header"
+            fheader = runpack(structFileHeader, fheader)
+            # file_offset is computed here, since the extra field for
+            # the central directory and for the local file header
+            # refer to different fields, and they can have different
+            # lengths
+            data.file_offset = (data.header_offset + 30
+                                + fheader[_FH_FILENAME_LENGTH]
+                                + fheader[_FH_EXTRA_FIELD_LENGTH])
+            fname = fp.read(fheader[_FH_FILENAME_LENGTH])
+            if fname != data.orig_filename:
+                raise RuntimeError, \
+                      'File name in directory "%s" and header "%s" differ.' % (
+                          data.orig_filename, fname)
+        fp.seek(self.start_dir, 0)
+        
+    def getinfo(self, filename):
+        """Return the instance of ZipInfo given 'filename'."""
+        return self.NameToInfo[filename]
+
+    def read(self, filename):
+        zinfo = self.getinfo(filename)
+        filepos = self.fp.tell()
+        self.fp.seek(zinfo.file_offset, 0)
+        bytes = self.fp.read(intmask(zinfo.compress_size))
+        self.fp.seek(filepos, 0)
+        if zinfo.compress_type == ZIP_STORED:
+            pass
+        elif zinfo.compress_type == ZIP_DEFLATED:
+            raise NotImplementedError
+            # zlib compress/decompress code by Jeremy Hylton of CNRI
+            dc = zlib.decompressobj(-15)
+            bytes = dc.decompress(bytes)
+            # need to feed in unused pad byte so that zlib won't choke
+            ex = dc.decompress('Z') + dc.flush()
+            if ex:
+                bytes = bytes + ex
+        else:
+            raise BadZipfile, \
+                  "Unsupported compression method %d for file %s" % \
+            (zinfo.compress_type, filename)
+        crc = crc32(bytes)
+        if crc != r_uint(zinfo.CRC):
+            raise BadZipfile, "Bad CRC-32 for file %s" % filename
+        return bytes
+    

Added: pypy/dist/pypy/rlib/test/test_rzipfile.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/rlib/test/test_rzipfile.py	Sun May 25 21:40:58 2008
@@ -0,0 +1,35 @@
+
+from pypy.rlib.rzipfile import RZipFile
+from pypy.tool.udir import udir
+from zipfile import ZIP_STORED, ZIP_DEFLATED, ZipInfo, ZipFile
+from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
+import os
+import time
+
+class BaseTestRZipFile(BaseRtypingTest):
+
+    def setup_class(cls):
+        tmpdir = udir.ensure('zipimport_%s' % cls.__name__, dir=1)
+        zipname = str(tmpdir.join("somezip.zip"))
+        cls.zipname = zipname
+        zipfile = ZipFile(zipname, "w")
+        cls.year = time.localtime(time.time())[0]
+        zipfile.writestr("one", "stuff")
+        zipfile.writestr("dir" + os.path.sep + "two", "otherstuff")
+        zipfile.close()
+    
+    def test_rzipfile(self):
+        zipname = self.zipname
+        year = self.year
+        compression = self.compression
+        def one():
+            rzip = RZipFile(zipname, "r", compression)
+            info = rzip.getinfo('one')
+            return (info.date_time[0] == year and
+                    rzip.read('one') == 'stuff')
+
+        assert one()
+        assert self.interpret(one, [])
+
+class TestRZipFile(BaseTestRZipFile, LLRtypeMixin):
+    """Runs the BaseTestRZipFile checks with the LLRtypeMixin mixin."""
+    # Passed to RZipFile as its 'compression' argument by test_rzipfile.
+    compression = ZIP_STORED



More information about the Pypy-commit mailing list