[Scipy-svn] r5586 - in trunk/scipy/io/matlab: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Sun Feb 22 03:57:55 EST 2009
Author: matthew.brett at gmail.com
Date: 2009-02-22 02:57:41 -0600 (Sun, 22 Feb 2009)
New Revision: 5586
Added:
trunk/scipy/io/matlab/tests/afunc.m
Modified:
trunk/scipy/io/matlab/mio5.py
trunk/scipy/io/matlab/miobase.py
trunk/scipy/io/matlab/zlibstreams.py
Log:
Much faster gzipstreams, back in for the moment; draft of binary read of unreadable data, as yet unused
Modified: trunk/scipy/io/matlab/mio5.py
===================================================================
--- trunk/scipy/io/matlab/mio5.py 2009-02-22 04:05:03 UTC (rev 5585)
+++ trunk/scipy/io/matlab/mio5.py 2009-02-22 08:57:41 UTC (rev 5586)
@@ -14,7 +14,7 @@
import time
import sys
import zlib
-from zlibstreams import TwoShotZlibInputStream
+from zlibstreams import StubbyZlibInputStream
from StringIO import StringIO
from copy import copy as pycopy
import warnings
@@ -23,6 +23,7 @@
import scipy.sparse
+import byteordercodes
from miobase import MatFileReader, MatArrayReader, MatMatrixGetter, \
MatFileWriter, MatStreamWriter, docfiller, matdims, \
MatReadError
@@ -230,6 +231,13 @@
obj = np.asarray(input_array).view(cls)
+class MatlabBinaryBlock(object):
+ ''' Class to contain matlab unreadable blocks '''
+ def __init__(self, binaryblock, endian):
+ self.binaryblock = binaryblock
+ self.endian = endian
+
+
class Mat5ArrayReader(MatArrayReader):
''' Class to get Mat5 arrays
@@ -326,6 +334,20 @@
header['is_global'] = flags_class >> 10 & 1
header['is_complex'] = flags_class >> 11 & 1
header['nzmax'] = af['nzmax']
+ ''' Here I am playing with a binary block read of
+ untranslatable data. I am not using this at the moment because
+ reading it has the side effect of making opposite-endian mat
+ files unwritable on the round trip.
+
+ if mc == mxFUNCTION_CLASS:
+ # we can't read these, and want to keep track of the byte
+ # count - so we need to avoid the following unpredictable
+ # length element reads
+ return Mat5BinaryBlockGetter(self,
+ header,
+ af,
+ byte_count)
+ '''
header['dims'] = self.read_element()
header['name'] = self.read_element().tostring()
# maybe a dictionary mapping here as a dispatch table
@@ -354,9 +376,9 @@
'''
def __init__(self, array_reader, byte_count):
- instr = array_reader.mat_stream.read(byte_count)
super(Mat5ZArrayReader, self).__init__(
- StringIO(zlib.decompress(instr)),
+ StubbyZlibInputStream(array_reader.mat_stream,
+ byte_count),
array_reader.dtypes,
array_reader.processor_func,
array_reader.codecs,
@@ -387,7 +409,6 @@
def __init__(self, array_reader):
self.array_reader = array_reader
self.mat_stream = array_reader.mat_stream
- self.data_position = self.mat_stream.tell()
self.header = {}
self.name = ''
self.is_global = False
@@ -526,10 +547,50 @@
class Mat5FunctionGetter(Mat5ObjectMatrixGetter):
- def get_raw_array(self):
- raise MatReadError('Cannot read matlab functions')
+ ''' Class to provide warning and message string for unreadable
+ matlab function data
+ '''
+
+ def get_raw_array(self): raise MatReadError('Cannot read matlab functions')
+class Mat5BinaryBlockGetter(object):
+ ''' Class to read in unreadable binary blocks
+
+ This class could be used to read in matlab functions
+ '''
+
+ def __init__(self,
+ array_reader,
+ header,
+ array_flags,
+ byte_count):
+ self.array_reader = array_reader
+ self.header = header
+ self.array_flags = array_flags
+ arr_str = array_flags.tostring()
+ self.binaryblock = array_reader.mat_stream.read(
+ byte_count-len(array_flags.tostring()))
+ stream = StringIO(self.binaryblock)
+ reader = Mat5ArrayReader(
+ stream,
+ array_reader.dtypes,
+ lambda x : None,
+ array_reader.codecs,
+ array_reader.class_dtypes,
+ False)
+ self.header['dims'] = reader.read_element()
+ self.header['name'] = reader.read_element().tostring()
+ self.name = self.header['name']
+ self.is_global = header['is_global']
+
+ def get_array(self):
+ dt = self.array_reader.dtypes[miINT32]
+ endian = byteordercodes.to_numpy_code(dt.byteorder)
+ data = self.array_flags.tostring() + self.binaryblock
+ return MatlabBinaryBlock(data, endian)
+
+
class MatFile5Reader(MatFileReader):
''' Reader for Mat 5 mat files
Adds the following attribute to base class
@@ -840,13 +901,13 @@
self.update_matrix_tag()
-class Mat5FunctionWriter(Mat5CellWriter):
- ''' class to write matlab functions
+class Mat5BinaryBlockWriter(Mat5MatrixWriter):
+ ''' class to write untranslatable binary blocks '''
+ def write(self):
+ # check endian
+ # write binary block as is
+ pass
- Only differs from cell writing in mx class in header '''
- default_mclass = mxFUNCTION_CLASS
-
-
class Mat5StructWriter(Mat5CellWriter):
''' class to write matlab structs
@@ -1022,8 +1083,8 @@
self.unicode_strings,
self.long_field_names,
self.oned_as)
- if isinstance(narr, MatlabFunction):
- return Mat5FunctionWriter(*args)
+ if isinstance(narr, MatlabBinaryBlock):
+ return Mat5BinaryBlockWriter(*args)
if isinstance(narr, MatlabObject):
return Mat5ObjectWriter(*args)
if narr.dtype.hasobject: # cell or struct array
Modified: trunk/scipy/io/matlab/miobase.py
===================================================================
--- trunk/scipy/io/matlab/miobase.py 2009-02-22 04:05:03 UTC (rev 5585)
+++ trunk/scipy/io/matlab/miobase.py 2009-02-22 08:57:41 UTC (rev 5586)
@@ -3,6 +3,7 @@
"""
Base classes for matlab (TM) file stream reading
"""
+import warnings
import numpy as np
@@ -447,6 +448,10 @@
try:
res = getter.get_array()
except MatReadError, err:
+ warnings.warn(
+ 'Unreadable variable "%s", because "%s"' % \
+ (name, err),
+ Warning, stacklevel=2)
res = "Read error: %s" % err
getter.to_next()
mdict[name] = res
Added: trunk/scipy/io/matlab/tests/afunc.m
===================================================================
--- trunk/scipy/io/matlab/tests/afunc.m 2009-02-22 04:05:03 UTC (rev 5585)
+++ trunk/scipy/io/matlab/tests/afunc.m 2009-02-22 08:57:41 UTC (rev 5586)
@@ -0,0 +1,12 @@
+function [a, b] = afunc(c, d)
+% A function
+a = c + 1;
+b = d + 10;
+function [a, b] = afunc(c, d)
+% A function
+a = c + 1;
+b = d + 10;
+function [a, b] = afunc(c, d)
+% A function
+a = c + 1;
+b = d + 10;
Modified: trunk/scipy/io/matlab/zlibstreams.py
===================================================================
--- trunk/scipy/io/matlab/zlibstreams.py 2009-02-22 04:05:03 UTC (rev 5585)
+++ trunk/scipy/io/matlab/zlibstreams.py 2009-02-22 08:57:41 UTC (rev 5586)
@@ -30,6 +30,7 @@
'''
+from StringIO import StringIO
from zlib import decompressobj
@@ -38,21 +39,53 @@
>>> from StringIO import StringIO
>>> from zlib import compress
- >>> S = 'A handy module for reading compressed streams'
- >>> F = StringIO(compress(S))
- >>> ZF = ZlibInputStream(F)
- >>> ZF.read()
+ >>> s = 'A handy module for reading compressed streams'
+ >>> cs = compress(s)
+ >>> fobj = StringIO(cs)
+ >>> zf = ZlibInputStream(fobj)
+ >>> zf.read()
'A handy module for reading compressed streams'
- >>> ZF.tell() == len(S)
+ >>> zf.tell() == len(s)
True
- >>> F = StringIO(compress(S))
- >>> ZF = ZlibInputStream(F)
- >>> ZF.tell()
+ >>> fobj = StringIO(cs)
+ >>> zf = ZlibInputStream(fobj)
+ >>> zf.tell()
0
- >>> ZF.read(6)
+ >>> zf.read(6)
'A hand'
- >>> ZF.tell()
+ >>> zf.tell()
6
+
+ You can change the blocksize to preserve memory. Here it is
+ ridiculously small for testing.
+
+ >>> fobj = StringIO(cs)
+ >>> zf = ZlibInputStream(fobj)
+ >>> zf.default_blocksize = 3
+ >>> zf.read()
+ 'A handy module for reading compressed streams'
+
+ You can set the known length of the zipped stream. This is
+ normally when the stream is embedded in another stream, so there
+ is no end-of-file signal when the zlib stream is finished.
+
+ >>> fobj = StringIO(cs + 'padding')
+ >>> zf = ZlibInputStream(fobj, len(cs))
+ >>> zf.default_blocksize = 3
+ >>> zf.read()
+ 'A handy module for reading compressed streams'
+
+ >>> fobj = StringIO(cs + 'padding')
+ >>> zf = ZlibInputStream(fobj, len(cs))
+ >>> zf.default_blocksize = 3
+ >>> zf.read(7)
+ 'A handy'
+ >>> zf.tell()
+ 7
+ >>> zf.read(7)
+ ' module'
+ >>> zf.tell()
+ 14
'''
default_blocksize = 16384 # 16K
@@ -71,7 +104,7 @@
self.zipped_length=zipped_length
self.exhausted = False
self.unzipped_pos = 0
- self.data = ""
+ self.data = StringIO()
self._unzipper = decompressobj()
# number of zlib compressed bytes read
self._z_bytes_read = 0
@@ -116,20 +149,23 @@
''' Fill self.data with at least *bytes* number of bytes
If bytes == -1, continue until the end of the stream
- Returns ``None``
+ Parameters
+ ----------
+ bytes : integer
+ Number of bytes to read from zlib stream
+ If ``bytes==-1``, read the remaining bytes in stream
+
+ Returns
+ -------
+ None
'''
if self.exhausted:
return
# read until we have enough bytes in the buffer
read_to_end = bytes == -1
-
- bytes_to_fill = bytes - len(self.data)
- if not (bytes_to_fill or read_to_end):
- return
- # store data chunks in a list until the end so that we avoid the
- # quadratic behavior of continuously extending a string
- data_chunks = [self.data]
- while bytes_to_fill > 0 or read_to_end:
+ s_data = StringIO(self.data.read())
+ s_data.seek(0, 2) # seek to end
+ while read_to_end or (bytes - s_data.pos) > 0:
z_n_to_fetch = self._blocksize_iterator.next()
if z_n_to_fetch == 0:
self.exhausted = True
@@ -137,15 +173,14 @@
raw = self.fileobj.read(z_n_to_fetch)
self._z_bytes_read += len(raw)
if raw:
- decompressed = self._unzipper.decompress(raw)
- data_chunks.append(decompressed)
- bytes_to_fill -= len(decompressed)
+ s_data.write(self._unzipper.decompress(raw))
if len(raw) < z_n_to_fetch: # hit end of file
- data_chunks.append(self._unzipper.flush())
+ s_data.write(self._unzipper.flush())
self.exhausted = True
break
- self.data = ''.join(data_chunks)
-
+ s_data.seek(0)
+ self.data = s_data
+
def seek(self, offset, whence=0):
''' Set position in uncompressed stream
@@ -199,18 +234,13 @@
string containing read data
'''
- if bytes == -1:
+ if (bytes == -1 or
+ (self.data.len-self.data.pos) < bytes):
self.__fill(bytes)
- data = self.data
- self.data = ""
- else:
- if len(self.data) < bytes:
- self.__fill(bytes)
- data = self.data[:bytes]
- self.data = self.data[bytes:]
+ data = self.data.read(bytes)
self.unzipped_pos += len(data)
return data
-
+
def readline(self):
''' Read text line from data
@@ -220,19 +250,39 @@
>>> from zlib import compress
>>> S = 'A handy module\\nfor reading\\ncompressed streams'
>>> F = StringIO(compress(S))
- >>> ZF = ZlibInputStream(F)
- >>> ZF.readline()
+ >>> zf = ZlibInputStream(F)
+ >>> zf.readline()
'A handy module\\n'
- >>> ZF.readline()
+ >>> zf.readline()
'for reading\\n'
+
+ You can also set the block size
+ (here very small for testing)
+
+ >>> F = StringIO(compress(S))
+ >>> zf = ZlibInputStream(F)
+ >>> zf.default_blocksize = 5
+ >>> zf.readline()
+ 'A handy module\\n'
+ >>> zf.readline()
+ 'for reading\\n'
'''
# make sure we have an entire line
- while not self.exhausted and "\n" not in self.data:
- self.__fill(len(self.data) + 512)
- i = self.data.find("\n") + 1
- if i <= 0:
- return self.read()
- return self.read(i)
+ data = self.data.read()
+ blocks = [data]
+ while not self.exhausted and "\n" not in data:
+ # fill results in fresh data starting at 0
+ data = self.read(512)
+ blocks.append(data)
+ data = ''.join(blocks)
+ i = data.find("\n") + 1
+ if i <= 0: # newline at end
+ self.unzipped_pos += len(data)
+ return data
+ # new line not at end
+ self.unzipped_pos += i
+ self.data = StringIO(data[i:])
+ return data[:i]
def readlines(self):
''' Read all data broken up into list of text lines
@@ -240,8 +290,8 @@
>>> from zlib import compress
>>> S = 'A handy module\\nfor reading\\ncompressed streams'
>>> F = StringIO(compress(S))
- >>> ZF = ZlibInputStream(F)
- >>> ZF.readlines()
+ >>> zf = ZlibInputStream(F)
+ >>> zf.readlines()
['A handy module\\n', 'for reading\\n', 'compressed streams']
>>>
'''
@@ -273,4 +323,35 @@
yield self.default_blocksize
+class OneShotZlibInputStream(ZlibInputStream):
+ ''' One shot read, for testing '''
+
+ def _block_size_generator(self):
+ ''' Generator to give block sizes for reading
+ '''
+ if self.zipped_length:
+ yield self.zipped_length
+ yield 0
+ else:
+ while True:
+ yield self.default_blocksize
+
+class StubbyZlibInputStream(ZlibInputStream):
+ ''' One short, then fairly long reads '''
+
+ default_blocksize = 128 * 1024 # 128K
+ first_blocksize = 512 # 512 bytes
+
+ def _block_size_generator(self):
+ if self.zipped_length:
+ # do not read beyond specified length
+ yield min(self.zipped_length, self.first_blocksize)
+ while True:
+ yield min(
+ self.zipped_length - self._z_bytes_read,
+ self.default_blocksize)
+ else:
+ yield self.first_blocksize
+ while True:
+ yield self.default_blocksize
More information about the Scipy-svn mailing list