[Matrix-SIG] Re: Reading and handling matrices.

jhauser@ifm.uni-kiel.de jhauser@ifm.uni-kiel.de
Sat, 24 Apr 1999 00:16:44 +0200 (CEST)


Hi to make you a little bit more comfortable in the transition from
Matlab to NumPy I append a first test version of a binary matlab file
IO class. It can only read Matlab Ver. 4 files but these can easily be 
written by all recent Versions.

Please test it, especially if you use some special types in the *.mat
files. If there are errors or questions please feel free to mail me.

HTH

__Janko

#!/usr/bin/env python
#
# mat.py
# Fast module to read matlab files (vers <= 4.2)
# Needs numpyio now part of signaltools from Travis Oliphant. Get it at
# http://oliphant.netpedia.net/ 
#
__ver__ = '0.1.1'
__author__ = 'jhauser@ifm.uni-kiel.de'

import os, struct
import string
import numpyio
import Numeric

MatIOError = 'MatIOError'

class MatFile:
    """File object representing a matlab file which is written so matlab 4 can
    read it. ( -v4 in Matlab 5). The object maps the variables in the file to
    attributes, which are loaded when one needs them. With the vars method all
    the names can be seen, without loading the data.
    """
    def __init__(self, fname, mode='r', endian='<'):
        try:
            self.fp = open(fname, mode)
            self.mode = mode
        except:
            raise IOError, "Cant't open "+fname
        
        self.fname = fname
        self.vardir = {}
        self.mtype={0:'d', 1:'d', 10:'f', 20:'l', 30:'h', 40:'H', 50:'B',
                    'd':1, 'f':10, 'l':20, 'h':30, 'H':40, 'B':50}
        self.endian = endian # default is little endian
        self.swap = 0
        self._getheaders()

    def __getattr__(self, attr):
        if attr in self.__dict__['vardir'].keys():
            self.__dict__[attr] = self._getvar(attr)
        return self.__dict__[attr]

    def write(self, name, attr):
        """Write data to file"""
        if name in self.vars():
            raise MatIOError, " there is a variable named %s in file %s" % \
                  (name, self.fname)
        
        self.fp.seek(0,2) # Use the whence parameter to go to end of file
        dend=self.fp.tell()
        header = self._mkheader(name, attr)
        header[5] = header[5]+dend+header[4] # set startbyte
        head_str = struct.pack(self.endian+'5l',header[0], header[1],
                                  header[2],header[3], header[4])
        self.fp.write(head_str)
        self.fp.write(struct.pack(self.endian+`header[4]`+'s', name))
        
        numpyio.fwrite(self.fp, 
                       header[1]*header[2], 
                       attr,
                       attr.typecode(),
                       self.swap)
        self.vardir[name] = header
        self.fp.flush()
        return
        
    def _mkheader(self, name, m):
        """Build a header structure for a matlab file. Thats the items in
        self.vardir.
        
        Format:
        0  0 (Le) or 1000 (Be) for endianes
        1  10 M of size MxN matrix (columns)
        2  10 N of size MxN matrix (rows)
        3  0  typecode of data
        4  2  length of name + 1
        5  22 start of data in bytes
        6  78 length of data in bytes
        """
        m=Numeric.asarray(m)
        if len(m.shape) > 2:
            raise MatIOError
        header=[0]
        header.append(m.shape[0])
        header.append(m.shape[1])
        header.append(self.mtype[m.typecode()]) # Not all are mapped!
        header.append(len(name)+1)
        header.append(0) # start later with an offeset. No seek here
        header.append(struct.calcsize(
                      `header[1]*header[2]`+m.typecode()))
        return header
        

    def _getheaders(self):
        """Iterate over the file and get each header with start position
        of data field. A header consists of 5 integers:
        0  0 (Le) or 1000 (Be) for endianes
        1  10 M of size MxN matrix (columns)
        2  10 N of size MxN matrix (rows)
        3  0  typecode of data
        4  2  length of name + 1

        Then follows the namestring of the variable, then the data
        """
        current = self.fp.tell()
        self.fp.seek(0)
        mlen = os.stat(self.fname)[6] # If the file is empty stop in while loop!
        # Check for endianess of data-file
        if mlen: # not a new file
            test = self.fp.read(4)
            if struct.unpack('<1l', test)[0] == 0:
                self.endian = '<'
                self.swap = 0
            elif struct.unpack('>1l', test)[0] == 1000:
                self.endian = '>'
                self.swap = 1
            else:
                raise MatIOError
            self.fp.seek(0)
            
        dend = 0
        mlen = os.stat(self.fname)[6] # If the file is empty stop here!
        while dend != mlen:
            start = dend
            header = list(struct.unpack(self.endian+'5l',self.fp.read(20)))
            tstart = start+20
            tend = tstart + header[4]
            dstart = tend
            dend = dstart+struct.calcsize(
                `header[1]*header[2]`+self.mtype[header[3]])
            name = struct.unpack(self.endian+`header[4]`+'s',
                                 self.fp.read(header[4]))
            name = name[0][:-1]
            map(header.append, (dstart, dend-dstart))

            self.vardir[name] = header
            self.fp.seek(dend)

        self.fp.seek(current)
        return

    def close(self):
        self.fp.close()
        return

    def _getvar(self, name):
        """Load the data from the file. Takes the info from the vardir_dictionary"""
        header = self.vardir[name]
        self.fp.seek(header[5])
        ar_size = header[1]*header[2]
        # read the data
        data = numpyio.fread(self.fp, 
                             ar_size, 
                             self.mtype[header[3]],
                             self.mtype[header[3]],
                             self.swap)
        if header[3] == 1: # That's a string in matlab represented by doubles
            return string.join(map(chr, data.tolist()),'')
        else:
            return Numeric.reshape(data, (header[1], header[2]))

    def vars(self):
        return self.vardir.keys()

# End of mat.py    


Herbert L. Roitblat writes:
 > Thanks very much for your really helpful response.  Let me digest that
 > before I bother anyone further.  Between you and Konrad I feel that I am on
 > my way.
 > 
 > The reason I used matlab at all was because it had the sparse matrix
 > functions I was looking for.  I have a sparse matrix consisting of integers
 > spread over a large matrix.  First I am practicing with a fairly "small"
 > matrix of only 2700 x 2700 element (about 350,000 of which are nonzero), but
 > I expect that the program will be used with matrices up to 50k x 50k, hence
 > the need for sparsity.  I need to get the eigen vectors for that matrix.
 > 
 > I am on the edge of my knowledge when dealing with many of these problems
 > (both in terms of the linerar algebra and in terms of python), so I really
 > appreciate your help.
 > 
 > Thanks a lot.
 > Herb
 > 
 > -----Original Message-----
 > From: Travis Oliphant <Oliphant.Travis@mayo.edu>
 > To: roitblat@hawaii.edu <roitblat@hawaii.edu>
 > Cc: matrix-sig@python.org <matrix-sig@python.org>
 > Date: Friday, April 23, 1999 8:25 AM
 > Subject: Reading and handling matrices.
 > 
 > 
 > >
 > >> I'm just starting with python and I'm having a few problems, which I =
 > >> suspect are rather simple-minded.
 > >
 > >Welcome aboard, starting with a new system can be intimidating.
 > >
 > >> 1. I was looking for some sparse matrix functions that would let me =
 > >> compute the eigen vectors of a rather large matrix in a reasonable =
 > >> amount of time.  I have found the eigen value functions in the numeric =
 > >> library and I have found some linear algebra routines that allow sparse
 > >> matrices, but I do not see a way to combine these.  I ended up saving =
 > >> the matrix as ascii, editing it using word, reading it using excel, and
 > >> then importing it to matlab.  There has to be a better way.
 > >
 > >Sparse matrices:
 > >
 > >As Konrad said there are several ways to handle sparse matrices, but
 > >no "comprehensive" package. There is a sparse matrix module that I can't
 > >say anything about except that it exists.  It defines a sparse matrix
 > >object and some methods for linear equation solving.  Look on
 > >http://www.python.org/topics/scicomp/numbercrunching.html
 > >for the Sparse Linear Equation Solver module by Neil Schemenauer
 > >
 > >Adding good sparse matrix support is something I'll be trying to add to my
 > >collection of useful packages for NumPy over the next year or two.  When I
 > >get to that point I'll likely start with the module on that page.
 > >
 > >It sounds like you went through a lot of work getting data in and out of
 > >NumPy arrays.  This was a source of confusion for me at first, too, so I
 > >understand a bit of the frustration.  The frustration does disappear
 > >as you get a feel for the structure of Python and the Numeric extensions.
 > >
 > >I wanted to read arbitrary binary files and so wrote a C-extension module
 > >that reads arbitrary binary data into Python NumPy arrays.  It turns out,
 > >I could have accomplished what I wanted to do with the fromstring function
 > >from the Numeric module and the read method of any file object.
 > >
 > >One method to get your data into MATLAB is to open a file object in
 > >Python and write the data in your NumPy array as a "string" (i.e. a
 > >sequence of raw bytes).
 > >
 > >>>> import Numeric
 > >>>> a = Numeric.ones((512,512))  # an uninteresting array of integers
 > >>>> fid = open("myfile","w")
 > >>>> fid.write(a.tostring())
 > >>>> fid.close()
 > >
 > >If you are on a 32-bit machine, you will now have a file called "myfile"
 > >in the current directory that should contain (512*512*4) bytes of 4-byte
 > >integer ones.
 > >
 > >You can read this data into MATLAB using matlab's fread function:
 > >
 > >>> fid = fopen('myfile');
 > >>> a = fread(fid,512*512,'int');
 > >>> fclose(fid);
 > >
 > >The same sequence works in reverse use MATLAB's fwrite function and
 > >Numeric Python's fromstring function and the read method of an open file
 > >object.
 > >
 > >You should NEVER have to edit numbers by hand in Word.  It makes me sad
 > >that you felt the need to do that.  This is a pretty low level way to read
 > >and write data, but is indispensable when interfacing with other code and
 > >outputs from other code.  A better solution is to use a higher level file
 > >format like NETCDF which you can read and write to from MATLAB and NumPy.
 > >
 > >Of course, I would wonder why you were doing much work in MATLAB anyway
 > >:-) (and if you are then what is NumPy missing and we'll add it.)
 > >
 > >If you want it I could send you a module I rarely use now that makes the
 > >NumPy way of reading and writing binary data more like MATLAB's which I
 > >was used, too when I started.
 > >
 > >Best of luck on your newfound tool.  Feel free to ask anymore questions as
 > >they arise.
 > >
 > >Sincerely,
 > >
 > >Travis Oliphant.
 > >
 > >(just a run-o'-the-mill NumPy fan)
 > >
 > >
 > 
 > 
 > _______________________________________________
 > Matrix-SIG maillist  -  Matrix-SIG@python.org
 > http://www.python.org/mailman/listinfo/matrix-sig
 >