[Scipy-svn] r3471 - trunk/scipy/sparse

scipy-svn at scipy.org scipy-svn at scipy.org
Sun Oct 28 04:07:42 EDT 2007


Author: wnbell
Date: 2007-10-28 03:07:38 -0500 (Sun, 28 Oct 2007)
New Revision: 3471

Modified:
   trunk/scipy/sparse/sparse.py
Log:
unified format check for CSR and CSC matrices
functionality is also exposed as check_format()


Modified: trunk/scipy/sparse/sparse.py
===================================================================
--- trunk/scipy/sparse/sparse.py	2007-10-28 06:15:25 UTC (rev 3470)
+++ trunk/scipy/sparse/sparse.py	2007-10-28 08:07:38 UTC (rev 3471)
@@ -299,8 +299,6 @@
             return self._imag()
         elif attr == 'size':
             return self.getnnz()
-#        elif attr == 'ftype':
-#            return _transtabl.get(self.dtype.char,'')
         else:
             raise AttributeError, attr + " not found"
 
@@ -443,19 +441,13 @@
         m, n = self.shape
         if axis == 0:
             # sum over columns
-            # Does the following multiplication work in NumPy now?
-            o = asmatrix(ones((1, m), dtype=self.dtype))
-            return o * self
-            # o = ones(m, dtype=self.dtype)
-            # return asmatrix(self.rmatvec(o))
+            return asmatrix(ones((1, m), dtype=self.dtype)) * self
         elif axis == 1:
             # sum over rows
-            o = asmatrix(ones((n, 1), dtype=self.dtype))
-            return self * o
+            return self * asmatrix(ones((n, 1), dtype=self.dtype))
         elif axis == None:
             # sum over rows and columns
-            o1 = asmatrix(ones((n, 1), dtype=self.dtype))
-            return (self * o1).sum()
+            return ( self * asmatrix(ones((n, 1), dtype=self.dtype)) ).sum()
         else:
             raise ValueError, "axis out of bounds"
 
@@ -513,7 +505,23 @@
         fd.close()
 
 class _cs_matrix(spmatrix):
-    def _check(self):
+    """base matrix class for compressed row and column oriented matrices"""
+    def _check_format(self, orientation, full_check):
+        # some functions pass floats
+        self.shape = tuple([int(x) for x in self.shape])
+
+        assert(orientation in ['row','column'])
+        if orientation == 'row':
+            primary,secondary = 'row','column'
+            indptr_size = self.shape[0] + 1
+            indices_bound = self.shape[1]
+        else:
+            primary,secondary = 'column','row'
+            indptr_size = self.shape[1] + 1
+            indices_bound = self.shape[0]
+
+
+        # index arrays should have integer data types
         if self.indptr.dtype.kind != 'i':
             warnings.warn("indptr array has non-integer dtype.  " \
                           "Casting from %s to int" % self.indptr.dtype.name )
@@ -527,8 +535,49 @@
         self.indptr  = to_native(self.indptr)
         self.indices = to_native(self.indices)
         self.data    = to_native(self.data)
+        
+        # set the data type
+        self.dtype = self.data.dtype
+        
+        
+        # check array shapes
+        if (rank(self.data) != 1) or (rank(self.indices) != 1) or \
+           (rank(self.indptr) != 1):
+            raise ValueError,"data, indices, and indptr should be rank 1"
 
-        #TODO unify csr and csr _check
+        # check index pointer
+        if (len(self.indptr) != indptr_size ):
+            raise ValueError, \
+                "index pointer size (%d) should be (%d)" % \
+                 (len(self.indptr), indptr_size)
+        if (self.indptr[0] != 0):
+            raise ValueError,"index pointer should start with 0"
+        
+        # check index and data arrays
+        if (len(self.indices) != len(self.data)):
+            raise ValueError,"indices and data should have the same size"
+        if (self.indptr[-1] > len(self.indices)):
+            raise ValueError, \
+                  "Last value of index pointer should be less than "\
+                  "the size of index and data arrays"
+
+        self.nnz   =  self.indptr[-1]
+        self.nzmax = len(self.indices)
+
+        if full_check:
+            #check format validity (more expensive)
+            if self.nnz > 0:
+                if amax(self.indices[:self.nnz]) >= indices_bound:
+                    raise ValueError, "%s index values must be < %d" % \
+                            (secondary,indices_bound)
+                if amin(self.indices[:self.nnz]) < 0:
+                    raise ValueError, "%s index values must be >= 0" % \
+                            secondary
+            if numpy.diff(self.indptr).min() < 0:
+                raise ValueError,'index pointer values must form a " \
+                                    "non-decreasing sequence'
+
+
     
     def astype(self, t):
         return self._with_data(self.data.astype(t))
@@ -541,17 +590,16 @@
                    _formats[format][1]))
 
     def _with_data(self,data,copy=True):
-        """
-        Return a matrix with the same sparsity structure as self,
+        """Returns a matrix with the same sparsity structure as self,
         but with different data.  By default the structure arrays
         (i.e. .indptr and .indices) are copied.
         """
         if copy:
             return self.__class__((data,self.indices.copy(),self.indptr.copy()), \
-                                   dims=self.shape,dtype=data.dtype,check=False)
+                                   dims=self.shape,dtype=data.dtype)
         else:
             return self.__class__((data,self.indices,self.indptr), \
-                                   dims=self.shape,dtype=data.dtype,check=False)
+                                   dims=self.shape,dtype=data.dtype)
 
     def __abs__(self):
         return self._with_data(abs(self.data))
@@ -574,7 +622,7 @@
         indptr, ind, data = fn(in_shape[0], in_shape[1], \
                                self.indptr, self.indices, self.data,
                                other.indptr, other.indices, other.data)
-        return self.__class__((data, ind, indptr), dims=out_shape, check=False)
+        return self.__class__((data, ind, indptr), dims=out_shape)
 
 
     def __add__(self,other,fn):
@@ -780,7 +828,7 @@
 
     def _transpose(self, cls, copy=False):
         M, N = self.shape
-        return cls((self.data,self.indices,self.indptr),(N,M),copy=copy,check=False)
+        return cls((self.data,self.indices,self.indptr),(N,M),copy=copy)
 
 
     def conj(self, copy=False):
@@ -862,8 +910,9 @@
           - csc_matrix((data, row, ptr), [(M, N)])
             standard CSC representation
     """
-    def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False, check=True):
+    def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False):
         _cs_matrix.__init__(self)
+
         if isdense(arg1):
             self.dtype = getdtype(dtype, arg1)
             # Convert the dense array or matrix arg1 to CSC format
@@ -973,64 +1022,20 @@
 
         self.shape = (M, N)
 
-        self._check(check)
+        self.check_format(full_check=False)
 
+    def check_format(self,full_check=True):
+        """check whether matrix is in valid CSC format
 
-    def _check(self,full_check=True):
-        _cs_matrix._check(self)        
-        
-        # some functions pass floats
-        self.shape = tuple([int(x) for x in self.shape])
+            *Parameters*:
+                full_check:
+                    True  - rigorous check, O(N) operations : default
+                    False - basic check, O(1) operations
 
-        M, N = self.shape
-        nnz = self.indptr[-1]
-        nzmax = len(self.indices)
-        if (rank(self.data) != 1) or (rank(self.indices) != 1) or \
-           (rank(self.indptr) != 1):
-            raise ValueError, "data, rowind, and indptr arrays "\
-                  "should be rank 1"
-        if (len(self.data) != nzmax):
-            raise ValueError, "data and row list should have same length"
-        if (self.indptr[0] != 0):
-            raise ValueError,"index pointer should start with 0"
-        if (len(self.indptr) != N+1):
-            raise ValueError, \
-                  "index pointer size (%d) should be N+1 (%d)" %\
-                  (len(self.indptr), N+1)
-        if (nzmax < nnz):
-            raise ValueError, "nzmax (%d) must not be less than nnz (%d)" %\
-                  (nzmax, nnz)
+        """
 
-        if full_check:
-            #check format validity (more expensive)
-            if nnz > 0:
-                if amax(self.indices[:nnz]) >= M:
-                    raise ValueError, "row values must be < M"
-                if amin(self.indices[:nnz]) < 0:
-                    raise ValueError, "row values must be >= 0"
-            if numpy.diff(self.indptr).min() < 0:
-                raise ValueError,'indptr values must form a non-decreasing sequence'
+        _cs_matrix._check_format(self,'column',full_check)        
 
-        if (self.indptr[-1] > len(self.indices)):
-            raise ValueError, \
-                  "Last value of index list should be less than "\
-                  "the size of data list"
-
-
-        #TODO remove
-        #if (self.indices.dtype != numpy.intc):
-        #    self.indices = self.indices.astype(numpy.intc)
-        #if (self.indptr.dtype != numpy.intc):
-        #    self.indptr = self.indptr.astype(numpy.intc)
-
-        self.nnz = nnz
-        self.nzmax = nzmax
-        self.dtype = self.data.dtype
-        #TODO remove
-        #if self.dtype.char not in 'fdFD':
-        #    self.data = 1.0 * self.data
-        #    self.dtype = self.data.dtype
-
     def __getattr__(self, attr):
         if attr == 'rowind':
             warnings.warn("rowind attribute no longer in use. Use .indices instead",
@@ -1144,7 +1149,7 @@
             else:
                 raise IndexError, "row index occurs more than once"
 
-            self._check()
+            self.check_format(full_check=False)
         else:
             # We should allow slices here!
             raise IndexError, "invalid index"
@@ -1172,7 +1177,7 @@
     def tocsr(self):
         indptr, colind, data = csctocsr(self.shape[0], self.shape[1], \
                                         self.indptr, self.indices, self.data)
-        return csr_matrix((data, colind, indptr), self.shape, check=False)
+        return csr_matrix((data, colind, indptr), self.shape)
 
     def _toother(self):
         return self.tocsr()
@@ -1195,7 +1200,6 @@
         self.data = self.data[:nnz]
         self.indices = self.indices[:nnz]
         self.nzmax = nnz
-        self._check()
 
     def ensure_sorted_indices(self, inplace=False):
         """Return a copy of this matrix where the row indices are sorted
@@ -1232,7 +1236,7 @@
           - csr_matrix((data, col, ptr), [dims=(M, N)])
             standard CSR representation
     """
-    def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False, check=True):
+    def __init__(self, arg1, dims=None, nzmax=NZMAX, dtype=None, copy=False):
         _cs_matrix.__init__(self)
         if isdense(arg1):
             self.dtype = getdtype(dtype, arg1)
@@ -1336,58 +1340,21 @@
                 N = max(oldN, N)
 
         self.shape = (M, N)
-        
-        self._check(check)
 
-    def _check(self,full_check=True):
-        _cs_matrix._check(self)        
+        self.check_format(full_check=False)
 
-        # some functions pass floats
-        self.shape = tuple([int(x) for x in self.shape])
+    def check_format(self,full_check=True):
+        """check whether matrix is in valid CSR format
 
-        M, N = self.shape
-        nnz = self.indptr[-1]
-        nzmax = len(self.indices)
-        if (rank(self.data) != 1) or (rank(self.indices) != 1) or \
-           (rank(self.indptr) != 1):
-            raise ValueError, "data, colind, and indptr arrays "\
-                  "should be rank 1"
-        if (len(self.data) != nzmax):
-            raise ValueError, "data and row list should have same length"
-        if (self.indptr[0] != 0):
-            raise ValueError,"index pointer should start with 0"
-        if (len(self.indptr) != M+1):
-            raise ValueError, "index pointer should be of length #rows + 1"
+            *Parameters*:
+                full_check:
+                    True  - perform rigorous checking - default
+                    False - perform basic format check
 
+        """
 
-        if full_check:
-            #check format validity (more expensive)
-            if nnz > 0:
-                if amax(self.indices[:nnz]) >= N:
-                    raise ValueError, "column values must be < N"
-                if amin(self.indices[:nnz]) < 0:
-                    raise ValueError, "column values must be >= 0"
-            if numpy.diff(self.indptr).min() < 0:
-                raise ValueError,'indptr values must form a non-decreasing sequence'
+        _cs_matrix._check_format(self,'row',full_check)        
 
-        if (nnz > nzmax):
-            raise ValueError, \
-                  "last value of index list should be less than "\
-                  "the size of data list"
-        #TODO remove this
-        #if (self.indices.dtype != numpy.intc):
-        #    self.indices = self.indices.astype(numpy.intc)
-        #if (self.indptr.dtype != numpy.intc):
-        #    self.indptr = self.indptr.astype(numpy.intc)
-
-        self.nnz = nnz
-        self.nzmax = nzmax
-        self.dtype = self.data.dtype
-        #TODO remove
-        #if self.dtype.char not in 'fdFD':
-        #    self.data = self.data + 0.0
-        #    self.dtype = self.data.dtype
-
     def __getattr__(self, attr):
         if attr == 'colind':
             warnings.warn("colind attribute no longer in use. Use .indices instead",
@@ -1501,7 +1468,7 @@
             else:
                 raise IndexError, "row index occurs more than once"
 
-            self._check()
+            self.check_format(full_check=False)
         else:
             # We should allow slices here!
             raise IndexError, "invalid index"
@@ -1520,7 +1487,7 @@
     def tocsc(self):
         indptr, rowind, data = csrtocsc(self.shape[0], self.shape[1], \
                                         self.indptr, self.indices, self.data)
-        return csc_matrix((data, rowind, indptr), self.shape, check=False)
+        return csc_matrix((data, rowind, indptr), self.shape)
 
     def _toother(self):
         return self.tocsc()
@@ -1546,7 +1513,6 @@
         self.data = self.data[:nnz]
         self.indices = self.indices[:nnz]
         self.nzmax = nnz
-        self._check()
 
     def ensure_sorted_indices(self, inplace=False):
         """Return a copy of this matrix where the column indices are sorted
@@ -2276,22 +2242,22 @@
         if (nnz != len(self.row)) or (nnz != len(self.col)):
             raise ValueError, "row, column, and data array must all be "\
                   "the same length"
+        
+        # index arrays should have integer data types
+        if self.row.dtype.kind != 'i':
+            warnings.warn("row index array has non-integer dtype.  " \
+                          "Casting from %s to int" % self.row.dtype.name )
+            self.row = self.row.astype('i')
+        if self.col.dtype.kind != 'i':
+            warnings.warn("column index array has non-integer dtype.  " \
+                          "Casting from %s to int" % self.col.dtype.name )
+            self.col = self.col.astype('i')
 
-        if self.row.dtype > numpy.dtype('int64'):
-            raise TypeError,'row array has invalid dtype'
-        if self.col.dtype > numpy.dtype('int64'):
-            raise TypeError,'column array has invalid dtype'
-
-        #TODO fix this bandaid
+        #TODO do this in SWIG 
         self.row  = to_native(self.row)
         self.col  = to_native(self.col)
         self.data = to_native(self.data)
 
-        #if (self.row.dtype != numpy.intc):
-        #    self.row = self.row.astype(numpy.intc)
-        #if (self.col.dtype != numpy.intc):
-        #    self.col = self.col.astype(numpy.intc)
-
         if nnz > 0:
             if(amax(self.row) >= self.shape[0]):
                 raise ValueError, "row index exceedes matrix dimensions"
@@ -2340,7 +2306,7 @@
             indptr, rowind, data = cootocsc(self.shape[0], self.shape[1], \
                                             self.nnz, self.row, self.col, \
                                             self.data)
-            return csc_matrix((data, rowind, indptr), self.shape, check=True)
+            return csc_matrix((data, rowind, indptr), self.shape)
 
 
     def tocsr(self):
@@ -2350,7 +2316,7 @@
             indptr, colind, data = cootocsr(self.shape[0], self.shape[1], \
                                             self.nnz, self.row, self.col, \
                                             self.data)
-            return csr_matrix((data, colind, indptr), self.shape, check=False)
+            return csr_matrix((data, colind, indptr), self.shape)
 
     def tocoo(self, copy=False):
         return self.toself(copy)




More information about the Scipy-svn mailing list