[Scipy-svn] r3750 - in trunk/scipy/sparse: . tests

Sun Dec 30 19:53:58 EST 2007

Author: wnbell
Date: 2007-12-30 18:53:54 -0600 (Sun, 30 Dec 2007)
New Revision: 3750

Modified:
   trunk/scipy/sparse/base.py
   trunk/scipy/sparse/compressed.py
   trunk/scipy/sparse/coo.py
   trunk/scipy/sparse/csc.py
   trunk/scipy/sparse/csr.py
   trunk/scipy/sparse/dia.py
   trunk/scipy/sparse/tests/test_base.py
Log:
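Consolidated the csr_matrix and csc_matrix __setitem__ implementations into a
single shared method in compressed.py.
Added SparseWarning, SparseFormatWarning and SparseEfficiencyWarning to base.py.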


Modified: trunk/scipy/sparse/base.py
===================================================================

--- trunk/scipy/sparse/base.py	2007-12-30 11:09:48 UTC (rev 3749)
+++ trunk/scipy/sparse/base.py	2007-12-31 00:53:54 UTC (rev 3750)
@@ -1,6 +1,7 @@
 """Base class for sparse matrices"""
 
-__all__ = ['spmatrix','isspmatrix','issparse']
+__all__ = ['spmatrix', 'isspmatrix', 'issparse',
+        'SparseWarning','SparseEfficiencyWarning']
 
 from warnings import warn
 
@@ -9,6 +10,11 @@
 from sputils import isdense, isscalarlike, isintlike
 
 
+class SparseWarning(Warning): pass
+class SparseFormatWarning(SparseWarning): pass
+class SparseEfficiencyWarning(SparseWarning): pass
+
+
 # The formats that we might potentially understand.
 _formats = {'csc':[0, "Compressed Sparse Column"],
             'csr':[1, "Compressed Sparse Row"],
@@ -273,7 +279,7 @@
             raise ValueError,'exponent must be an integer'
         elif isspmatrix(other):
             warn('Using ** for elementwise multiplication is deprecated.'\
-                    'Use .multiply() instead',DeprecationWarning)
+                    'Use .multiply() instead', DeprecationWarning)
             return self.multiply(other)
         else:
             raise NotImplementedError
@@ -477,6 +483,7 @@
 
     def save(self, file_name, format = '%d %d %f\n'):
         #deprecated on Dec 14 2007
+        #remove after 0.7 release
         warn('save() is deprecated, consider using mmwrite() or savemat()' \
                 ' provided by scipy.io instead',
                 DeprecationWarning)

Modified: trunk/scipy/sparse/compressed.py
===================================================================
--- trunk/scipy/sparse/compressed.py	2007-12-30 11:09:48 UTC (rev 3749)
+++ trunk/scipy/sparse/compressed.py	2007-12-31 00:53:54 UTC (rev 3750)
@@ -8,9 +8,9 @@
 import numpy
 from numpy import array, matrix, asarray, asmatrix, zeros, rank, intc, \
         empty, hstack, isscalar, ndarray, shape, searchsorted, empty_like, \
-        where
+        where, concatenate
 
-from base import spmatrix, isspmatrix
+from base import spmatrix, isspmatrix, SparseEfficiencyWarning
 from data import _data_matrix
 import sparsetools
 from sputils import upcast, to_native, isdense, isshape, getdtype, \
@@ -35,7 +35,7 @@
             warn("dims= is deprecated, use shape= instead", DeprecationWarning)
             shape=dims
         
-        if dims is not None:
+        if nzmax is not None:
             warn("nzmax= is deprecated", DeprecationWarning)
 
 
@@ -393,9 +393,9 @@
     def _get_single_element(self,row,col):
         M, N = self.shape
         if (row < 0):
-            row = M + row
+            row += M
         if (col < 0):
-            col = N + col
+            col += N
         if not (0<=row<M) or not (0<=col<N):
             raise IndexError, "index out of bounds"
         
@@ -466,7 +466,59 @@
         return self.__class__((data, index, indptr), shape=shape, \
                               dtype=self.dtype)
 
+    def __setitem__(self, key, val):
+        if isinstance(key, tuple):
+            row,col = key
+            if not (isscalarlike(row) and isscalarlike(col)):
+                raise NotImplementedError("Fancy indexing in assignment not "
+                                          "supported for csr matrices.")
+            M, N = self.shape
+            if (row < 0):
+                row += M
+            if (col < 0):
+                col += N
+            if not (0<=row<M) or not (0<=col<N):
+                raise IndexError, "index out of bounds"
+        
+            major_index, minor_index = self._swap((row,col))
+        
+            start = self.indptr[major_index]
+            end   = self.indptr[major_index+1]
+            indxs = where(minor_index == self.indices[start:end])[0]
 
+            num_matches = len(indxs)
+
+            if num_matches == 0:
+                #entry not already present
+                warn('changing the sparsity structure of a %s_matrix is expensive. ' \
+                        'lil_matrix is more efficient.' % self.format, \
+                        SparseEfficiencyWarning)
+                self.sort_indices()
+   
+                #no harm if not sorted
+                newindx = self.indices[start:end].searchsorted(minor_index)
+                newindx += start
+
+                val = array([val],dtype=self.data.dtype)
+                minor_index = array([minor_index],dtype=self.indices.dtype)
+                self.data    = concatenate((self.data[:newindx],val,self.data[newindx:]))
+                self.indices = concatenate((self.indices[:newindx],minor_index,self.indices[newindx:]))
+
+                self.indptr[major_index+1:] += 1
+
+            elif num_matches == 1:
+                #entry appears exactly once
+                self.data[start:end][indxs[0]] = val
+            else:
+                #entry appears more than once
+                raise ValueError,'nonzero entry (%d,%d) occurs more than once' % (row,col)
+
+            self.check_format(full_check=True)
+        else:
+            # We should allow slices here!
+            raise IndexError, "invalid index"
+
+
     # conversion methods
     def todia(self):
         return self.tocoo(copy=False).todia()

Modified: trunk/scipy/sparse/coo.py
===================================================================
--- trunk/scipy/sparse/coo.py	2007-12-30 11:09:48 UTC (rev 3749)
+++ trunk/scipy/sparse/coo.py	2007-12-31 00:53:54 UTC (rev 3750)
@@ -248,7 +248,8 @@
                       indptr, indices, data)
 
             A = csc_matrix((data, indices, indptr), self.shape)
-            A.sum_duplicates()
+            if sum_duplicates:
+                A.sum_duplicates()
             return A
 
     def tocsr(self,sum_duplicates=True):
@@ -272,7 +273,6 @@
 
             A = csr_matrix((data, indices, indptr), self.shape)
             if sum_duplicates:
-                A.sort_indices()
                 A.sum_duplicates()
             return A
     

Modified: trunk/scipy/sparse/csc.py
===================================================================
--- trunk/scipy/sparse/csc.py	2007-12-30 11:09:48 UTC (rev 3749)
+++ trunk/scipy/sparse/csc.py	2007-12-31 00:53:54 UTC (rev 3750)
@@ -9,7 +9,7 @@
         empty, hstack, isscalar, ndarray, shape, searchsorted, where, \
         concatenate
 
-from base import spmatrix,isspmatrix
+from base import spmatrix, isspmatrix
 from sparsetools import csc_tocsr
 from sputils import upcast, to_native, isdense, isshape, getdtype, \
         isscalarlike
@@ -104,52 +104,6 @@
         for r in xrange(self.shape[0]):
             yield csr[r,:]
 
-    def __setitem__(self, key, val):
-        if isinstance(key, tuple):
-            row = key[0]
-            col = key[1]
-            if not (isscalarlike(row) and isscalarlike(col)):
-                raise NotImplementedError("Fancy indexing in assignments not"
-                                          "supported for csc matrices.")
-            M, N = self.shape
-            if (row < 0):
-                row = M + row
-            if (col < 0):
-                col = N + col
-            if (row < 0) or (col < 0):
-                raise IndexError, "index out of bounds"
-            if (col >= N):
-                self.indptr = resize1d(self.indptr, col+2)
-                self.indptr[N+1:] = self.indptr[N]
-                N = col+1
-            if (row >= M):
-                M = row+1
-            self.shape = (M, N)
-
-            indxs = numpy.where(row == self.indices[self.indptr[col]:self.indptr[col+1]])
-    
-            if len(indxs[0]) == 0:
-                #value not present
-                newindx = self.indices[self.indptr[col]:self.indptr[col+1]].searchsorted(row)
-                newindx += self.indptr[col]
-
-                val = array([val],dtype=self.data.dtype)
-                row = array([row],dtype=self.indices.dtype)
-                self.data    = concatenate((self.data[:newindx],val,self.data[newindx:]))
-                self.indices = concatenate((self.indices[:newindx],row,self.indices[newindx:]))
-
-                self.indptr[col+1:] += 1
-            elif len(indxs[0]) == 1:
-                #value already present
-                self.data[self.indptr[col]:self.indptr[col+1]][indxs[0]] = val
-            else:
-                raise IndexError, "row index occurs more than once"
-
-            self.check_format(full_check=False)
-        else:
-            # We should allow slices here!
-            raise IndexError, "invalid index"
-
     def rowcol(self, ind):
         row = self.indices[ind]
         col = searchsorted(self.indptr, ind+1)-1
@@ -173,17 +127,8 @@
         from csr import csr_matrix
         return csr_matrix((data, indices, indptr), self.shape)
 
-#    def tobsc(self,blocksize=None, copy=True):
-#        if blocksize in [None, (1,1)]:
-#            from bsc import bsc_matrix
-#            arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr)  
-#            return bsc_matrix( arg1, shape=self.shape, copy=copy )
-#        else:
-#            #TODO make this more efficient
-#            return self.tocoo(copy=False).tobsc(blocksize=blocksize)
-#    
     def tobsr(self, blocksize=None):
-        if blocksize in [None, (1,1)]:
+        if blocksize == (1,1):
             from bsr import bsr_matrix
             csr = self.tocsr()
             arg1 = (csr.data.reshape(-1,1,1),csr.indices,csr.indptr)  

Modified: trunk/scipy/sparse/csr.py
===================================================================
--- trunk/scipy/sparse/csr.py	2007-12-30 11:09:48 UTC (rev 3749)
+++ trunk/scipy/sparse/csr.py	2007-12-31 00:53:54 UTC (rev 3750)
@@ -10,7 +10,7 @@
         empty, hstack, isscalar, ndarray, shape, searchsorted, where, \
         concatenate
 
-from base import spmatrix,isspmatrix
+from base import spmatrix, isspmatrix
 from sparsetools import csr_tocsc
 from sputils import upcast, to_native, isdense, isshape, getdtype, \
         isscalarlike, isintlike
@@ -102,54 +102,6 @@
         return csc_matrix((self.data,self.indices,self.indptr),(N,M),copy=copy)
 
 
-    def __setitem__(self, key, val):
-        if isinstance(key, tuple):
-            row = key[0]
-            col = key[1]
-            if not (isscalarlike(row) and isscalarlike(col)):
-                raise NotImplementedError("Fancy indexing in assignment not "
-                                          "supported for csr matrices.")
-            M, N = self.shape
-            if (row < 0):
-                row = M + row
-            if (col < 0):
-                col = N + col
-            if (row < 0) or (col < 0):
-                raise IndexError, "index out of bounds"
-            if (row >= M):
-                self.indptr = resize1d(self.indptr, row+2)
-                self.indptr[M+1:] = self.indptr[M]
-                M = row+1
-            if (col >= N):
-                N = col+1
-            self.shape = (M, N)
-
-            indxs = numpy.where(col == self.indices[self.indptr[row]:self.indptr[row+1]])
-
-            if len(indxs[0]) == 0:
-                #value not present
-                self.sort_indices()
-                newindx = self.indices[self.indptr[row]:self.indptr[row+1]].searchsorted(col)
-                newindx += self.indptr[row]
-
-                val = array([val],dtype=self.data.dtype)
-                col = array([col],dtype=self.indices.dtype)
-                self.data    = concatenate((self.data[:newindx],val,self.data[newindx:]))
-                self.indices = concatenate((self.indices[:newindx],col,self.indices[newindx:]))
-
-                self.indptr[row+1:] += 1
-
-            elif len(indxs[0]) == 1:
-                #value already present
-                self.data[self.indptr[row]:self.indptr[row+1]][indxs[0]] = val
-            else:
-                raise IndexError, "row index occurs more than once"
-
-            self.check_format(full_check=True)
-        else:
-            # We should allow slices here!
-            raise IndexError, "invalid index"
-
     def rowcol(self, ind):
         col = self.indices[ind]
         row = searchsorted(self.indptr, ind+1)-1
@@ -192,7 +144,7 @@
         return csc_matrix((data, indices, indptr), self.shape)
 
     def tobsr(self,blocksize=None,copy=True):
-        if blocksize in [None, (1,1)]:
+        if blocksize == (1,1):
             from bsr import bsr_matrix
             arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr)  
             return bsr_matrix( arg1, shape=self.shape, copy=copy )

Modified: trunk/scipy/sparse/dia.py
===================================================================
--- trunk/scipy/sparse/dia.py	2007-12-30 11:09:48 UTC (rev 3749)
+++ trunk/scipy/sparse/dia.py	2007-12-31 00:53:54 UTC (rev 3750)
@@ -208,12 +208,12 @@
             return self
 
     def tocsr(self):
-        #TODO optimize COO->CSR
-        return self.tocoo().tocsr()
+        #this could be faster
+        return self.tocoo().tocsr(sum_duplicates=False)
 
     def tocsc(self):
-        #TODO optimize COO->CSC
-        return self.tocoo().tocsc()
+        #this could be faster
+        return self.tocoo().tocsc(sum_duplicates=False)
 
     def tocoo(self):
         num_data = len(self.data)

Modified: trunk/scipy/sparse/tests/test_base.py
===================================================================
--- trunk/scipy/sparse/tests/test_base.py	2007-12-30 11:09:48 UTC (rev 3749)
+++ trunk/scipy/sparse/tests/test_base.py	2007-12-31 00:53:54 UTC (rev 3750)
@@ -22,7 +22,7 @@
 set_package_path()
 from scipy.sparse import csc_matrix, csr_matrix, dok_matrix, \
         coo_matrix, lil_matrix, dia_matrix, bsr_matrix, \
-        extract_diagonal, speye, spkron
+        extract_diagonal, speye, spkron, SparseEfficiencyWarning
 from scipy.linsolve import splu
 restore_path()
 
@@ -482,6 +482,8 @@
 
 class _TestGetSet:
     def check_setelement(self):
+        import warnings
+        warnings.simplefilter('ignore',SparseEfficiencyWarning)
         a = self.spmatrix((3,4))
         a[1,2] = 4.0
         a[0,1] = 3