[Scipy-svn] r4397 - in trunk/scipy/cluster: . tests

scipy-svn at scipy.org scipy-svn at scipy.org
Fri May 30 16:30:29 EDT 2008


Author: damian.eads
Date: 2008-05-30 15:30:15 -0500 (Fri, 30 May 2008)
New Revision: 4397

Modified:
   trunk/scipy/cluster/hierarchy.py
   trunk/scipy/cluster/tests/test_hierarchy.py
Log:
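Fixed bug in hierarchy.is_valid_linkage. Renamed the tolerance argument of is_valid_dm from t to tol and added throw and name arguments. Corrected the number of observations reported for a linkage matrix (Z.shape[0] + 1 instead of Z.shape[0] - 1), switched numobs_dm to scipy.inf, and fixed the argument name used inside numobs_y (y -> Y). Wrote tests for hierarchy.numobs_dm, hierarchy.numobs_y and hierarchy.numobs_linkage.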

Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py	2008-05-30 19:36:47 UTC (rev 4396)
+++ trunk/scipy/cluster/hierarchy.py	2008-05-30 20:30:15 UTC (rev 4397)
@@ -1735,12 +1735,13 @@
             else:
                 raise ValueError('Linkage matrix must have 4 columns.')
         n = Z.shape[0]
-        if not ((Z[:,0]-xrange(n-1, n*2-1) < 0).any()) or \
-           not (Z[:,1]-xrange(n-1, n*2-1) < 0).any():
-            if name:
-                raise ValueError('Linkage \'%s\' contains negative indices.' % name)
-            else:
-                raise ValueError('Linkage contains negative indices.')
+        if n > 1:
+            if ((Z[:,0] < 0).any() or
+                (Z[:,1] < 0).any()):
+                if name:
+                    raise ValueError('Linkage \'%s\' contains negative indices.' % name)
+                else:
+                    raise ValueError('Linkage contains negative indices.')
     except Exception, e:
         if throw:
             raise
@@ -1805,7 +1806,7 @@
     return valid
 
 
-def is_valid_dm(D, t=0.0):
+def is_valid_dm(D, tol=0.0, throw=False, name="D"):
     """
     is_valid_dm(D)
 
@@ -1813,12 +1814,12 @@
       Distance matrices must be 2-dimensional numpy arrays containing
       doubles. They must have a zero-diagonal, and they must be symmetric.
 
-    is_valid_dm(D, t)
+    is_valid_dm(D, tol)
 
       Returns True if the variable D passed is a valid distance matrix.
       Small numerical differences in D and D.T and non-zeroness of the
       diagonal are ignored if they are within the tolerance specified
-      by t.
+      by tol.
 
     is_valid_dm(..., warning=True, name='V')
 
@@ -1841,6 +1842,7 @@
                 raise TypeError('\'%s\' passed as a distance matrix is not a numpy array.' % name)
             else:
                 raise TypeError('Variable is not a numpy array.')
+        s = D.shape
         if D.dtype != np.double:
             if name:
                 raise TypeError('Distance matrix \'%s\' must contain doubles (float64).' % name)
@@ -1851,7 +1853,7 @@
                 raise ValueError('Distance matrix \'%s\' must have shape=2 (i.e. be two-dimensional).' % name)
             else:
                 raise ValueError('Distance matrix must have shape=2 (i.e. be two-dimensional).')
-        if t == 0.0:
+        if tol == 0.0:
             if not (D == D.T).all():
                 if name:
                     raise ValueError('Distance matrix \'%s\' must be symmetric.' % name)
@@ -1863,16 +1865,16 @@
                 else:
                     raise ValueError('Distance matrix diagonal must be zero.')
         else:
-            if not (D - D.T <= t).all():
+            if not (D - D.T <= tol).all():
                 if name:
-                    raise ValueError('Distance matrix \'%s\' must be symmetric within tolerance %d.' % (name, t))
+                    raise ValueError('Distance matrix \'%s\' must be symmetric within tolerance %d.' % (name, tol))
                 else:
-                    raise ValueError('Distance matrix must be symmetric within tolerance %d.' % t)
-            if not (D[xrange(0, s[0]), xrange(0, s[0])] <= t).all():
+                    raise ValueError('Distance matrix must be symmetric within tolerance %5.5f.' % tol)
+            if not (D[xrange(0, s[0]), xrange(0, s[0])] <= tol).all():
                 if name:
-                    raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %d.' % (name, t))
+                    raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %5.5f.' % (name, tol))
                 else:
-                    raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %d.' % t)
+                    raise ValueError('Distance matrix \'%s\' diagonal must be close to zero within tolerance %5.5f.' % tol)
     except Exception, e:
         if throw:
             raise
@@ -1887,7 +1889,7 @@
     linkage matrix Z.
     """
     is_valid_linkage(Z, throw=True, name='Z')
-    return (Z.shape[0] - 1)
+    return (Z.shape[0] + 1)
 
 def numobs_dm(D):
     """
@@ -1896,7 +1898,7 @@
       Returns the number of original observations that correspond to a
       square, non-condensed distance matrix D.
     """
-    is_valid_dm(D, tol=Inf, throw=True, name='D')
+    is_valid_dm(D, tol=scipy.inf, throw=True, name='D')
     return D.shape[0]
 
 def numobs_y(Y):
@@ -1906,8 +1908,8 @@
       Returns the number of original observations that correspond to a
       condensed distance matrix Y.
     """
-    is_valid_y(y, throw=True, name='Y')
-    d = int(np.ceil(np.sqrt(y.shape[0] * 2)))
+    is_valid_y(Y, throw=True, name='Y')
+    d = int(np.ceil(np.sqrt(Y.shape[0] * 2)))
     return d
 
 def Z_y_correspond(Z, Y):

Modified: trunk/scipy/cluster/tests/test_hierarchy.py
===================================================================
--- trunk/scipy/cluster/tests/test_hierarchy.py	2008-05-30 19:36:47 UTC (rev 4396)
+++ trunk/scipy/cluster/tests/test_hierarchy.py	2008-05-30 20:30:15 UTC (rev 4397)
@@ -37,7 +37,7 @@
 import sys
 import os.path
 from scipy.testing import *
-from scipy.cluster.hierarchy import pdist, squareform, linkage, from_mlab_linkage
+from scipy.cluster.hierarchy import pdist, squareform, linkage, from_mlab_linkage, numobs_dm, numobs_y, numobs_linkage
 
 import numpy
 #import math
@@ -572,6 +572,8 @@
         #print "test-chebychev-iris", numpy.abs(Y_test2 - Y_right).max()
         self.failUnless(within_tol(Y_test2, Y_right, eps))
 
+    ################### squareform
+
     def test_squareform_empty_matrix(self):
         "Tests squareform on an empty matrix."
         A = numpy.zeros((0,0))
@@ -611,10 +613,14 @@
         for n in xrange(2, 5):
             X = numpy.random.rand(n, 4)
             Y = pdist(X)
+            self.failUnless(len(Y.shape) == 1)
             A = squareform(Y)
             Yr = squareform(A)
             s = A.shape
             k = 0
+            self.failUnless(len(s) == 2)
+            self.failUnless(len(Yr.shape) == 1)
+            self.failUnless(s[0] == s[1])
             #print A.shape, Y.shape, Yr.shape
             for i in xrange(0, s[0]):
                 for j in xrange(i+1, s[1]):
@@ -626,6 +632,37 @@
                     else:
                         self.failUnless(A[i, j] == 0)
 
+    ############## numobs_dm
+
+    def test_numobs_dm_multi_matrix(self):
+        "Tests numobs_dm with observation matrices of multiple sizes."
+        for n in xrange(2, 10):
+            X = numpy.random.rand(n, 4)
+            Y = pdist(X)
+            A = squareform(Y)
+            #print A.shape, Y.shape, Yr.shape
+            self.failUnless(numobs_dm(A) == n)
+
+    def test_numobs_y_multi_matrix(self):
+        "Tests numobs_y with observation matrices of multiple sizes."
+        for n in xrange(2, 10):
+            X = numpy.random.rand(n, 4)
+            Y = pdist(X)
+            #print A.shape, Y.shape, Yr.shape
+            self.failUnless(numobs_y(Y) == n)
+
+    def test_numobs_linkage_multi_matrix(self):
+        "Tests numobs_linkage with observation matrices of multiple sizes."
+        for n in xrange(2, 10):
+            X = numpy.random.rand(n, 4)
+            Y = pdist(X)
+            Z = linkage(Y)
+            #print Z
+            #print A.shape, Y.shape, Yr.shape
+            self.failUnless(numobs_linkage(Z) == n)
+
+    ################### linkage
+
     def test_linkage_single_tdist(self):
         "Tests linkage(Y, 'single') on the tdist data set."
         Z = linkage(_ytdist, 'single')
@@ -659,6 +696,7 @@
         expectedZ = from_mlab_linkage(Zmlab)
         #print Z, expectedZ, numpy.abs(Z - expectedZ).max()
         self.failUnless(within_tol(Z, expectedZ, eps))
+        
 
 def within_tol(a, b, tol):
     return numpy.abs(a - b).max() < tol




More information about the Scipy-svn mailing list