[Scipy-svn] r5067 - in trunk/scipy: cluster spatial spatial/tests

scipy-svn at scipy.org scipy-svn at scipy.org
Wed Nov 12 18:10:39 EST 2008


Author: damian.eads
Date: 2008-11-12 17:10:34 -0600 (Wed, 12 Nov 2008)
New Revision: 5067

Modified:
   trunk/scipy/cluster/hierarchy.py
   trunk/scipy/spatial/distance.py
   trunk/scipy/spatial/tests/test_distance.py
Log:
Added tests for scipy.spatial.distance.numobs_y.

Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py	2008-11-12 22:28:02 UTC (rev 5066)
+++ trunk/scipy/cluster/hierarchy.py	2008-11-12 23:10:34 UTC (rev 5067)
@@ -1260,7 +1260,7 @@
     """
     Z = np.asarray(Z, order='c')
     Y = np.asarray(Y, order='c')
-    return numobs_y(Y) == numobs_linkage(Z)
+    return distance.numobs_y(Y) == numobs_linkage(Z)
 
 def fcluster(Z, t, criterion='inconsistent', depth=2, R=None, monocrit=None):
     """

Modified: trunk/scipy/spatial/distance.py
===================================================================
--- trunk/scipy/spatial/distance.py	2008-11-12 22:28:02 UTC (rev 5066)
+++ trunk/scipy/spatial/distance.py	2008-11-12 23:10:34 UTC (rev 5067)
@@ -32,6 +32,9 @@
 +------------------+-------------------------------------------------+
 |numobs_dm         | # of observations in a distance matrix.         |
 +------------------+-------------------------------------------------+
+|numobs_y          | # of observations in a condensed distance       |
+|                  | matrix.                                         |
++------------------+-------------------------------------------------+
 
 Distance functions between two vectors ``u`` and ``v``. Computing
 distances over a large collection of vectors is inefficient for these
@@ -1550,7 +1553,12 @@
     """
     Y = np.asarray(Y, order='c')
     is_valid_y(Y, throw=True, name='Y')
-    d = int(np.ceil(np.sqrt(Y.shape[0] * 2)))
+    k = Y.shape[0]
+    if k == 0:
+        raise ValueError("The number of observations cannot be determined on an empty distance matrix.")
+    d = int(np.ceil(np.sqrt(k * 2)))
+    if (d*(d-1)/2) != k:
+        raise ValueError("Invalid condensed distance matrix passed. Must be some k where k=(n choose 2) for some n >= 2.")
     return d
 
 

Modified: trunk/scipy/spatial/tests/test_distance.py
===================================================================
--- trunk/scipy/spatial/tests/test_distance.py	2008-11-12 22:28:02 UTC (rev 5066)
+++ trunk/scipy/spatial/tests/test_distance.py	2008-11-12 23:10:34 UTC (rev 5067)
@@ -39,7 +39,8 @@
 import numpy as np
 from numpy.testing import *
 from scipy.spatial.distance import squareform, pdist, cdist, matching, \
-                                   jaccard, dice, sokalsneath, rogerstanimoto, russellrao, yule
+                                   jaccard, dice, sokalsneath, rogerstanimoto, \
+                                   russellrao, yule, numobs_y
 
 _filenames = ["iris.txt",
               "cdist-X1.txt",
@@ -81,6 +82,7 @@
 eo = {}
 
 def load_testing_files():
+    "Loading test data files for the scipy.spatial.distance tests."
     for fn in _filenames:
         name = fn.replace(".txt", "").replace("-ml", "")
         fqfn = os.path.join(os.path.dirname(__file__), fn)
@@ -1372,6 +1374,7 @@
         self.failUnless(rA.shape == (0,))
 
     def test_squareform_empty_vector(self):
+        "Tests squareform on an empty vector."
         v = np.zeros((0,))
         rv = squareform(np.array(v, dtype='double'))
         self.failUnless(rv.shape == (1,1))
@@ -1426,3 +1429,50 @@
                     k += 1
                 else:
                     self.failUnless(A[i, j] == 0)
+
+class TestNumObsY(TestCase):
+
+    def test_num_obs_y_1(self):
+        "Tests numobs_y(y) on a condensed distance matrix over 1 observation. Expecting exception."
+        self.failUnlessRaises(ValueError, self.check_y, 1)
+
+    def test_num_obs_y_2(self):
+        "Tests numobs_y(y) on a condensed distance matrix over 2 observations."
+        self.failUnless(self.check_y(2))
+
+    def test_num_obs_y_3(self):
+        "Tests numobs_y(y) on a condensed distance matrix over 3 observations."
+        self.failUnless(self.check_y(3))
+
+    def test_num_obs_y_4(self):
+        "Tests numobs_y(y) on a condensed distance matrix over 4 observations."
+        self.failUnless(self.check_y(4))
+
+    def test_num_obs_y_5_10(self):
+        "Tests numobs_y(y) on a condensed distance matrix between 5 and 15 observations."
+        for i in xrange(5, 16):
+            self.minit(i)
+
+    def test_num_obs_y_2_100(self):
+        "Tests numobs_y(y) on condensed distance matrices of improper length (5 through 104, skipping n-choose-2 lengths). Expecting exception."
+        a = set([])
+        for n in xrange(2, 16):
+            a.add(n*(n-1)/2)
+        print a
+        for i in xrange(5, 105):
+            if i not in a:
+                self.failUnlessRaises(ValueError, self.bad_y, i)
+
+    def minit(self, n):
+        self.failUnless(self.check_y(n))
+
+    def bad_y(self, n):
+        y = np.random.rand(n)
+        return numobs_y(y)
+
+    def check_y(self, n):
+        return numobs_y(self.make_y(n)) == n
+
+    def make_y(self, n):
+        return np.random.rand((n*(n-1)/2))
+




More information about the Scipy-svn mailing list