[Scipy-svn] r4780 - in branches/spatial/scipy/spatial: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Sun Oct 5 04:55:43 EDT 2008
Author: peridot
Date: 2008-10-05 03:55:42 -0500 (Sun, 05 Oct 2008)
New Revision: 4780
Modified:
branches/spatial/scipy/spatial/kdtree.py
branches/spatial/scipy/spatial/tests/test_kdtree.py
Log:
Added distance matrix implementation, sparse distance matrix, and a few comments.
Modified: branches/spatial/scipy/spatial/kdtree.py
===================================================================
--- branches/spatial/scipy/spatial/kdtree.py 2008-10-05 08:01:17 UTC (rev 4779)
+++ branches/spatial/scipy/spatial/kdtree.py 2008-10-05 08:55:42 UTC (rev 4780)
@@ -2,6 +2,7 @@
# Released under the scipy license
import numpy as np
from heapq import heappush, heappop
+import scipy.sparse
def distance_p(x,y,p=2):
"""Compute the pth power of the L**p distance between x and y
@@ -46,7 +47,12 @@
return np.prod(self.maxes-self.mins)
def split(self, d, split):
- """Produce two hyperrectangles by splitting along axis d."""
+ """Produce two hyperrectangles by splitting along axis d.
+
+ In general, if you need to compute maximum and minimum
+ distances to the children, it can be done more efficiently
+ by updating the maximum and minimum distances to the parent.
+ """ # FIXME: do this
mid = np.copy(self.maxes)
mid[d] = split
less = Rectangle(self.mins, mid)
@@ -572,3 +578,98 @@
else:
raise ValueError("r must be either a single value or a one-dimensional array of values")
+ def sparse_distance_matrix(self, other, max_distance, p=2.):
+ """Compute a sparse distance matrix
+
+ Computes a distance matrix between two KDTrees, leaving as zero
+ any distance greater than max_distance.
+
+ Parameters
+ ==========
+
+ other : KDTree
+
+ max_distance : positive float
+
+ Returns
+ =======
+
+ result : dok_matrix
+ Sparse matrix representing the results in "dictionary of keys" format.
+ """
+ result = scipy.sparse.dok_matrix((self.n,other.n))
+
+ def traverse(node1, rect1, node2, rect2):
+ if rect1.min_distance_rectangle(rect2, p)>max_distance:
+ return
+ elif isinstance(node1, KDTree.leafnode):
+ if isinstance(node2, KDTree.leafnode):
+ for i in node1.idx:
+ for j in node2.idx:
+ d = distance(self.data[i],other.data[j],p)
+ if d<=max_distance:
+ result[i,j] = d
+ else:
+ less, greater = rect2.split(node2.split_dim, node2.split)
+ traverse(node1,rect1,node2.less,less)
+ traverse(node1,rect1,node2.greater,greater)
+ elif isinstance(node2, KDTree.leafnode):
+ less, greater = rect1.split(node1.split_dim, node1.split)
+ traverse(node1.less,less,node2,rect2)
+ traverse(node1.greater,greater,node2,rect2)
+ else:
+ less1, greater1 = rect1.split(node1.split_dim, node1.split)
+ less2, greater2 = rect2.split(node2.split_dim, node2.split)
+ traverse(node1.less,less1,node2.less,less2)
+ traverse(node1.less,less1,node2.greater,greater2)
+ traverse(node1.greater,greater1,node2.less,less2)
+ traverse(node1.greater,greater1,node2.greater,greater2)
+ traverse(self.tree, Rectangle(self.maxes, self.mins),
+ other.tree, Rectangle(other.maxes, other.mins))
+
+ return result
+
+
+def distance_matrix(x,y,p=2,threshold=1000000):
+ """Compute the distance matrix.
+
+ Computes the matrix of all pairwise distances.
+
+ Parameters
+ ==========
+
+ x : array-like, m by k
+ y : array-like, n by k
+ p : float 1<=p<=infinity
+ Which Minkowski p-norm to use.
+ threshold : positive integer
+ If m*n*k>threshold use a python loop instead of creating
+ a very large temporary.
+
+ Returns
+ =======
+
+ result : array-like, m by n
+
+
+ """
+
+ x = np.asarray(x)
+ m, k = x.shape
+ y = np.asarray(y)
+ n, kk = y.shape
+
+ if k != kk:
+ raise ValueError("x contains %d-dimensional vectors but y contains %d-dimensional vectors" % (k, kk))
+
+ if m*n*k <= threshold:
+ return distance(x[:,np.newaxis,:],y[np.newaxis,:,:],p)
+ else:
+ result = np.empty((m,n),dtype=np.float) #FIXME: figure out the best dtype
+ if m<n:
+ for i in range(m):
+ result[i,:] = distance(x[i],y,p)
+ else:
+ for j in range(n):
+ result[:,j] = distance(x,y[j],p)
+ return result
Modified: branches/spatial/scipy/spatial/tests/test_kdtree.py
===================================================================
--- branches/spatial/scipy/spatial/tests/test_kdtree.py 2008-10-05 08:01:17 UTC (rev 4779)
+++ branches/spatial/scipy/spatial/tests/test_kdtree.py 2008-10-05 08:55:42 UTC (rev 4780)
@@ -3,7 +3,7 @@
from numpy.testing import *
import numpy as np
-from scipy.spatial import KDTree, distance, Rectangle
+from scipy.spatial import KDTree, distance, Rectangle, distance_matrix
class ConsistencyTests:
def test_nearest(self):
@@ -337,6 +337,43 @@
for r,result in zip(rs, results):
assert_equal(self.T1.count_neighbors(self.T2, r), result)
+class test_sparse_distance_matrix:
+ def setUp(self):
+ n = 100
+ k = 4
+ self.T1 = KDTree(np.random.randn(n,k),leafsize=2)
+ self.T2 = KDTree(np.random.randn(n,k),leafsize=2)
+ self.r = 0.3
+ def test_consistency_with_neighbors(self):
+ M = self.T1.sparse_distance_matrix(self.T2, self.r)
+ r = self.T1.query_ball_tree(self.T2, self.r)
+ for i,l in enumerate(r):
+ for j in l:
+ assert_equal(M[i,j],distance(self.T1.data[i],self.T2.data[j]))
+ for ((i,j),d) in M.items():
+ assert j in r[i]
+
+def test_distance_matrix():
+ m = 10
+ n = 11
+ k = 4
+ xs = np.random.randn(m,k)
+ ys = np.random.randn(n,k)
+ ds = distance_matrix(xs,ys)
+ assert_equal(ds.shape, (m,n))
+ for i in range(m):
+ for j in range(n):
+ assert_equal(distance(xs[i],ys[j]),ds[i,j])
+def test_distance_matrix_looping():
+ m = 10
+ n = 11
+ k = 4
+ xs = np.random.randn(m,k)
+ ys = np.random.randn(n,k)
+ ds = distance_matrix(xs,ys)
+ dsl = distance_matrix(xs,ys,threshold=1)
+ assert_equal(ds,dsl)
+
if __name__=="__main__":
run_module_suite()
More information about the Scipy-svn
mailing list