[Scipy-svn] r6947 - in trunk/scipy/spatial: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Sat Nov 27 02:14:48 EST 2010
Author: warren.weckesser
Date: 2010-11-27 01:14:47 -0600 (Sat, 27 Nov 2010)
New Revision: 6947
Modified:
trunk/scipy/spatial/distance.py
trunk/scipy/spatial/tests/test_distance.py
Log:
BUG: spatial: (ticket #1146) pdist didn't handle the weighted minkowski function correctly (the 'w' argument was not defined); also fixed and simplified other code. Thanks to Teemu Ikonen for some of the patches.
Modified: trunk/scipy/spatial/distance.py
===================================================================
--- trunk/scipy/spatial/distance.py 2010-11-27 03:36:50 UTC (rev 6946)
+++ trunk/scipy/spatial/distance.py 2010-11-27 07:14:47 UTC (rev 6947)
@@ -110,10 +110,12 @@
"""
+import warnings
import numpy as np
+
import _distance_wrap
-import types
+
def _copy_array_if_base_present(a):
"""
Copies the array if its base points to a parent array.
@@ -121,7 +123,7 @@
if a.base is not None:
return a.copy()
elif np.issubsctype(a, np.float32):
- return array(a, dtype=np.double)
+ return np.array(a, dtype=np.double)
else:
return a
@@ -201,6 +203,7 @@
"""
u = np.asarray(u, order='c')
v = np.asarray(v, order='c')
+ w = np.asarray(w)
if p < 1:
raise ValueError("p must be at least 1")
return ((w * abs(u-v))**p).sum() ** (1.0 / p)
@@ -570,10 +573,8 @@
if u.dtype == np.int or u.dtype == np.float_ or u.dtype == np.double:
not_u = 1.0 - u
not_v = 1.0 - v
- nff = (not_u * not_v).sum()
nft = (not_u * v).sum()
ntf = (u * not_v).sum()
- ntt = (u * v).sum()
else:
not_u = ~u
not_v = ~v
@@ -806,7 +807,7 @@
return float(2.0 * (ntf + nft)) / denom
-def pdist(X, metric='euclidean', p=2, V=None, VI=None):
+def pdist(X, metric='euclidean', p=2, w=None, V=None, VI=None):
r"""
Computes the pairwise distances between m original observations in
n-dimensional space. Returns a condensed distance matrix Y. For
@@ -1038,50 +1039,39 @@
X = np.asarray(X, order='c')
- #if np.issubsctype(X, np.floating) and not np.issubsctype(X, np.double):
- # raise TypeError('Floating point arrays must be 64-bit (got %r).' %
- # (X.dtype.type,))
-
# The C code doesn't do striding.
[X] = _copy_arrays_if_base_present([_convert_to_double(X)])
s = X.shape
-
if len(s) != 2:
raise ValueError('A 2-dimensional array must be passed.');
- m = s[0]
- n = s[1]
+ m, n = s
dm = np.zeros((m * (m - 1) / 2,), dtype=np.double)
+ wmink_names = ['wminkowski', 'wmi', 'wm', 'wpnorm']
+ if w is None and (metric == wminkowski or metric in wmink_names):
+ raise ValueError('weighted minkowski requires a weight '
+ 'vector `w` to be given.')
+
if callable(metric):
- k = 0
if metric == minkowski:
- for i in xrange(0, m - 1):
- for j in xrange(i+1, m):
- dm[k] = minkowski(X[i, :], X[j, :], p)
- k = k + 1
+ def dfun(u,v): return minkowski(u, v, p)
elif metric == wminkowski:
- for i in xrange(0, m - 1):
- for j in xrange(i+1, m):
- dm[k] = wminkowski(X[i, :], X[j, :], p, w)
- k = k + 1
+ def dfun(u,v): return wminkowski(u, v, p, w)
elif metric == seuclidean:
- for i in xrange(0, m - 1):
- for j in xrange(i+1, m):
- dm[k] = seuclidean(X[i, :], X[j, :], V)
- k = k + 1
+ def dfun(u,v): return seuclidean(u, v, V)
elif metric == mahalanobis:
- for i in xrange(0, m - 1):
- for j in xrange(i+1, m):
- dm[k] = mahalanobis(X[i, :], X[j, :], V)
- k = k + 1
+ def dfun(u,v): return mahalanobis(u, v, V)
else:
- for i in xrange(0, m - 1):
- for j in xrange(i+1, m):
- dm[k] = metric(X[i, :], X[j, :])
- k = k + 1
+ dfun = metric
+ k = 0
+ for i in xrange(0, m - 1):
+ for j in xrange(i+1, m):
+ dm[k] = dfun(X[i], X[j])
+ k = k + 1
+
elif isinstance(metric,basestring):
mstr = metric.lower()
@@ -1109,9 +1099,9 @@
_distance_wrap.pdist_chebyshev_wrap(_convert_to_double(X), dm)
elif mstr in set(['minkowski', 'mi', 'm']):
_distance_wrap.pdist_minkowski_wrap(_convert_to_double(X), dm, p)
- elif mstr in set(['wminkowski', 'wmi', 'wm', 'wpnorm']):
- _distance_wrap.cdist_weighted_minkowski_wrap(_convert_to_double(X),
- dm, p, w)
+ elif mstr in wmink_names:
+ _distance_wrap.pdist_weighted_minkowski_wrap(_convert_to_double(X),
+ dm, p, np.asarray(w))
elif mstr in set(['seuclidean', 'se', 's']):
if V is not None:
V = np.asarray(V, order='c')
@@ -1122,7 +1112,9 @@
if len(V.shape) != 1:
raise ValueError('Variance vector V must be one-dimensional.')
if V.shape[0] != n:
- raise ValueError('Variance vector V must be of the same dimension as the vectors on which the distances are computed.')
+ raise ValueError('Variance vector V must be of the same '
+ 'dimension as the vectors on which the distances '
+ 'are computed.')
# The C code doesn't do striding.
[VV] = _copy_arrays_if_base_present([_convert_to_double(V)])
else:
@@ -1432,7 +1424,7 @@
if throw:
raise
if warning:
- _warning(str(e))
+ warnings.warn(str(e))
valid = False
return valid
@@ -1492,7 +1484,7 @@
if throw:
raise
if warning:
- _warning(str(e))
+ warnings.warn(str(e))
valid = False
return valid
Modified: trunk/scipy/spatial/tests/test_distance.py
===================================================================
--- trunk/scipy/spatial/tests/test_distance.py 2010-11-27 03:36:50 UTC (rev 6946)
+++ trunk/scipy/spatial/tests/test_distance.py 2010-11-27 07:14:47 UTC (rev 6947)
@@ -38,11 +38,11 @@
import numpy as np
from numpy.testing import verbose, TestCase, run_module_suite, \
- assert_raises
+ assert_raises, assert_array_equal
from scipy.spatial.distance import squareform, pdist, cdist, matching, \
jaccard, dice, sokalsneath, rogerstanimoto, \
russellrao, yule, num_obs_y, num_obs_dm, \
- is_valid_dm, is_valid_y
+ is_valid_dm, is_valid_y, wminkowski
_filenames = ["iris.txt",
"cdist-X1.txt",
@@ -877,6 +877,32 @@
Y_test2 = pdist(X, 'test_minkowski', 5.8)
self.assertTrue(within_tol(Y_test2, Y_right, eps))
+ ################# wminkowski
+
+ def test_pdist_wminkowski(self):
+ x = np.array([[0.0, 0.0, 0.0],
+ [1.0, 0.0, 0.0],
+ [0.0, 1.0, 0.0],
+ [1.0, 1.0, 1.0]])
+
+ p2_expected = [1.0, 1.0, np.sqrt(3),
+ np.sqrt(2), np.sqrt(2),
+ np.sqrt(2)]
+ p1_expected = [0.5, 1.0, 3.5,
+ 1.5, 3.0,
+ 2.5]
+ dist = pdist(x, metric=wminkowski, w=[1.0, 1.0, 1.0])
+ assert_array_equal(dist, p2_expected)
+
+ dist = pdist(x, metric=wminkowski, w=[0.5, 1.0, 2.0], p=1)
+ assert_array_equal(dist, p1_expected)
+
+ dist = pdist(x, metric='wminkowski', w=[1.0, 1.0, 1.0])
+ assert_array_equal(dist, p2_expected)
+
+ dist = pdist(x, metric='wminkowski', w=[0.5, 1.0, 2.0], p=1)
+ assert_array_equal(dist, p1_expected)
+
################### pdist: hamming
def test_pdist_hamming_random(self):
"Tests pdist(X, 'hamming') on random data."
More information about the Scipy-svn
mailing list