[Scipy-svn] r2235 - trunk/Lib/stats
scipy-svn at scipy.org
scipy-svn at scipy.org
Tue Sep 26 12:59:29 EDT 2006
Author: rkern
Date: 2006-09-26 11:59:28 -0500 (Tue, 26 Sep 2006)
New Revision: 2235
Modified:
trunk/Lib/stats/kde.py
trunk/Lib/stats/stats.py
Log:
sum -> np.sum
Modified: trunk/Lib/stats/kde.py
===================================================================
--- trunk/Lib/stats/kde.py 2006-09-26 15:25:09 UTC (rev 2234)
+++ trunk/Lib/stats/kde.py 2006-09-26 16:59:28 UTC (rev 2235)
@@ -17,63 +17,60 @@
#
#-------------------------------------------------------------------------------
-#-------------------------------------------------------------------------------
-# Imports:
-#-------------------------------------------------------------------------------
-
+# Standard library imports.
import warnings
+# Scipy imports.
from scipy import linalg, special
-from numpy.oldnumeric import atleast_2d, reshape, zeros, newaxis, dot, exp, pi, sqrt, \
- ravel, Float, take, power, atleast_1d, squeeze, transpose
+from numpy import atleast_2d, reshape, zeros, newaxis, dot, exp, pi, sqrt, \
+ ravel, power, atleast_1d, squeeze, sum, transpose
from numpy.random import randint, multivariate_normal
+# Local imports.
import stats
import mvn
__all__ = ['gaussian_kde',
- ]
+]
-#-------------------------------------------------------------------------------
-# 'gaussian_kde' class:
-#-------------------------------------------------------------------------------
class gaussian_kde(object):
- """
+ """ Representation of a kernel-density estimate using Gaussian kernels.
+
Parameters
----------
dataset : (# of dims, # of data)-array
- datapoints to estimate from
+ datapoints to estimate from
Members
-------
d : int
- number of dimensions
+ number of dimensions
n : int
- number of datapoints
+ number of datapoints
Methods
-------
kde.evaluate(points) : array
- evaluate the estimated pdf on a provided set of points
+ evaluate the estimated pdf on a provided set of points
kde(points) : array
- same as kde.evaluate(points)
+ same as kde.evaluate(points)
kde.integrate_gaussian(mean, cov) : float
- multiply pdf with a specified gaussian and integrate over the whole domain
+ multiply pdf with a specified Gaussian and integrate over the whole domain
kde.integrate_box_1d(low, high) : float
- integrate pdf (1D only) between two bounds
+ integrate pdf (1D only) between two bounds
kde.integrate_box(low_bounds, high_bounds) : float
- integrate pdf over a rectangular space between low_bounds and high_bounds
+ integrate pdf over a rectangular space between low_bounds and high_bounds
kde.integrate_kde(other_kde) : float
- integrate two kernel density estimates multiplied together
+ integrate two kernel density estimates multiplied together
Internal Methods
----------------
kde.covariance_factor() : float
- computes the coefficient that multiplies the data covariance matrix to
- obtain the kernel covariance matrix. Set this method to kde.scotts_factor
- or kde.silverman_factor (or subclass to provide your own). The default is
- scotts_factor.
+ computes the coefficient that multiplies the data covariance matrix to
+ obtain the kernel covariance matrix. Set this method to
+ kde.scotts_factor or kde.silverman_factor (or subclass to provide your
+ own). The default is scotts_factor.
"""
def __init__(self, dataset):
@@ -82,11 +79,26 @@
self.d, self.n = self.dataset.shape
self._compute_covariance()
-
+
+
def evaluate(self, points):
"""Evaluate the estimated pdf on a set of points.
- points.shape == (# of dimensions, # of points)
+ Parameters
+ ----------
+ points : (# of dimensions, # of points)-array
+ Alternatively, a (# of dimensions,) vector can be passed in and
+ treated as a single point.
+
+ Returns
+ -------
+ values : (# of points,)-array
+ The values at each point.
+
+ Raises
+ ------
+ ValueError if the dimensionality of the input points is different than
+ the dimensionality of the KDE.
"""
points = atleast_2d(points).astype(self.dataset.dtype)
@@ -96,6 +108,7 @@
if d == 1 and m == self.d:
# points was passed in as a row vector
points = reshape(points, (self.d, 1))
+ m = 1
else:
msg = "points have dimension %s, dataset has dimension %s" % (d,
self.d)
@@ -126,21 +139,25 @@
__call__ = evaluate
def integrate_gaussian(self, mean, cov):
- """Multiply estimated density by a multivariate gaussian and integrate
+ """Multiply estimated density by a multivariate Gaussian and integrate
over the wholespace.
Parameters
----------
mean : vector
- the mean of the gaussian
+ the mean of the Gaussian
cov : matrix
- the covariance matrix of the gaussian
+ the covariance matrix of the Gaussian
Returns
-------
result : scalar
- the value of the integral
+ the value of the integral
+ Raises
+ ------
+ ValueError if the mean or covariance of the input Gaussian differs from
+ the KDE's dimensionality.
"""
mean = atleast_1d(squeeze(mean))
@@ -170,19 +187,23 @@
Parameters
----------
low : scalar
- lower bound of integration
+ lower bound of integration
high : scalar
- upper bound of integration
+ upper bound of integration
Returns
-------
value : scalar
- the result of the integral
+ the result of the integral
+
+ Raises
+ ------
+ ValueError if the KDE is over more than one dimension.
"""
if self.d != 1:
raise ValueError("integrate_box_1d() only handles 1D pdfs")
- stdev = ravel(sqrt(self.covariance))[0]
+ stdev = np.ravel(sqrt(self.covariance))[0]
normalized_low = ravel((low - self.dataset)/stdev)
normalized_high = ravel((high - self.dataset)/stdev)
@@ -198,16 +219,16 @@
Parameters
----------
low_bounds : vector
- lower bounds of integration
+ lower bounds of integration
high_bounds : vector
- upper bounds of integration
+ upper bounds of integration
maxpts=None : int
- maximum number of points to use for integration
+ maximum number of points to use for integration
Returns
-------
value : scalar
- the result of the integral
+ the result of the integral
"""
if maxpts is not None:
extra_kwds = {'maxpts': maxpts}
@@ -229,13 +250,17 @@
Parameters
----------
- other : GaussianKDE instance
- the other kde
+ other : gaussian_kde instance
+ the other kde
Returns
-------
value : scalar
- the result of the integral
+ the result of the integral
+
+ Raises
+ ------
+ ValueError if the KDEs have different dimensionality.
"""
if other.d != self.d:
@@ -268,22 +293,24 @@
Parameters
----------
- size : int=None
- if None, then size = self.n; otherwise, the number of samples to draw
+ size : int, optional
+ The number of samples to draw.
+ If not provided, then the size is the same as the underlying
+ dataset.
Returns
-------
dataset : (self.d, size)-array
- sampled dataset
+ sampled dataset
"""
if size is None:
size = self.n
- norm = transpose(multivariate_normal(zeros((self.d,), Float),
+ norm = transpose(multivariate_normal(zeros((self.d,), float),
self.covariance, size=size))
indices = randint(0, self.n, size=size)
- means = take(self.dataset, indices, axis=1)
+ means = self.dataset[:,indices]
return means + norm
@@ -294,13 +321,16 @@
def silverman_factor(self):
return power(self.n*(self.d+2.0)/4.0, -1./(self.d+4))
+ # This can be replaced with silverman_factor if one wants to use Silverman's
+ # rule for choosing the bandwidth of the kernels.
covariance_factor = scotts_factor
def _compute_covariance(self):
- """Computes the covariance matrix for each gaussian kernel using
+ """Computes the covariance matrix for each Gaussian kernel using
covariance_factor
"""
self.factor = self.covariance_factor()
self.covariance = atleast_2d(stats.cov(self.dataset, rowvar=1) *
self.factor * self.factor)
self.inv_cov = linalg.inv(self.covariance)
+ self._norm_factor = sqrt(linalg.det(2*pi*self.covariance)) * self.n
Modified: trunk/Lib/stats/stats.py
===================================================================
--- trunk/Lib/stats/stats.py 2006-09-26 15:25:09 UTC (rev 2234)
+++ trunk/Lib/stats/stats.py 2006-09-26 16:59:28 UTC (rev 2235)
@@ -246,7 +246,7 @@
x, axis = _chk_asarray(x,axis)
x = x.copy()
Norig = x.shape[axis]
- factor = 1.0-sum(np.isnan(x),axis)*1.0/Norig
+ factor = 1.0-np.sum(np.isnan(x),axis)*1.0/Norig
x[np.isnan(x)] = 0
return np.mean(x,axis)/factor
@@ -1727,7 +1727,7 @@
f_obs = asarray(f_obs)
k = len(f_obs)
if f_exp is None:
- f_exp = array([sum(f_obs,axis=0)/float(k)] * len(f_obs),float)
+ f_exp = array([np.sum(f_obs,axis=0)/float(k)] * len(f_obs),float)
f_exp = f_exp.astype(float)
chisq = np.add.reduce((f_obs-f_exp)**2 / f_exp)
return chisq, chisqprob(chisq, k-1)
@@ -1790,7 +1790,7 @@
ranked = rankdata(np.concatenate((x,y)))
rankx = ranked[0:n1] # get the x-ranks
#ranky = ranked[n1:] # the rest are y-ranks
- u1 = n1*n2 + (n1*(n1+1))/2.0 - sum(rankx,axis=0) # calc U for x
+ u1 = n1*n2 + (n1*(n1+1))/2.0 - np.sum(rankx,axis=0) # calc U for x
u2 = n1*n2 - u1 # remainder is U for y
bigu = max(u1,u2)
smallu = min(u1,u2)
@@ -1841,7 +1841,7 @@
ranked = rankdata(alldata)
x = ranked[:n1]
y = ranked[n1:]
- s = sum(x,axis=0)
+ s = np.sum(x,axis=0)
expected = n1*(n1+n2+1) / 2.0
z = (s - expected) / np.sqrt(n1*n2*(n1+n2+1)/12.0)
prob = 2*(1.0 -zprob(abs(z)))
@@ -1871,10 +1871,10 @@
del ranked[0:n[i]]
rsums = []
for i in range(len(args)):
- rsums.append(sum(args[i],axis=0)**2)
+ rsums.append(np.sum(args[i],axis=0)**2)
rsums[i] = rsums[i] / float(n[i])
- ssbn = sum(rsums,axis=0)
- totaln = sum(n,axis=0)
+ ssbn = np.sum(rsums,axis=0)
+ totaln = np.sum(n,axis=0)
h = 12.0 / (totaln*(totaln+1)) * ssbn - 3*(totaln+1)
df = len(args) - 1
if T == 0:
@@ -1902,7 +1902,7 @@
data = data.astype(float)
for i in range(len(data)):
data[i] = rankdata(data[i])
- ssbn = sum(sum(args,1)**2,axis=0)
+ ssbn = np.sum(np.sum(args,1)**2,axis=0)
chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1)
return chisq, chisqprob(chisq,k-1)
@@ -1991,7 +1991,7 @@
if len(p) == 2: # ttest_ind
c = array([1,-1])
df = n-2
- fact = sum(1.0/sum(x,0),axis=0) # i.e., 1/n1 + 1/n2 + 1/n3 ...
+ fact = np.sum(1.0/np.sum(x,0),axis=0) # i.e., 1/n1 + 1/n2 + 1/n3 ...
t = dot(c,b) / np.sqrt(s_sq*fact)
probs = betai(0.5*df,0.5,float(df)/(df+t*t))
return t, probs
@@ -2074,7 +2074,7 @@
Returns: the square of the sum over axis.
"""
a, axis = _chk_asarray(a, axis)
- s = sum(a,axis)
+ s = np.sum(a,axis)
if not np.isscalar(s):
return s.astype(float)*s
else:
More information about the Scipy-svn
mailing list