[Scipy-svn] r2251 - in trunk/Lib: sandbox/stats stats
scipy-svn at scipy.org
scipy-svn at scipy.org
Tue Oct 10 01:53:47 EDT 2006
Author: oliphant
Date: 2006-10-10 00:53:33 -0500 (Tue, 10 Oct 2006)
New Revision: 2251
Modified:
trunk/Lib/sandbox/stats/anova.py
trunk/Lib/stats/distributions.py
trunk/Lib/stats/morestats.py
Log:
Fix sandbox/anova a bit. Fix distributions gengamma and invgamma to use gammaln and exp to avoid precision loss. Clean up bayes_mvs which returns mean, variance, and standard deviation with confidence intervals using Jeffrey's priors
Modified: trunk/Lib/sandbox/stats/anova.py
===================================================================
--- trunk/Lib/sandbox/stats/anova.py 2006-10-10 00:30:16 UTC (rev 2250)
+++ trunk/Lib/sandbox/stats/anova.py 2006-10-10 05:53:33 UTC (rev 2251)
@@ -2,6 +2,8 @@
# this module. No care has been taken to ensure that it works in its current
# state. At minimum, you will have to figure out what it needs to import to work.
+from numpy import sum
+
def anova(data, effects=['A','B','C','D','E','F','G','H','I','J','K']):
"""
Prints the results of single-variable between- and within-subject ANOVA
@@ -355,8 +357,8 @@
sourceNarray = apply_over_axes(hmean, Narray,btwnonsourcedims)
## Calc grand average (ga,axis=0), used for ALL effects
- ga = sum((sourceMarray*sourceNarray)/
- sum(sourceNarray,axis=0),axis=0)
+ ga = sum((sourceMarray*sourceNarray)/ \
+ sum(sourceNarray,axis=0),axis=0)
ga = reshape(ga,ones(len(Marray.shape)))
## If GRAND interaction, use harmonic mean of ALL cell Ns
Modified: trunk/Lib/stats/distributions.py
===================================================================
--- trunk/Lib/stats/distributions.py 2006-10-10 00:30:16 UTC (rev 2250)
+++ trunk/Lib/stats/distributions.py 2006-10-10 05:53:33 UTC (rev 2251)
@@ -18,6 +18,7 @@
import numpy
import numpy.random as mtrand
from numpy import flatnonzero as nonzero
+from scipy.special import gammaln as gamln
__all__ = [
'rv_continuous',
@@ -1707,7 +1708,7 @@
def _argcheck(self, a, c):
return (a > 0) & (c != 0)
def _pdf(self, x, a, c):
- return abs(c)* x**(c*a-1) / special.gamma(a) * exp(-x**c)
+ return abs(c)* exp((c*a-1)*log(x)-x**c- special.gammaln(a))
def _cdf(self, x, a, c):
val = special.gammainc(a,x**c)
cond = c + 0*val
@@ -1973,13 +1974,13 @@
class invgamma_gen(rv_continuous):
def _pdf(self, x, a):
- return x**(-a-1) / special.gamma(a) * exp(-1.0/x)
+ return exp(-(a+1)*log(x)-special.gammaln(a) - 1.0/x)
def _cdf(self, x, a):
return 1.0-special.gammainc(a, 1.0/x)
def _ppf(self, q, a):
return 1.0/special.gammaincinv(a,1-q)
def _munp(self, n, a):
- return special.gamma(a-n) / special.gamma(a)
+ return exp(special.gammaln(a-n) - special.gammaln(a))
def _entropy(self, a):
return a - (a+1.0)*special.psi(a) + special.gammaln(a)
invgamma = invgamma_gen(a=0.0, name='invgamma',longname="An inverted gamma",
@@ -2232,7 +2233,7 @@
#
class loggamma_gen(rv_continuous):
def _pdf(self, x, c):
- return exp(c*x-exp(x))/ special.gamma(c)
+ return exp(c*x-exp(x)-special.gammaln(c))
def _cdf(self, x, c):
return special.gammainc(c, exp(x))/ special.gamma(c)
def _ppf(self, q, c):
@@ -2446,20 +2447,21 @@
return mtrand.noncentral_f(dfn,dfd,nc,self._size)
def _pdf(self, x, dfn, dfd, nc):
n1,n2 = dfn, dfd
- Px = exp(-nc/2+nc*n1*x/(2*(n2+n1*x)))
+ term = -nc/2+nc*n1*x/(2*(n2+n1*x)) + gamln(n1/2.)+gamln(1+n2/2.)
+ term -= gamln((n1+n2)/2.0)
+ Px = exp(term)
Px *= n1**(n1/2) * n2**(n2/2) * x**(n1/2-1)
Px *= (n2+n1*x)**(-(n1+n2)/2)
- Px *= special.gamma(n1/2)*special.gamma(1+n2/2)
Px *= special.assoc_laguerre(-nc*n1*x/(2.0*(n2+n1*x)),n2/2,n1/2-1)
- Px /= special.beta(n1/2,n2/2)*special.gamma((n1+n2)/2.0)
+ Px /= special.beta(n1/2,n2/2)
def _cdf(self, x, dfn, dfd, nc):
return special.ncfdtr(dfn,dfd,nc,x)
def _ppf(self, q, dfn, dfd, nc):
return special.ncfdtri(dfn, dfd, nc, q)
def _munp(self, n, dfn, dfd, nc):
val = (dfn *1.0/dfd)**n
- val *= gam(n+0.5*dfn)*gam(0.5*dfd-n) / gam(dfd*0.5)
- val *= exp(-nc / 2.0)
+ term = gamln(n+0.5*dfn) + gamln(0.5*dfd-n) - gamln(dfd*0.5)
+ val *= exp(-nc / 2.0+term)
val *= special.hyp1f1(n+0.5*dfn, 0.5*dfn, 0.5*nc)
return val
def _stats(self, dfn, dfd, nc):
@@ -2528,8 +2530,9 @@
x2 = x*x
ncx2 = nc*nc*x2
fac1 = n + x2
- Px = n**(n/2) * special.gamma(n+1)
- Px /= arr(2.0**n*exp(nc*nc/2)*fac1**(n/2)*special.gamma(n/2))
+ trm1 = n/2.*log(n) + gamln(n+1)
+ trm1 -= n*log(2)+nc*nc/2.+(n/2.)*log(fac1)+gamln(n/2.)
+ Px = exp(trm1)
valF = ncx2 / (2*fac1)
trm1 = sqrt(2)*nc*x*special.hyp1f1(n/2+1,1.5,valF)
trm1 /= arr(fac1*special.gamma((n+1)/2))
Modified: trunk/Lib/stats/morestats.py
===================================================================
--- trunk/Lib/stats/morestats.py 2006-10-10 00:30:16 UTC (rev 2250)
+++ trunk/Lib/stats/morestats.py 2006-10-10 05:53:33 UTC (rev 2251)
@@ -40,6 +40,32 @@
### Bayesian confidence intervals for mean, variance, std
##########################################################
+# assume distributions are gaussian with given means and variances.
+def _gauss_mvs(x, n, alpha):
+ xbar = x.mean()
+ C = x.var()
+ val = distributions.norm.ppf((1+alpha)/2.0)
+ # mean is a Gaussian with mean xbar and variance C/n
+ mp = xbar
+ fac0 = sqrt(C/n)
+ term = fac0*val
+ ma = mp - term
+ mb = mp + term
+ # var is a Gaussian with mean C and variance 2*C*C/n
+ vp = C
+ fac1 = sqrt(2.0/n)*C
+ term = fac1*val
+ va = vp - term
+ vb = vp + term
+ # std is a Gaussian with mean sqrt(C) and variance C/(2*n)
+ st = sqrt(C)
+ fac2 = sqrt(0.5)*fac0
+ term = fac2*val
+ sta = st - term
+ stb = st + term
+ return mp, (ma, mb), vp, (va, vb), st, (sta, stb)
+
+
## Assumes all is known is that mean, and std (variance,axis=0) exist
## and are the same for all the data. Uses Jeffrey's prior
##
@@ -51,12 +77,14 @@
Assumes 1-d data all has same mean and variance and uses Jeffrey's prior
for variance and std.
- alpha gives the probability that the returned interval contains
+
+ alpha gives the probability that the returned confidence interval contains
the true parameter.
- Uses peak of conditional pdf as starting center.
+ Uses mean of conditional pdf as center estimate
+ (but centers confidence interval on the median)
- Returns (peak, (a, b)) for each of mean, variance and standard deviation.
+ Returns (center, (a, b)) for each of mean, variance and standard deviation.
Requires 2 or more data-points.
"""
x = ravel(data)
@@ -64,48 +92,43 @@
assert(n > 1)
assert(alpha < 1 and alpha > 0)
n = float(n)
- xbar = sb.add.reduce(x)/n
- C = sb.add.reduce(x*x)/n - xbar*xbar
- #
+ if (n > 1000): # just a guess. The curves look similar at this point.
+ return _gauss_mvs(x, n, alpha)
+ xbar = x.mean()
+ C = x.var()
+ # mean
fac = sqrt(C/(n-1))
tval = distributions.t.ppf((1+alpha)/2.0,n-1)
delta = fac*tval
ma = xbar - delta
mb = xbar + delta
mp = xbar
- #
+ # var
fac = n*C/2.0
- a = (n-1)/2.0
- if (n > 3): # use mean of distribution as center
- peak = 2/(n-3.0)
- F_peak = distributions.invgamma.cdf(peak,a)
- else: # use median
- F_peak = -1.0
- if (F_peak < alpha/2.0):
+ a = (n-1)/2
+ if (n < 4):
peak = distributions.invgamma.ppf(0.5,a)
- F_peak = 0.5
- q1 = F_peak - alpha/2.0
- q2 = F_peak + alpha/2.0
- if (q2 > 1): q2 = 1.0
+ else:
+ peak = 2.0/(n-3.0)
+ q1 = (1-alpha)/2.0
+ q2 = (1+alpha)/2.0
va = fac*distributions.invgamma.ppf(q1,a)
vb = fac*distributions.invgamma.ppf(q2,a)
vp = peak*fac
- #
+ # std
fac = sqrt(fac)
- if (n > 2):
- peak = special.gamma(a-0.5) / special.gamma(a)
- F_peak = distributions.gengamma.cdf(peak,a,-2)
- else: # use median
- F_peak = -1.0
- if (F_peak < alpha/2.0):
+ if (n < 3):
peak = distributions.gengamma.ppf(0.5,a,-2)
- F_peak = 0.5
- q1 = F_peak - alpha/2.0
- q2 = F_peak + alpha/2.0
- if (q2 > 1): q2 = 1.0
+ stp = fac*peak
+ else:
+ ndiv2 = (n-1)/2.0
+ term = special.gammaln(ndiv2-0.5)-special.gammaln(ndiv2)
+ term += (log(n)+log(C)-log(2.0))*0.5
+ stp = exp(term)
+ q1 = (1-alpha)/2.0
+ q2 = (1+alpha)/2.0
sta = fac*distributions.gengamma.ppf(q1,a,-2)
stb = fac*distributions.gengamma.ppf(q2,a,-2)
- stp = peak*fac
return (mp,(ma,mb)),(vp,(va,vb)),(stp,(sta,stb))
More information about the Scipy-svn
mailing list