[Scipy-svn] r5285 - in trunk/scipy/stats: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Sun Dec 21 12:33:43 EST 2008
Author: josef
Date: 2008-12-21 11:33:40 -0600 (Sun, 21 Dec 2008)
New Revision: 5285
Modified:
trunk/scipy/stats/stats.py
trunk/scipy/stats/tests/test_stats.py
Log:
cleaning up stats.ttests, add tests, tests and doctests pass, see http://projects.scipy.org/pipermail/scipy-dev/2008-December/010670.html
Modified: trunk/scipy/stats/stats.py
===================================================================
--- trunk/scipy/stats/stats.py 2008-12-20 13:50:20 UTC (rev 5284)
+++ trunk/scipy/stats/stats.py 2008-12-21 17:33:40 UTC (rev 5285)
@@ -1870,23 +1870,33 @@
##### INFERENTIAL STATISTICS #####
#####################################
-def ttest_1samp(a, popmean):
+def ttest_1samp(a, popmean, axis=0):
"""
Calculates the t-obtained for the independent samples T-test on ONE group
of scores a, given a population mean.
Returns: t-value, two-tailed prob
"""
- a = asarray(a)
- x = np.mean(a)
- v = np.var(a, ddof=1)
- n = len(a)
- df = n-1
- svar = ((n-1)*v) / float(df)
- t = (x-popmean)/np.sqrt(svar*(1.0/n))
- #prob = betai(0.5*df,0.5,df/(df+t*t))
+
+
+ a, axis = _chk_asarray(a, axis)
+ n = a.shape[axis]
+ df=n-1
+
+ d = np.mean(a,axis) - popmean
+ v = np.var(a, axis, ddof=1)
+
+ t = d / np.sqrt(v/float(n))
+ t = np.where((d==0)*(v==0), 1.0, t) #define t=0/0 = 1, identical mean, var
prob = distributions.t.sf(np.abs(t),df)*2 #use np.abs to get upper tail
+ #distributions.t.sf currently does not propagate nans
+ #this can be dropped, if distributions.t.sf propagates nans
+ #if this is removed, then prob = prob[()] needs to be removed
+ prob = np.where(np.isnan(t), np.nan, prob)
+ if t.ndim == 0:
+ t = t[()]
+ prob = prob[()]
return t,prob
@@ -1928,38 +1938,44 @@
# test with sample with identical means
>>> rvs1 = stats.norm.rvs(loc=5,scale=10,size=500)
>>> rvs2 = stats.norm.rvs(loc=5,scale=10,size=500)
- >>> stats.ttest_ind(rvs1,rvs2)
- (array(0.26833823296239279), 0.78849443369564765)
+ >>> ttest_ind(rvs1,rvs2)
+ (0.26833823296239279, 0.78849443369564765)
# test with sample with different means
>>> rvs3 = stats.norm.rvs(loc=8,scale=10,size=500)
- >>> stats.ttest_ind(rvs1,rvs3)
- (array(-5.0434013458585092), 5.4302979468623391e-007)
+ >>> ttest_ind(rvs1,rvs3)
+ (-5.0434013458585092, 5.4302979468623391e-007)
"""
a, b, axis = _chk2_asarray(a, b, axis)
- x1 = mean(a,axis)
- x2 = mean(b,axis)
- v1 = var(a,axis)
- v2 = var(b,axis)
+
+ v1 = np.var(a,axis,ddof = 1)
+ v2 = np.var(b,axis,ddof = 1)
n1 = a.shape[axis]
n2 = b.shape[axis]
df = n1+n2-2
+
+ d = mean(a,axis) - mean(b,axis)
svar = ((n1-1)*v1+(n2-1)*v2) / float(df)
- zerodivproblem = svar == 0
- t = (x1-x2)/np.sqrt(svar*(1.0/n1 + 1.0/n2)) # N-D COMPUTATION HERE!!!!!!
- t = np.where(zerodivproblem, 1.0, t) # replace NaN t-values with 1.0
- probs = distributions.t.sf(np.abs(t),df)*2
- if not np.isscalar(t):
- probs = probs.reshape(t.shape)
- if not np.isscalar(probs) and len(probs) == 1:
- probs = probs[0]
- return t, probs
+ t = d/np.sqrt(svar*(1.0/n1 + 1.0/n2))
+ t = np.where((d==0)*(svar==0), 1.0, t) #define t=0/0 = 0, identical means
+ prob = distributions.t.sf(np.abs(t),df)*2#use np.abs to get upper tail
+
+ #distributions.t.sf currently does not propagate nans
+ #this can be dropped, if distributions.t.sf propagates nans
+ #if this is removed, then prob = prob[()] needs to be removed
+ prob = np.where(np.isnan(t), np.nan, prob)
+ if t.ndim == 0:
+ t = t[()]
+ prob = prob[()]
+
+ return t, prob
-def ttest_rel(a,b,axis=None):
+
+def ttest_rel(a,b,axis=0):
"""Calculates the t-obtained T-test on TWO RELATED samples of scores, a
and b. From Numerical Recipies, p.483. Axis can equal None (ravel array
first), or an integer (the axis over which to operate on a and b).
@@ -1987,8 +2003,6 @@
Examples
--------
- (note: after changes difference in 13th decimal)
-
>>> from scipy import stats
>>> import numpy as np
@@ -1998,35 +2012,40 @@
>>> rvs2 = stats.norm.rvs(loc=5,scale=10,size=500) + \
stats.norm.rvs(scale=0.2,size=500)
>>> stats.ttest_rel(rvs1,rvs2)
- (array(0.24101764965300965), 0.80964043445811562)
+ (0.24101764965300962, 0.80964043445811562)
>>> rvs3 = stats.norm.rvs(loc=8,scale=10,size=500) + \
stats.norm.rvs(scale=0.2,size=500)
>>> stats.ttest_rel(rvs1,rvs3)
- (array(-3.9995108708727929), 7.3082402191726459e-005)
+ (-3.9995108708727933, 7.3082402191726459e-005)
"""
a, b, axis = _chk2_asarray(a, b, axis)
- if len(a)!=len(b):
+ if a.shape[axis] != b.shape[axis]:
raise ValueError, 'unequal length arrays'
- x1 = mean(a,axis)
- x2 = mean(b,axis)
- v1 = var(a,axis)
- v2 = var(b,axis)
n = a.shape[axis]
df = float(n-1)
+
d = (a-b).astype('d')
+ v = np.var(d,axis,ddof=1)
+ dm = np.mean(d, axis)
+
+ t = dm / np.sqrt(v/float(n))
+ t = np.where((dm==0)*(v==0), 1.0, t) #define t=0/0 = 1, zero mean and var
+ prob = distributions.t.sf(np.abs(t),df)*2 #use np.abs to get upper tail
+ #distributions.t.sf currently does not propagate nans
+ #this can be dropped, if distributions.t.sf propagates nans
+ #if this is removed, then prob = prob[()] needs to be removed
+ prob = np.where(np.isnan(t), np.nan, prob)
+
+## if not np.isscalar(t):
+## probs = np.reshape(probs, t.shape) # this should be redundant
+## if not np.isscalar(prob) and len(prob) == 1:
+## prob = prob[0]
+ if t.ndim == 0:
+ t = t[()]
+ prob = prob[()]
- #denom is just sqrt(var(d)/df)
- denom = np.sqrt(np.var(d,axis,ddof=1)/float(n))
- zerodivproblem = denom == 0
- t = np.mean(d, axis) / denom
- t = np.where(zerodivproblem, 1.0, t) # replace NaN t-values with 1.0
- probs = distributions.t.sf(np.abs(t),df)*2
- if not np.isscalar(t):
- probs = np.reshape(probs, t.shape)
- if not np.isscalar(probs) and len(probs) == 1:
- probs = probs[0]
- return t, probs
+ return t, prob
#import scipy.stats
Modified: trunk/scipy/stats/tests/test_stats.py
===================================================================
--- trunk/scipy/stats/tests/test_stats.py 2008-12-20 13:50:20 UTC (rev 5284)
+++ trunk/scipy/stats/tests/test_stats.py 2008-12-21 17:33:40 UTC (rev 5285)
@@ -1071,7 +1071,110 @@
t,p = stats.ttest_rel(rvs1_2D, rvs2_2D, axis=1)
assert_array_almost_equal([t,p],tpr)
+ #test on 3 dimensions
+ rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D])
+ rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D])
+ t,p = stats.ttest_rel(rvs1_3D, rvs2_3D, axis=1)
+ assert_array_almost_equal(np.abs(t), tr)
+ assert_array_almost_equal(np.abs(p), pr)
+ assert_equal(t.shape, (2, 3))
+ t,p = stats.ttest_rel(np.rollaxis(rvs1_3D,2), np.rollaxis(rvs2_3D,2), axis=2)
+ assert_array_almost_equal(np.abs(t), tr)
+ assert_array_almost_equal(np.abs(p), pr)
+ assert_equal(t.shape, (3, 2))
+ #test zero division problem
+ t,p = stats.ttest_rel([0,0,0],[1,1,1])
+ assert_equal((np.abs(t),p), (np.inf, 0))
+ assert_equal(stats.ttest_rel([0,0,0], [0,0,0]), (1.0, 0.42264973081037427))
+
+ #check that nan in input array result in nan output
+ anan = np.array([[1,np.nan],[-1,1]])
+ assert_equal(stats.ttest_ind(anan, np.zeros((2,2))),([0, np.nan], [1,np.nan]))
+
+
+def test_ttest_ind():
+ #regression test
+ tr = 1.0912746897927283
+ pr = 0.27647818616351882
+ tpr = ([tr,-tr],[pr,pr])
+
+ rvs2 = np.linspace(1,100,100)
+ rvs1 = np.linspace(5,105,100)
+ rvs1_2D = np.array([rvs1, rvs2])
+ rvs2_2D = np.array([rvs2, rvs1])
+
+ t,p = stats.ttest_ind(rvs1, rvs2, axis=0)
+ assert_array_almost_equal([t,p],(tr,pr))
+ t,p = stats.ttest_ind(rvs1_2D.T, rvs2_2D.T, axis=0)
+ assert_array_almost_equal([t,p],tpr)
+ t,p = stats.ttest_ind(rvs1_2D, rvs2_2D, axis=1)
+ assert_array_almost_equal([t,p],tpr)
+
+ #test on 3 dimensions
+ rvs1_3D = np.dstack([rvs1_2D,rvs1_2D,rvs1_2D])
+ rvs2_3D = np.dstack([rvs2_2D,rvs2_2D,rvs2_2D])
+ t,p = stats.ttest_ind(rvs1_3D, rvs2_3D, axis=1)
+ assert_almost_equal(np.abs(t), np.abs(tr))
+ assert_array_almost_equal(np.abs(p), pr)
+ assert_equal(t.shape, (2, 3))
+
+ t,p = stats.ttest_ind(np.rollaxis(rvs1_3D,2), np.rollaxis(rvs2_3D,2), axis=2)
+ assert_array_almost_equal(np.abs(t), np.abs(tr))
+ assert_array_almost_equal(np.abs(p), pr)
+ assert_equal(t.shape, (3, 2))
+
+ #test zero division problem
+ t,p = stats.ttest_ind([0,0,0],[1,1,1])
+ assert_equal((np.abs(t),p), (np.inf, 0))
+ assert_equal(stats.ttest_ind([0,0,0], [0,0,0]), (1.0, 0.37390096630005898))
+
+ #check that nan in input array result in nan output
+ anan = np.array([[1,np.nan],[-1,1]])
+ assert_equal(stats.ttest_ind(anan, np.zeros((2,2))),([0, np.nan], [1,np.nan]))
+
+
+
+
+def test_ttest_1samp_new():
+ n1, n2, n3 = (10,15,20)
+ rvn1 = stats.norm.rvs(loc=5,scale=10,size=(n1,n2,n3))
+ rvn2 = stats.norm.rvs(loc=5,scale=10,size=(n1,n2,n3))
+
+ #check multidimensional array and correct axis handling
+ #deterministic rvn1 and rvn2 would be better as in test_ttest_rel
+ t1,p1 = stats.ttest_1samp(rvn1[:,:,:], np.ones((n2,n3)),axis=0)
+ t2,p2 = stats.ttest_1samp(rvn1[:,:,:], 1,axis=0)
+ t3,p3 = stats.ttest_1samp(rvn1[:,0,0], 1)
+ assert_array_almost_equal(t1,t2, decimal=14)
+ assert_almost_equal(t1[0,0],t3, decimal=14)
+ assert_equal(t1.shape, (n2,n3))
+
+ t1,p1 = stats.ttest_1samp(rvn1[:,:,:], np.ones((n1,n3)),axis=1)
+ t2,p2 = stats.ttest_1samp(rvn1[:,:,:], 1,axis=1)
+ t3,p3 = stats.ttest_1samp(rvn1[0,:,0], 1)
+ assert_array_almost_equal(t1,t2, decimal=14)
+ assert_almost_equal(t1[0,0],t3, decimal=14)
+ assert_equal(t1.shape, (n1,n3))
+
+ t1,p1 = stats.ttest_1samp(rvn1[:,:,:], np.ones((n1,n2)),axis=2)
+ t2,p2 = stats.ttest_1samp(rvn1[:,:,:], 1,axis=2)
+ t3,p3 = stats.ttest_1samp(rvn1[0,0,:], 1)
+ assert_array_almost_equal(t1,t2, decimal=14)
+ assert_almost_equal(t1[0,0],t3, decimal=14)
+ assert_equal(t1.shape, (n1,n2))
+
+ #test zero division problem
+ t,p = stats.ttest_1samp([0,0,0], 1)
+ assert_equal((np.abs(t),p), (np.inf, 0))
+ assert_equal(stats.ttest_1samp([0,0,0], 0), (1.0, 0.42264973081037427))
+
+ #check that nan in input array result in nan output
+ anan = np.array([[1,np.nan],[-1,1]])
+ assert_equal(stats.ttest_1samp(anan, 0),([0, np.nan], [1,np.nan]))
+
+
+
if __name__ == "__main__":
run_module_suite()
More information about the Scipy-svn
mailing list