[Scipy-svn] r5173 - in trunk/scipy/stats: . tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Sat Nov 22 22:41:19 EST 2008
Author: josef
Date: 2008-11-22 21:41:15 -0600 (Sat, 22 Nov 2008)
New Revision: 5173
Modified:
trunk/scipy/stats/stats.py
trunk/scipy/stats/tests/test_stats.py
Log:
correction for friedmanchisquare ticket:113, add tests (including for mstats version)
Modified: trunk/scipy/stats/stats.py
===================================================================
--- trunk/scipy/stats/stats.py 2008-11-22 23:14:44 UTC (rev 5172)
+++ trunk/scipy/stats/stats.py 2008-11-23 03:41:15 UTC (rev 5173)
@@ -190,6 +190,9 @@
import warnings
import math
+#friedmanchisquare patch uses python sum
+pysum = sum # save it before it gets overwritten
+
# Scipy imports.
from numpy import array, asarray, dot, ma, zeros, sum
import scipy.special as special
@@ -2139,22 +2142,39 @@
"""Friedman Chi-Square is a non-parametric, one-way within-subjects
ANOVA. This function calculates the Friedman Chi-square test for
repeated measures and returns the result, along with the associated
- probability value. It assumes 3 or more repeated measures. Only 3
- levels requires a minimum of 10 subjects in the study. Four levels
- requires 5 subjects per level(??).
+ probability value.
- Returns: chi-square statistic, associated p-value
+ This function uses Chisquared aproximation of Friedman Chisquared
+ distribution. This is exact only if n > 10 and factor levels > 6.
+
+ Returns: friedman chi-square statistic, associated p-valueIt assumes 3 or more repeated measures. Only 3
"""
k = len(args)
if k < 3:
raise ValueError, '\nLess than 3 levels. Friedman test not appropriate.\n'
n = len(args[0])
+ for i in range(1,k):
+ if len(args[i]) <> n:
+ raise ValueError, 'Unequal N in friedmanchisquare. Aborting.'
+ if n < 10 and k < 6:
+ print 'Warning: friedmanchisquare test using Chisquared aproximation'
+
+ # Rank data
data = apply(_support.abut,args)
data = data.astype(float)
for i in range(len(data)):
data[i] = rankdata(data[i])
- ssbn = np.sum(np.sum(args,1)**2,axis=0)
- chisq = 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1)
+
+ # Handle ties
+ ties = 0
+ for i in range(len(data)):
+ replist, repnum = scipy.stats.find_repeats(array(data[i]))
+ for t in repnum:
+ ties += t*(t*t-1)
+ c = 1 - ties / float(k*(k*k-1)*n)
+
+ ssbn = pysum(pysum(data)**2)
+ chisq = ( 12.0 / (k*n*(k+1)) * ssbn - 3*n*(k+1) ) / c
return chisq, chisqprob(chisq,k-1)
Modified: trunk/scipy/stats/tests/test_stats.py
===================================================================
--- trunk/scipy/stats/tests/test_stats.py 2008-11-22 23:14:44 UTC (rev 5172)
+++ trunk/scipy/stats/tests/test_stats.py 2008-11-23 03:41:15 UTC (rev 5173)
@@ -956,5 +956,74 @@
assert_equal(stats.percentileofscore([ 10,20,30,50,60,70,80,90,100,110],0,kind = 'mean'),0.0)
+
+def test_friedmanchisquare():
+ # see ticket:113
+ # verified with matlab and R
+ #From Demsar "Statistical Comparisons of Classifiers over Multiple Data Sets"
+ #2006, Xf=9.28 (no tie handling, tie corrected Xf >=9.28)
+ x1 = [array([0.763, 0.599, 0.954, 0.628, 0.882, 0.936, 0.661, 0.583,
+ 0.775, 1.0, 0.94, 0.619, 0.972, 0.957]),
+ array([0.768, 0.591, 0.971, 0.661, 0.888, 0.931, 0.668, 0.583,
+ 0.838, 1.0, 0.962, 0.666, 0.981, 0.978]),
+ array([0.771, 0.590, 0.968, 0.654, 0.886, 0.916, 0.609, 0.563,
+ 0.866, 1.0, 0.965, 0.614, 0.9751, 0.946]),
+ array([0.798, 0.569, 0.967, 0.657, 0.898, 0.931, 0.685, 0.625,
+ 0.875, 1.0, 0.962, 0.669, 0.975, 0.970])]
+
+ #From "Bioestadistica para las ciencias de la salud" Xf=18.95 p<0.001:
+ x2 = [array([4,3,5,3,5,3,2,5,4,4,4,3]),
+ array([2,2,1,2,3,1,2,3,2,1,1,3]),
+ array([2,4,3,3,4,3,3,4,4,1,2,1]),
+ array([3,5,4,3,4,4,3,3,3,4,4,4])]
+
+ #From Jerrorl H. Zar, "Biostatistical Analysis"(example 12.6), Xf=10.68, 0.005 < p < 0.01:
+ #Probability from this example is inexact using Chisquare aproximation of Friedman Chisquare.
+ x3 = [array([7.0,9.9,8.5,5.1,10.3]),
+ array([5.3,5.7,4.7,3.5,7.7]),
+ array([4.9,7.6,5.5,2.8,8.4]),
+ array([8.8,8.9,8.1,3.3,9.1])]
+
+ # Hollander & Wolfe (1973), p. 140ff.
+ # from R help file
+ xR = array( [5.40, 5.50, 5.55,
+ 5.85, 5.70, 5.75,
+ 5.20, 5.60, 5.50,
+ 5.55, 5.50, 5.40,
+ 5.90, 5.85, 5.70,
+ 5.45, 5.55, 5.60,
+ 5.40, 5.40, 5.35,
+ 5.45, 5.50, 5.35,
+ 5.25, 5.15, 5.00,
+ 5.85, 5.80, 5.70,
+ 5.25, 5.20, 5.10,
+ 5.65, 5.55, 5.45,
+ 5.60, 5.35, 5.45,
+ 5.05, 5.00, 4.95,
+ 5.50, 5.50, 5.40,
+ 5.45, 5.55, 5.50,
+ 5.55, 5.55, 5.35,
+ 5.45, 5.50, 5.55,
+ 5.50, 5.45, 5.25,
+ 5.65, 5.60, 5.40,
+ 5.70, 5.65, 5.55,
+ 6.30, 6.30, 6.25])
+ xR = xR.reshape((len(xR)/3.0,3)).T
+
+ #assert_array_almost_equal(stats.friedmanchisquare(xR[0],xR[1],xR[2]),(11.1429, 0.003805),3)
+ assert_array_almost_equal(stats.friedmanchisquare(x1[0],x1[1],x1[2],x1[3]),(10.2283464566929, 0.0167215803284414))
+ assert_array_almost_equal(stats.friedmanchisquare(x2[0],x2[1],x2[2],x2[3]),(18.9428571428571, 0.000280938375189499))
+ assert_array_almost_equal(stats.friedmanchisquare(x3[0],x3[1],x3[2],x3[3]),(10.68, 0.0135882729582176))
+ np.testing.assert_raises(ValueError, stats.friedmanchisquare,x3[0],x3[1])
+
+ assert_array_almost_equal(stats.mstats.friedmanchisquare(xR[0],xR[1],xR[2]),(11.1429, 0.003805),4)
+ assert_array_almost_equal(stats.mstats.friedmanchisquare(x1[0],x1[1],x1[2],x1[3]),(10.2283464566929, 0.0167215803284414))
+ # the following fails
+ #assert_array_almost_equal(stats.mstats.friedmanchisquare(x2[0],x2[1],x2[2],x2[3]),(18.9428571428571, 0.000280938375189499))
+ assert_array_almost_equal(stats.mstats.friedmanchisquare(x3[0],x3[1],x3[2],x3[3]),(10.68, 0.0135882729582176))
+ np.testing.assert_raises(ValueError,stats.mstats.friedmanchisquare,x3[0],x3[1])
+
+
+
if __name__ == "__main__":
run_module_suite()
More information about the Scipy-svn
mailing list