[Scipy-svn] r5073 - trunk/scipy/stats/tests
scipy-svn at scipy.org
scipy-svn at scipy.org
Wed Nov 12 20:46:19 EST 2008
Author: josef
Date: 2008-11-12 19:45:30 -0600 (Wed, 12 Nov 2008)
New Revision: 5073
Added:
trunk/scipy/stats/tests/test_discrete_chisquare.py
Log:
add chisquare test comparing random sample with cdf (first try of commit)
Added: trunk/scipy/stats/tests/test_discrete_chisquare.py
===================================================================
--- trunk/scipy/stats/tests/test_discrete_chisquare.py 2008-11-13 00:08:24 UTC (rev 5072)
+++ trunk/scipy/stats/tests/test_discrete_chisquare.py 2008-11-13 01:45:30 UTC (rev 5073)
@@ -0,0 +1,102 @@
+
+import numpy as np
+from scipy import stats
+
+debug = False
+
+
+def check_discrete_chisquare(distname, arg, alpha = 0.01):
+ '''perform chisquare test for random sample of a discrete distribution
+
+ Parameters
+ ----------
+ distname : string
+ name of distribution function
+ arg : sequence
+ parameters of distribution
+ alpha : float
+ significance level, threshold for p-value
+
+ Returns
+ -------
+ result : bool
+ 0 if test passes, 1 if test fails
+
+ uses global variable debug for printing results
+ '''
+
+ # define parameters for test
+ n=50000
+ nsupp = 20
+ wsupp = 1.0/nsupp
+
+ distfn = getattr(stats, distname)
+ rvs = distfn.rvs(size=n,*arg)
+
+ # construct intervals with minimum mass 1/nsupp
+ # intervalls are left-half-open as in a cdf difference
+ distsupport = xrange(max(distfn.a, -1000), min(distfn.b, 1000) + 1)
+ last = 0
+ distsupp = [max(distfn.a, -1000)]
+ distmass = []
+ for ii in distsupport:
+ current = distfn.cdf(ii,*arg)
+ if current - last >= wsupp-1e-14:
+ distsupp.append(ii)
+ distmass.append(current - last)
+ last = current
+ if current > (1-wsupp):
+ break
+ if distsupp[-1] < distfn.b:
+ distsupp.append(distfn.b)
+ distmass.append(1-last)
+ distsupp = np.array(distsupp)
+ distmass = np.array(distmass)
+
+ # convert intervals to right-half-open as required by histogram
+ histsupp = distsupp+1e-8
+ histsupp[0] = distfn.a
+
+ # find sample frequencies and perform chisquare test
+ freq,hsupp = np.histogram(rvs,histsupp,new=True)
+ cdfs = distfn.cdf(distsupp,*arg)
+ (chis,pval) = stats.chisquare(np.array(freq),n*distmass)
+
+ # print and return results
+ if debug:
+ print 'chis,pval:', chis, pval
+ print 'len(distsupp), len(distmass), len(hsupp), len(freq)'
+ print len(distsupp), len(distmass), len(hsupp), len(freq)
+ print 'distsupp', distsupp
+ print 'distmass', n*np.array(distmass)
+ print 'freq', freq
+ print 'itemfreq', stats.itemfreq(rvs)
+ print 'n*pmf', n*distfn.pmf(list(distsupport)[:10],*arg)
+
+ assert (pval > alpha), 'chisquare - test for %s' \
+ 'at arg = %s' % (distname,str(arg))
+
+
+def test_discrete_rvs_cdf():
+ distdiscrete = [
+ ['bernoulli',(0.3,)],
+ ['binom', (5, 0.4)],
+ ['boltzmann',(1.4, 19)],
+ ['dlaplace', (0.8,)],
+ ['geom', (0.5,)],
+ ['hypergeom',(30, 12, 6)],
+ ['logser', (0.6,)],
+ ['nbinom', (5, 0.5)],
+ ['planck', (4.1,)],
+ ['poisson', (0.6,)],
+ ['randint', (7, 31)],
+ ['zipf', (2,)] ]
+
+ for distname, arg in distdiscrete:
+ if debug:
+ print distname
+ yield check_discrete_chisquare, distname, arg
+
+if __name__ == '__main__':
+ import nose
+ nose.run(argv=['', __file__])
More information about the Scipy-svn
mailing list