[Scipy-svn] r5073 - trunk/scipy/stats/tests

scipy-svn at scipy.org scipy-svn at scipy.org
Wed Nov 12 20:46:19 EST 2008


Author: josef
Date: 2008-11-12 19:45:30 -0600 (Wed, 12 Nov 2008)
New Revision: 5073

Added:
   trunk/scipy/stats/tests/test_discrete_chisquare.py
Log:
add chisquare test comparing random sample with cdf (first try of commit)

Added: trunk/scipy/stats/tests/test_discrete_chisquare.py
===================================================================
--- trunk/scipy/stats/tests/test_discrete_chisquare.py	2008-11-13 00:08:24 UTC (rev 5072)
+++ trunk/scipy/stats/tests/test_discrete_chisquare.py	2008-11-13 01:45:30 UTC (rev 5073)
@@ -0,0 +1,102 @@
+
+import numpy as np
+from scipy import stats
+
+debug = False
+
+
+def check_discrete_chisquare(distname, arg, alpha = 0.01):
+    '''perform chisquare test for random sample of a discrete distribution
+
+    Parameters
+    ----------
+    distname : string
+        name of distribution function
+    arg : sequence
+        parameters of distribution
+    alpha : float
+        significance level, threshold for p-value
+
+    Returns
+    -------
+    result : bool
+        0 if test passes, 1 if test fails
+
+    uses global variable debug for printing results
+    '''
+
+    # define parameters for test
+    n=50000
+    nsupp = 20
+    wsupp = 1.0/nsupp
+
+    distfn = getattr(stats, distname)
+    rvs = distfn.rvs(size=n,*arg)
+
+    # construct intervals with minimum mass 1/nsupp
+    # intervalls are left-half-open as in a cdf difference
+    distsupport = xrange(max(distfn.a, -1000), min(distfn.b, 1000) + 1)
+    last = 0
+    distsupp = [max(distfn.a, -1000)]
+    distmass = []
+    for ii in distsupport:
+        current = distfn.cdf(ii,*arg)
+        if  current - last >= wsupp-1e-14:
+            distsupp.append(ii)
+            distmass.append(current - last)
+            last = current
+            if current > (1-wsupp):
+                break
+    if distsupp[-1]  < distfn.b:
+        distsupp.append(distfn.b)
+        distmass.append(1-last)
+    distsupp = np.array(distsupp)
+    distmass = np.array(distmass)
+
+    # convert intervals to right-half-open as required by histogram
+    histsupp = distsupp+1e-8
+    histsupp[0] = distfn.a
+
+    # find sample frequencies and perform chisquare test
+    freq,hsupp = np.histogram(rvs,histsupp,new=True)
+    cdfs = distfn.cdf(distsupp,*arg)
+    (chis,pval) = stats.chisquare(np.array(freq),n*distmass)
+
+    # print and return results
+    if debug:
+        print 'chis,pval:', chis, pval
+        print 'len(distsupp), len(distmass), len(hsupp), len(freq)'
+        print len(distsupp), len(distmass), len(hsupp), len(freq)
+        print 'distsupp', distsupp
+        print 'distmass', n*np.array(distmass)
+        print 'freq', freq
+        print 'itemfreq', stats.itemfreq(rvs)
+        print 'n*pmf', n*distfn.pmf(list(distsupport)[:10],*arg)
+
+    assert (pval > alpha), 'chisquare - test for %s' \
+           'at arg = %s' % (distname,str(arg))
+
+
+def test_discrete_rvs_cdf():
+    distdiscrete = [
+        ['bernoulli',(0.3,)],
+        ['binom',    (5, 0.4)],
+        ['boltzmann',(1.4, 19)],
+        ['dlaplace', (0.8,)],
+        ['geom',     (0.5,)],
+        ['hypergeom',(30, 12, 6)],
+        ['logser',   (0.6,)],
+        ['nbinom',   (5, 0.5)],
+        ['planck',   (4.1,)],
+        ['poisson',  (0.6,)],
+        ['randint',  (7, 31)],
+        ['zipf',     (2,)] ]
+
+    for distname, arg in distdiscrete:
+        if debug:
+            print distname
+        yield check_discrete_chisquare, distname, arg
+
+if __name__ == '__main__':
+    import nose
+    nose.run(argv=['', __file__])




More information about the Scipy-svn mailing list