import numpy as np from scipy import stats results = [] n = 100 #number of observations loc1, scale1 = 1, 2 loc2, scale2 = 1, 2 n_repl = 1000 print '='*50 print 'Monte Carlo for K-S 2sample test (ks_2samp):' print 'sample size = %d, %d replications' % (n,n_repl) print 'sample 1: normal distribution (loc=%f,scale=%f)' % (loc1,scale1) print 'sample 2: normal distribution (loc=%f,scale=%f)' % (loc2,scale2) for ii in range(n_repl): rvs1 = stats.norm.rvs(size=n,loc=loc1,scale=scale1) rvs2 = stats.norm.rvs(size=n,loc=loc2,scale=scale2) results.append(stats.ks_2samp(rvs1,rvs2)) print 'ks_2samp: proportion of rejection at 1% significance:', print 1-np.sum(np.array(results)[:,1]>0.01,axis=0)/float(n_repl) print 'ks_2samp: proportion of rejection at 5% significance:', print 1-np.sum(np.array(results)[:,1]>0.05,axis=0)/float(n_repl) print 'ks_2samp: proportion of rejection at 10% significance:', print 1-np.sum(np.array(results)[:,1]>0.1,axis=0)/float(n_repl) #======================================= loc1, scale1 = 0, 1 loc2, scale2 = 0, 1 n=500 print '='*50 print 'Monte Carlo for K-S 2sample test (ks_2samp):' print 'sample size = %d, %d replications' % (n,n_repl) print 'sample 1: normal distribution (loc=%f,scale=%f)' % (loc1,scale1) print 'sample 2: t distribution (dof=10, loc=%f,scale=%f)' % (loc2,scale2) results = [] for ii in range(n_repl): rvs1 = stats.norm.rvs(size=n,loc=loc1,scale=scale1) rvs2 = stats.t.rvs(2,size=n,loc=loc2,scale=scale2) results.append(stats.ks_2samp(rvs1,rvs2)) print 'ks_2samp: proportion of rejection at 1% significance:', print 1-np.sum(np.array(results)[:,1]>0.01,axis=0)/float(n_repl) print 'ks_2samp: proportion of rejection at 5% significance:', print 1-np.sum(np.array(results)[:,1]>0.05,axis=0)/float(n_repl) print 'ks_2samp: proportion of rejection at 10% significance:', print 1-np.sum(np.array(results)[:,1]>0.1,axis=0)/float(n_repl)