python/nondist/sandbox/statistics statistics.py, 1.7, 1.8 test_statistics.py, 1.2, 1.3

Update of /cvsroot/python/python/nondist/sandbox/statistics In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18374 Modified Files: statistics.py test_statistics.py Log Message: * Adopt Skips suggestion to factor out variance() from standard deviation. While the variance is not often used, converting back from the standard deviation loses precision. * Alter select() for a tri-partite partition instead of a two-way partition. This increases the number of comparisons by 50% but prevents the algorithm from losing its O(n) behavior when the dataset contains many equal elements. Index: statistics.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/statistics/statistics.py,v retrieving revision 1.7 retrieving revision 1.8 diff -C2 -d -r1.7 -r1.8 *** statistics.py 17 Feb 2004 17:39:26 -0000 1.7 --- statistics.py 29 Feb 2004 09:12:28 -0000 1.8 *************** *** 46,55 **** raise ValueError('data must have at least one element') ! def stddev(data, sample=True): ! """Computes the standard deviation of the dataset. ! If sample is True, computes the standard deviation with an n-1 divisor ! for an unbiased estimator for a data sample. If sample is False, computes ! with a divisor of n, giving the standard deviation for a complete population. """ --- 46,55 ---- raise ValueError('data must have at least one element') ! def variance(data, sample=True): ! """Computes the variance of the dataset. ! If sample is True, computes the variance with an n-1 divisor to reflect ! the additional degree of freedom in sampled data. If sample is False, ! computes with a divisor of n, giving the variance for a complete population. """ *************** *** 81,115 **** if sample: try: ! return (s / (k-1)) ** 0.5 # sample standard deviation except ZeroDivisionError: ! raise ValueError('sample deviation requires at least two elements') else: ! return (s / k) ** 0.5 # population standard deviation def select(data, n): """Find the nth rank ordered element (the least value has rank 0). """ try: while True: ! it = iter(data) ! pivot = it.next() ! under, notunder = [], [] ! ua, nua = under.append, notunder.append ! for elem in it: if elem < pivot: ! ua(elem) ! else: ! nua(elem) if n < len(under): data = under ! elif n == len(under): return pivot else: ! data = notunder ! n -= len(under) + 1 ! if len(data) > 3: ! swap = random.randrange(len(data)) ! data[0], data[swap] = data[swap], data[0] ! except StopIteration: raise ValueError('not enough elements for the given rank') --- 81,124 ---- if sample: try: ! return (s / (k-1)) # sample variance except ZeroDivisionError: ! raise ValueError('sample variance requires at least two elements') else: ! return (s / k) # population variance ! ! def stddev(data, sample=True): ! """Computes the standard deviation of the dataset. ! ! If sample is True, computes the standard deviation with an n-1 divisor ! for an unbiased estimator for a data sample. If sample is False, computes ! with a divisor of n, giving the standard deviation for a complete population. ! """ ! return variance(data, sample) ** 0.5 def select(data, n): """Find the nth rank ordered element (the least value has rank 0). + + Equivalent to sorted(data)[n] """ + data = list(data) try: while True: ! pivot = random.choice(data) ! pcount = data.count(pivot) ! over, under = [], [] ! uappend, oappend = under.append, over.append ! for elem in data: if elem < pivot: ! uappend(elem) ! elif elem > pivot: ! oappend(elem) if n < len(under): data = under ! elif n < len(under) + pcount: return pivot else: ! data = over ! n -= len(under) + pcount ! except IndexError: raise ValueError('not enough elements for the given rank') Index: test_statistics.py =================================================================== RCS file: /cvsroot/python/python/nondist/sandbox/statistics/test_statistics.py,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -d -r1.2 -r1.3 *** test_statistics.py 17 Feb 2004 13:00:16 -0000 1.2 --- test_statistics.py 29 Feb 2004 09:12:28 -0000 1.3 *************** *** 51,73 **** def test_select(self): ! n = 200 ! a = range(n) random.shuffle(a) for i in xrange(100): nth = random.randrange(n) ! self.assertEqual(select(a, nth), nth) # Try with sorted data ! a = range(n) for i in xrange(100): nth = random.randrange(n) ! self.assertEqual(select(a, nth), nth) # Try with reverse sorted data ! a = range(n) a.reverse() for i in xrange(100): nth = random.randrange(n) ! self.assertEqual(select(a, nth), nth) def test_nlargest(self): --- 51,77 ---- def test_select(self): ! testdata = range(2000) + [20] * 500 + [85] * 500 ! sorteddata = sorted(testdata) ! n = len(testdata) ! a = testdata[:] random.shuffle(a) for i in xrange(100): nth = random.randrange(n) ! self.assertEqual(select(a, nth), sorteddata[nth]) # Try with sorted data ! a = sorteddata[:] for i in xrange(100): nth = random.randrange(n) ! self.assertEqual(select(a, nth), sorteddata[nth]) # Try with reverse sorted data ! a = sorteddata[:] a.reverse() for i in xrange(100): nth = random.randrange(n) ! self.assertEqual(select(a, nth), sorteddata[nth]) ! ! self.assertRaises(ValueError, select, a, n+1) def test_nlargest(self):
participants (1)
-
rhettinger@users.sourceforge.net