[Python-checkins] python/nondist/sandbox/statistics statistics.py,
1.7, 1.8 test_statistics.py, 1.2, 1.3
rhettinger at users.sourceforge.net
rhettinger at users.sourceforge.net
Sun Feb 29 04:12:30 EST 2004
Update of /cvsroot/python/python/nondist/sandbox/statistics
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv18374
Modified Files:
statistics.py test_statistics.py
Log Message:
* Adopt Skips suggestion to factor out variance() from standard deviation.
While the variance is not often used, converting back from the standard
deviation loses precision.
* Alter select() for a tri-partite partition instead of a two-way
partition. This increases the number of comparisons by 50% but
prevents the algorithm from losing its O(n) behavior when the
dataset contains many equal elements.
Index: statistics.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/statistics/statistics.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** statistics.py 17 Feb 2004 17:39:26 -0000 1.7
--- statistics.py 29 Feb 2004 09:12:28 -0000 1.8
***************
*** 46,55 ****
raise ValueError('data must have at least one element')
! def stddev(data, sample=True):
! """Computes the standard deviation of the dataset.
! If sample is True, computes the standard deviation with an n-1 divisor
! for an unbiased estimator for a data sample. If sample is False, computes
! with a divisor of n, giving the standard deviation for a complete population.
"""
--- 46,55 ----
raise ValueError('data must have at least one element')
! def variance(data, sample=True):
! """Computes the variance of the dataset.
! If sample is True, computes the variance with an n-1 divisor to reflect
! the additional degree of freedom in sampled data. If sample is False,
! computes with a divisor of n, giving the variance for a complete population.
"""
***************
*** 81,115 ****
if sample:
try:
! return (s / (k-1)) ** 0.5 # sample standard deviation
except ZeroDivisionError:
! raise ValueError('sample deviation requires at least two elements')
else:
! return (s / k) ** 0.5 # population standard deviation
def select(data, n):
"""Find the nth rank ordered element (the least value has rank 0).
"""
try:
while True:
! it = iter(data)
! pivot = it.next()
! under, notunder = [], []
! ua, nua = under.append, notunder.append
! for elem in it:
if elem < pivot:
! ua(elem)
! else:
! nua(elem)
if n < len(under):
data = under
! elif n == len(under):
return pivot
else:
! data = notunder
! n -= len(under) + 1
! if len(data) > 3:
! swap = random.randrange(len(data))
! data[0], data[swap] = data[swap], data[0]
! except StopIteration:
raise ValueError('not enough elements for the given rank')
--- 81,124 ----
if sample:
try:
! return (s / (k-1)) # sample variance
except ZeroDivisionError:
! raise ValueError('sample variance requires at least two elements')
else:
! return (s / k) # population variance
!
! def stddev(data, sample=True):
! """Computes the standard deviation of the dataset.
!
! If sample is True, computes the standard deviation with an n-1 divisor
! for an unbiased estimator for a data sample. If sample is False, computes
! with a divisor of n, giving the standard deviation for a complete population.
! """
! return variance(data, sample) ** 0.5
def select(data, n):
"""Find the nth rank ordered element (the least value has rank 0).
+
+ Equivalent to sorted(data)[n]
"""
+ data = list(data)
try:
while True:
! pivot = random.choice(data)
! pcount = data.count(pivot)
! over, under = [], []
! uappend, oappend = under.append, over.append
! for elem in data:
if elem < pivot:
! uappend(elem)
! elif elem > pivot:
! oappend(elem)
if n < len(under):
data = under
! elif n < len(under) + pcount:
return pivot
else:
! data = over
! n -= len(under) + pcount
! except IndexError:
raise ValueError('not enough elements for the given rank')
Index: test_statistics.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/statistics/test_statistics.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** test_statistics.py 17 Feb 2004 13:00:16 -0000 1.2
--- test_statistics.py 29 Feb 2004 09:12:28 -0000 1.3
***************
*** 51,73 ****
def test_select(self):
! n = 200
! a = range(n)
random.shuffle(a)
for i in xrange(100):
nth = random.randrange(n)
! self.assertEqual(select(a, nth), nth)
# Try with sorted data
! a = range(n)
for i in xrange(100):
nth = random.randrange(n)
! self.assertEqual(select(a, nth), nth)
# Try with reverse sorted data
! a = range(n)
a.reverse()
for i in xrange(100):
nth = random.randrange(n)
! self.assertEqual(select(a, nth), nth)
def test_nlargest(self):
--- 51,77 ----
def test_select(self):
! testdata = range(2000) + [20] * 500 + [85] * 500
! sorteddata = sorted(testdata)
! n = len(testdata)
! a = testdata[:]
random.shuffle(a)
for i in xrange(100):
nth = random.randrange(n)
! self.assertEqual(select(a, nth), sorteddata[nth])
# Try with sorted data
! a = sorteddata[:]
for i in xrange(100):
nth = random.randrange(n)
! self.assertEqual(select(a, nth), sorteddata[nth])
# Try with reverse sorted data
! a = sorteddata[:]
a.reverse()
for i in xrange(100):
nth = random.randrange(n)
! self.assertEqual(select(a, nth), sorteddata[nth])
!
! self.assertRaises(ValueError, select, a, n+1)
def test_nlargest(self):
More information about the Python-checkins
mailing list