[Python-checkins] python/dist/src/Lib random.py,1.35,1.36
rhettinger@users.sourceforge.net
rhettinger@users.sourceforge.net
Tue, 12 Nov 2002 09:42:00 -0800
Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv10733/lib
Modified Files:
random.py
Log Message:
SF patch 629637: Add sample(population, k) method to the random module.
Used for random sampling without replacement.
Index: random.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/random.py,v
retrieving revision 1.35
retrieving revision 1.36
diff -C2 -d -r1.35 -r1.36
*** random.py 16 Aug 2002 03:41:39 -0000 1.35
--- random.py 12 Nov 2002 17:41:57 -0000 1.36
***************
*** 8,11 ****
--- 8,12 ----
---------
pick random element
+ pick random sample
generate random permutation
***************
*** 78,82 ****
from math import floor as _floor
! __all__ = ["Random","seed","random","uniform","randint","choice",
"randrange","shuffle","normalvariate","lognormvariate",
"cunifvariate","expovariate","vonmisesvariate","gammavariate",
--- 79,83 ----
from math import floor as _floor
! __all__ = ["Random","seed","random","uniform","randint","choice","sample",
"randrange","shuffle","normalvariate","lognormvariate",
"cunifvariate","expovariate","vonmisesvariate","gammavariate",
***************
*** 374,377 ****
--- 375,415 ----
x[i], x[j] = x[j], x[i]
+ def sample(self, population, k, random=None, int=int):
+ """Chooses k unique random elements from a population sequence.
+
+ Returns a new list containing elements from the population. The
+ list itself is in random order so that all sub-slices are also
+ random samples. The original sequence is left undisturbed.
+
+ If the population has repeated elements, then each occurence is
+ a possible selection in the sample.
+
+ If indices are needed for a large population, use xrange as an
+ argument: sample(xrange(10000000), 60)
+
+ Optional arg random is a 0-argument function returning a random
+ float in [0.0, 1.0); by default, the standard random.random.
+ """
+
+ n = len(population)
+ if not 0 <= k <= n:
+ raise ValueError, "sample larger than population"
+ if random is None:
+ random = self.random
+ if n < 6 * k: # if n len list takes less space than a k len dict
+ pool = list(population)
+ for i in xrange(n-1, n-k-1, -1):
+ j = int(random() * (i+1))
+ pool[i], pool[j] = pool[j], pool[i]
+ return pool[-k:]
+ inorder = [None] * k
+ selections = {}
+ for i in xrange(k):
+ j = int(random() * n)
+ while j in selections:
+ j = int(random() * n)
+ selections[j] = inorder[i] = population[j]
+ return inorder # return selections in the order they were picked
+
## -------------------- real-valued distributions -------------------
***************
*** 712,716 ****
(avg, stddev, smallest, largest)
! def _test(N=20000):
print 'TWOPI =', TWOPI
print 'LOG4 =', LOG4
--- 750,766 ----
(avg, stddev, smallest, largest)
! def _test_sample(n):
! # For the entire allowable range of 0 <= k <= n, validate that
! # the sample is of the correct length and contains only unique items
! population = xrange(n)
! for k in xrange(n+1):
! s = sample(population, k)
! assert len(dict([(elem,True) for elem in s])) == len(s) == k
!
! def _sample_generator(n, k):
! # Return a fixed element from the sample. Validates random ordering.
! return sample(xrange(n), k)[k//2]
!
! def _test(N=2000):
print 'TWOPI =', TWOPI
print 'LOG4 =', LOG4
***************
*** 736,739 ****
--- 786,792 ----
_test_generator(N, 'paretovariate(1.0)')
_test_generator(N, 'weibullvariate(1.0, 1.0)')
+ _test_generator(N, '_sample_generator(50, 5)') # expected s.d.: 14.4
+ _test_generator(N, '_sample_generator(50, 45)') # expected s.d.: 14.4
+ _test_sample(1000)
# Test jumpahead.
***************
*** 761,764 ****
--- 814,818 ----
choice = _inst.choice
randrange = _inst.randrange
+ sample = _inst.sample
shuffle = _inst.shuffle
normalvariate = _inst.normalvariate