[Python-checkins] python/dist/src/Lib random.py,1.35,1.36

rhettinger@users.sourceforge.net rhettinger@users.sourceforge.net
Tue, 12 Nov 2002 09:42:00 -0800


Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv10733/lib

Modified Files:
	random.py 
Log Message:
SF patch 629637:  Add sample(population, k) method to the random module.
Used for random sampling without replacement.



Index: random.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/random.py,v
retrieving revision 1.35
retrieving revision 1.36
diff -C2 -d -r1.35 -r1.36
*** random.py	16 Aug 2002 03:41:39 -0000	1.35
--- random.py	12 Nov 2002 17:41:57 -0000	1.36
***************
*** 8,11 ****
--- 8,12 ----
      ---------
             pick random element
+            pick random sample
             generate random permutation
  
***************
*** 78,82 ****
  from math import floor as _floor
  
! __all__ = ["Random","seed","random","uniform","randint","choice",
             "randrange","shuffle","normalvariate","lognormvariate",
             "cunifvariate","expovariate","vonmisesvariate","gammavariate",
--- 79,83 ----
  from math import floor as _floor
  
! __all__ = ["Random","seed","random","uniform","randint","choice","sample",
             "randrange","shuffle","normalvariate","lognormvariate",
             "cunifvariate","expovariate","vonmisesvariate","gammavariate",
***************
*** 374,377 ****
--- 375,415 ----
              x[i], x[j] = x[j], x[i]
  
+     def sample(self, population, k, random=None, int=int):
+         """Chooses k unique random elements from a population sequence.
+ 
+         Returns a new list containing elements from the population.  The
+         list itself is in random order so that all sub-slices are also
+         random samples.  The original sequence is left undisturbed.
+ 
+         If the population has repeated elements, then each occurence is
+         a possible selection in the sample.
+ 
+         If indices are needed for a large population, use xrange as an
+         argument:  sample(xrange(10000000), 60)
+ 
+         Optional arg random is a 0-argument function returning a random
+         float in [0.0, 1.0); by default, the standard random.random.
+         """
+ 
+         n = len(population)
+         if not 0 <= k <= n:
+             raise ValueError, "sample larger than population"
+         if random is None:
+             random = self.random
+         if n < 6 * k:     # if n len list takes less space than a k len dict
+             pool = list(population)
+             for i in xrange(n-1, n-k-1, -1):
+                 j = int(random() * (i+1))
+                 pool[i], pool[j] = pool[j], pool[i]
+             return pool[-k:]
+         inorder = [None] * k
+         selections = {}
+         for i in xrange(k):
+             j = int(random() * n)
+             while j in selections:
+                 j = int(random() * n)
+             selections[j] = inorder[i] = population[j]
+         return inorder     # return selections in the order they were picked
+ 
  ## -------------------- real-valued distributions  -------------------
  
***************
*** 712,716 ****
                (avg, stddev, smallest, largest)
  
! def _test(N=20000):
      print 'TWOPI         =', TWOPI
      print 'LOG4          =', LOG4
--- 750,766 ----
                (avg, stddev, smallest, largest)
  
! def _test_sample(n):
!     # For the entire allowable range of 0 <= k <= n, validate that
!     # the sample is of the correct length and contains only unique items
!     population = xrange(n)
!     for k in xrange(n+1):
!         s = sample(population, k)
!         assert len(dict([(elem,True) for elem in s])) == len(s) == k
! 
! def _sample_generator(n, k):
!     # Return a fixed element from the sample.  Validates random ordering.
!     return sample(xrange(n), k)[k//2]
! 
! def _test(N=2000):
      print 'TWOPI         =', TWOPI
      print 'LOG4          =', LOG4
***************
*** 736,739 ****
--- 786,792 ----
      _test_generator(N, 'paretovariate(1.0)')
      _test_generator(N, 'weibullvariate(1.0, 1.0)')
+     _test_generator(N, '_sample_generator(50, 5)')  # expected s.d.: 14.4
+     _test_generator(N, '_sample_generator(50, 45)') # expected s.d.: 14.4
+     _test_sample(1000)
  
      # Test jumpahead.
***************
*** 761,764 ****
--- 814,818 ----
  choice = _inst.choice
  randrange = _inst.randrange
+ sample = _inst.sample
  shuffle = _inst.shuffle
  normalvariate = _inst.normalvariate