[Python-checkins] python/dist/src/Lib random.py,1.71,1.72

rhettinger@users.sourceforge.net rhettinger at users.sourceforge.net
Fri Aug 19 03:36:45 CEST 2005


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv27122

Modified Files:
	random.py 
Log Message:
Implement random.sample() using sets instead of dicts.

Index: random.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/random.py,v
retrieving revision 1.71
retrieving revision 1.72
diff -u -d -r1.71 -r1.72
--- random.py	30 Apr 2005 09:02:51 -0000	1.71
+++ random.py	19 Aug 2005 01:36:35 -0000	1.72
@@ -41,7 +41,7 @@
 
 from warnings import warn as _warn
 from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType
-from math import log as _log, exp as _exp, pi as _pi, e as _e
+from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil
 from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
 from os import urandom as _urandom
 from binascii import hexlify as _hexlify
@@ -286,15 +286,14 @@
         """
 
         # Sampling without replacement entails tracking either potential
-        # selections (the pool) in a list or previous selections in a
-        # dictionary.
+        # selections (the pool) in a list or previous selections in a set.
 
         # When the number of selections is small compared to the
         # population, then tracking selections is efficient, requiring
-        # only a small dictionary and an occasional reselection.  For
+        # only a small set and an occasional reselection.  For
         # a larger number of selections, the pool tracking method is
         # preferred since the list takes less space than the
-        # dictionary and it doesn't suffer from frequent reselections.
+        # set and it doesn't suffer from frequent reselections.
 
         n = len(population)
         if not 0 <= k <= n:
@@ -302,7 +301,10 @@
         random = self.random
         _int = int
         result = [None] * k
-        if n < 6 * k:     # if n len list takes less space than a k len dict
+        setsize = 21        # size of a small set minus size of an empty list
+        if k > 5:
+              setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets
+        if n <= setsize:    # is an n-length list smaller than a k-length set
             pool = list(population)
             for i in xrange(k):         # invariant:  non-selected at [0,n-i)
                 j = _int(random() * (n-i))
@@ -311,14 +313,16 @@
         else:
             try:
                 n > 0 and (population[0], population[n//2], population[n-1])
-            except (TypeError, KeyError):   # handle sets and dictionaries
+            except (TypeError, KeyError):   # handle non-sequence iterables
                 population = tuple(population)
-            selected = {}
+            selected = set()
+            selected_add = selected.add
             for i in xrange(k):
                 j = _int(random() * n)
                 while j in selected:
                     j = _int(random() * n)
-                result[i] = selected[j] = population[j]
+                selected_add(j)
+                result[i] = population[j]
         return result
 
 ## -------------------- real-valued distributions  -------------------



More information about the Python-checkins mailing list