[Python-checkins] python/dist/src/Lib/test sortperf.py,1.7,1.8

tim_one@users.sourceforge.net tim_one@users.sourceforge.net
Thu, 18 Jul 2002 08:53:36 -0700


Update of /cvsroot/python/python/dist/src/Lib/test
In directory usw-pr-cvs1:/tmp/cvs-serv6534/python/Lib/test

Modified Files:
	sortperf.py 
Log Message:
Gave this a facelift:  "/" vs "//", whrandom vs random, etc.  Boosted
the default range to end at 2**20 (machines are much faster now).
Fixed what was quite arguably a bug, explaining an old mystery:  the
"!sort" case here constructs what *was* a quadratic-time disaster for
the old quicksort implementation.  But under the current samplesort, it
always ran much faster than *sort (the random case).  This never made
sense.  Turns out it was because !sort was sorting an integer array,
while all the other cases sort floats; and comparing ints goes much
quicker than comparing floats in Python.  After changing !sort to chew
on floats instead, it's now slower than the random sort case, which
makes more sense (but is just a few percent slower; samplesort is
massively less sensitive to "bad patterns" than quicksort).


Index: sortperf.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/test/sortperf.py,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** sortperf.py	10 Apr 2002 14:54:39 -0000	1.7
--- sortperf.py	18 Jul 2002 15:53:32 -0000	1.8
***************
*** 11,28 ****
  import marshal
  import tempfile
- import operator
  import os
  
  td = tempfile.gettempdir()
  
! def randrange(n):
!     """Return a random shuffle of range(n)."""
      fn = os.path.join(td, "rr%06d" % n)
      try:
          fp = open(fn, "rb")
      except IOError:
!         result = []
!         for i in range(n):
!             result.append(random.random())
          try:
              try:
--- 11,29 ----
  import marshal
  import tempfile
  import os
  
  td = tempfile.gettempdir()
  
! def randfloats(n):
!     """Return a list of n random floats in [0, 1)."""
!     # Generating floats is expensive, so this writes them out to a file in
!     # a temp directory.  If the file already exists, it just reads them
!     # back in and shuffles them a bit.
      fn = os.path.join(td, "rr%06d" % n)
      try:
          fp = open(fn, "rb")
      except IOError:
!         r = random.random
!         result = [r() for i in xrange(n)]
          try:
              try:
***************
*** 42,57 ****
          result = marshal.load(fp)
          fp.close()
-         ##assert len(result) == n
          # Shuffle it a bit...
          for i in range(10):
!             i = random.randrange(0, n)
              temp = result[:i]
              del result[:i]
              temp.reverse()
!             result[len(result):] = temp
              del temp
      return result
  
! def fl():
      sys.stdout.flush()
  
--- 43,58 ----
          result = marshal.load(fp)
          fp.close()
          # Shuffle it a bit...
          for i in range(10):
!             i = random.randrange(n)
              temp = result[:i]
              del result[:i]
              temp.reverse()
!             result.extend(temp)
              del temp
+     assert len(result) == n
      return result
  
! def flush():
      sys.stdout.flush()
  
***************
*** 61,65 ****
      t1 = time.clock()
      print "%6.2f" % (t1-t0),
!     fl()
  
  def tabulate(r):
--- 62,66 ----
      t1 = time.clock()
      print "%6.2f" % (t1-t0),
!     flush()
  
  def tabulate(r):
***************
*** 75,105 ****
      /sort: ascending data
      ~sort: many duplicates
!     -sort: all equal
      !sort: worst case scenario
  
      """
!     cases = ("*sort", "\\sort", "/sort", "~sort", "-sort", "!sort")
!     fmt = ("%2s %6s" + " %6s"*len(cases))
      print fmt % (("i", "2**i") + cases)
      for i in r:
!         n = 1<<i
!         L = randrange(n)
!         ##assert len(L) == n
!         print "%2d %6d" % (i, n),
!         fl()
          doit(L) # *sort
          L.reverse()
          doit(L) # \sort
          doit(L) # /sort
          if n > 4:
              del L[4:]
!             L = L*(n/4)
              L = map(lambda x: --x, L)
          doit(L) # ~sort
          del L
!         L = map(abs, [-0.5]*n)
!         doit(L) # -sort
!         L = range(n/2-1, -1, -1)
!         L[len(L):] = range(n/2)
          doit(L) # !sort
          print
--- 76,123 ----
      /sort: ascending data
      ~sort: many duplicates
!     =sort: all equal
      !sort: worst case scenario
  
      """
!     cases = ("*sort", "\\sort", "/sort", "~sort", "=sort", "!sort")
!     fmt = ("%2s %7s" + " %6s"*len(cases))
      print fmt % (("i", "2**i") + cases)
      for i in r:
!         n = 1 << i
!         L = randfloats(n)
!         print "%2d %7d" % (i, n),
!         flush()
          doit(L) # *sort
          L.reverse()
          doit(L) # \sort
          doit(L) # /sort
+ 
+         # Arrange for lots of duplicates.
          if n > 4:
              del L[4:]
!             L = L * (n // 4)
!             # Force the elements to be distinct objects, else timings can be
!             # artificially low.
              L = map(lambda x: --x, L)
          doit(L) # ~sort
          del L
! 
!         # All equal.  Again, force the elements to be distinct objects.
!         L = map(abs, [-0.5] * n)
!         doit(L) # =sort
!         del L
! 
!         # This one looks like [3, 2, 1, 0, 0, 1, 2, 3].  It was a bad case
!         # for an older implementation of quicksort, which used the median
!         # of the first, last and middle elements as the pivot.  It's still
!         # a worse-than-average case for samplesort, but on the order of a
!         # measly 5% worse, not a quadratic-time disaster as it was with
!         # quicksort.
!         half = n // 2
!         L = range(half - 1, -1, -1)
!         L.extend(range(half))
!         # Force to float, so that the timings are comparable.  This is
!         # significantly faster if we leave tham as ints.
!         L = map(float, L)
          doit(L) # !sort
          print
***************
*** 115,119 ****
      # default range (inclusive)
      k1 = 15
!     k2 = 19
      if sys.argv[1:]:
          # one argument: single point
--- 133,137 ----
      # default range (inclusive)
      k1 = 15
!     k2 = 20
      if sys.argv[1:]:
          # one argument: single point
***************
*** 124,138 ****
              if sys.argv[3:]:
                  # derive random seed from remaining arguments
!                 x, y, z = 0, 0, 0
                  for a in sys.argv[3:]:
!                     h = hash(a)
!                     h, d = divmod(h, 256)
!                     h = h & 0xffffff
!                     x = (x^h^d) & 255
!                     h = h>>8
!                     y = (y^h^d) & 255
!                     h = h>>8
!                     z = (z^h^d) & 255
!                 whrandom.seed(x, y, z)
      r = range(k1, k2+1)                 # include the end point
      tabulate(r)
--- 142,149 ----
              if sys.argv[3:]:
                  # derive random seed from remaining arguments
!                 x = 1
                  for a in sys.argv[3:]:
!                     x = 69069 * x + hash(a)
!                 random.seed(x)
      r = range(k1, k2+1)                 # include the end point
      tabulate(r)