[pypy-svn] r10154 - in pypy/dist/pypy/objspace/std: . test

arigo at codespeak.net arigo at codespeak.net
Tue Mar 29 11:53:02 CEST 2005


Author: arigo
Date: Tue Mar 29 11:53:02 2005
New Revision: 10154

Added:
   pypy/dist/pypy/objspace/std/listsort.py   (contents, props changed)
   pypy/dist/pypy/objspace/std/test/test_listsort.py   (contents, props changed)
Modified:
   pypy/dist/pypy/objspace/std/listobject.py
Log:
Implemented Tim's sort for lists, in listsort.py which is independent from
PyPy and overridden for calling space operations in listobject.py.



Modified: pypy/dist/pypy/objspace/std/listobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/listobject.py	(original)
+++ pypy/dist/pypy/objspace/std/listobject.py	Tue Mar 29 11:53:02 2005
@@ -4,8 +4,9 @@
 from pypy.objspace.std.tupleobject import W_TupleObject
 
 from pypy.objspace.std import slicetype
-from pypy.interpreter import gateway
+from pypy.interpreter import gateway, baseobjspace
 from pypy.objspace.std.restricted_int import r_int, r_uint
+from pypy.objspace.std.listsort import TimSort
 
 
 class W_ListObject(W_Object):
@@ -514,76 +515,21 @@
         _reverse_slice(w_list.ob_item, 0, w_list.ob_size)
     return space.w_None
 
-    
-
-# Python Quicksort Written by Magnus Lie Hetland
-# http://www.hetland.org/python/quicksort.html
+# ____________________________________________________________
+# Sorting
 
-# NOTE:  we cannot yet detect that a user comparision
-#        function modifies the list in-place.  The
-#        CPython sort() should be studied to learn how
-#        to implement this functionality.
-
-def _partition(list, key_list, start, end, lt):
-    pivot = list[end]
-    key_pivot = key_list[end]                          # Partition around the last value
-    bottom = start-1                           # Start outside the area to be partitioned
-    top = end                                  # Ditto
-
-    done = 0
-    while not done:                            # Until all elements are partitioned...
-
-        while not done:                        # Until we find an out of place element...
-            bottom = bottom+1                  # ... move the bottom up.
-
-            if bottom == top:                  # If we hit the top...
-                done = 1                       # ... we are done.
-                break
-
-            if lt(key_pivot, key_list[bottom]):        # Is the bottom out of place?
-                key_list[top] = key_list[bottom]
-                list[top] = list[bottom]       # Then put it at the top...
-                break                          # ... and start searching from the top.
-
-        while not done:                        # Until we find an out of place element...
-            top = top-1                        # ... move the top down.
-            
-            if top == bottom:                  # If we hit the bottom...
-                done = 1                       # ... we are done.
-                break
-
-            if lt(key_list[top], key_pivot):           # Is the top out of place?
-                key_list[bottom] = key_list[top]
-                list[bottom] = list[top]       # Then put it at the bottom...
-                break                          # ...and start searching from the bottom.
-
-    key_list[top] = key_pivot
-    list[top] = pivot                          # Put the pivot in its place.
-    return top                                 # Return the split point
-
-
-def _quicksort(list, key_list, start, end, lt):
-    """list is the list to be sorted
-    key_list is the list that will be used for comparisions
-    """
-    if start < end:                            # If there are two or more elements...
-        split = _partition(list, key_list, start, end, lt)    # ... partition the sublist...
-        _quicksort(list, key_list, start, split-1, lt)        # ... and sort both halves.
-        _quicksort(list, key_list, split+1, end, lt)
-
-class Comparer:
-    """Just a dumb container class for a space and a w_cmp, because
-    we can't use nested scopes for that in RPython.
-    """
-    def __init__(self, space, w_cmp):
-        self.space = space
-        self.w_cmp = w_cmp
+class KeyContainer(baseobjspace.W_Root):
+    def __init__(self, w_key, w_item):
+        self.w_key = w_key
+        self.w_item = w_item
 
-    def simple_lt(self, a, b):
+class SimpleSort(TimSort):
+    def lt(self, a, b):
         space = self.space
         return space.is_true(space.lt(a, b))
 
-    def complex_lt(self, a, b):
+class CustomCompareSort(TimSort):
+    def lt(self, a, b):
         space = self.space
         w_cmp = self.w_cmp
         w_result = space.call_function(w_cmp, a, b)
@@ -596,45 +542,81 @@
             raise
         return result < 0
 
-def list_sort__List_ANY_ANY_ANY(space, w_list, w_cmp, w_key, w_reverse):
-    comparer = Comparer(space, w_cmp)
-    if w_cmp is space.w_None:
-        lt = comparer.simple_lt
-    else:
-        lt = comparer.complex_lt
-    # The key_list is the result of map(w_key, w_list), and will be
-    # used for comparisons during the qsort
-    if w_key is not space.w_None:
-        key_list = [space.call_function(w_key, item)
-                    for item in w_list.ob_item[:w_list.ob_size]]
-    else:
-        # If no key was specified, then comparison will be made on
-        # the original list
-        key_list = w_list.ob_item
-    # XXX Basic quicksort implementation
-    # XXX this is not stable !!
-    _quicksort(w_list.ob_item, key_list, 0, w_list.ob_size-1, lt)
-    # _quicksort(w_list.ob_item, 0, w_list.ob_size-1, lt)
-    # reverse list if needed
-    if space.is_true(w_reverse):
-        list_reverse__List(space, w_list)
-    return space.w_None
+class CustomKeySort(TimSort):
+    def lt(self, a, b):
+        assert isinstance(a, KeyContainer)
+        assert isinstance(b, KeyContainer)
+        space = self.space
+        return space.is_true(space.lt(a.w_key, b.w_key))
 
+class CustomKeyCompareSort(CustomCompareSort):
+    def lt(self, a, b):
+        assert isinstance(a, KeyContainer)
+        assert isinstance(b, KeyContainer)
+        return CustomCompareSort.lt(self, a.w_key, b.w_key)
+
+SortClass = {
+    (False, False): SimpleSort,
+    (True,  False): CustomCompareSort,
+    (False, True) : CustomKeySort,
+    (True,  True) : CustomKeyCompareSort,
+    }
+
+def list_sort__List_ANY_ANY_ANY(space, w_list, w_cmp, w_keyfunc, w_reverse):
+    """Sort w_list in place with TimSort; w_cmp/w_keyfunc may be w_None.
+
+    Raises OperationError(w_ValueError) if w_list was modified meanwhile."""
+    has_cmp = not space.is_w(w_cmp, space.w_None)
+    has_key = not space.is_w(w_keyfunc, space.w_None)
+    has_reverse = space.is_true(w_reverse)
+    # create and setup a TimSort instance
+    sorterclass = SortClass[has_cmp, has_key]
+    sorter = sorterclass(w_list.ob_item, w_list.ob_size)
+    sorter.space = space
+    sorter.w_cmp = w_cmp
+    is_reversed = False      # set once the pre-sort reversal is done
+    try:
+        # The list is temporarily made empty, so that mutations performed
+        # by comparison functions can't affect the slice of memory we're
+        # sorting (allowing mutations during sorting is an IndexError or
+        # core-dump factory, since ob_item may change).
+        w_list.clear()
+
+        # wrap each item in a KeyContainer if needed
+        if has_key:
+            for i in range(sorter.listlength):
+                w_item = sorter.list[i]
+                w_key = space.call_function(w_keyfunc, w_item)
+                sorter.list[i] = KeyContainer(w_key, w_item)
+
+        # Reverse sort stability achieved by initially reversing the list,
+        # applying a stable forward sort, then reversing the final result.
+        if has_reverse:
+            _reverse_slice(sorter.list, 0, sorter.listlength)
+            is_reversed = True
+
+        # perform the sort
+        sorter.sort()
+
+        # check if the user mucked with the list during the sort
+        if w_list.ob_item:
+            raise OperationError(space.w_ValueError,
+                                 space.wrap("list modified during sort"))
+    finally:
+        # unwrap each item if needed
+        if has_key:
+            for i in range(sorter.listlength):
+                w_obj = sorter.list[i]
+                if isinstance(w_obj, KeyContainer):
+                    sorter.list[i] = w_obj.w_item
+        # undo the reversal only if it was done (the key function may raise)
+        if is_reversed:
+            _reverse_slice(sorter.list, 0, sorter.listlength)
+        # put the items back into the list
+        w_list.ob_item = sorter.list
+        w_list.ob_size = sorter.listlength
+    return space.w_None
 
-"""
-static PyMethodDef list_methods[] = {
-    {"append",  (PyCFunction)listappend,  METH_O, append_doc},
-    {"insert",  (PyCFunction)listinsert,  METH_VARARGS, insert_doc},
-    {"extend",      (PyCFunction)listextend,  METH_O, extend_doc},
-    {"pop",     (PyCFunction)listpop,     METH_VARARGS, pop_doc},
-    {"remove",  (PyCFunction)listremove,  METH_O, remove_doc},
-    {"index",   (PyCFunction)listindex,   METH_O, index_doc},
-    {"count",   (PyCFunction)listcount,   METH_O, count_doc},
-    {"reverse", (PyCFunction)listreverse, METH_NOARGS, reverse_doc},
-    {"sort",    (PyCFunction)listsort,    METH_VARARGS, sort_doc},
-    {NULL,      NULL}       /* sentinel */
-};
-"""
 
 from pypy.objspace.std import listtype
 register_all(vars(), listtype)

Added: pypy/dist/pypy/objspace/std/listsort.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/objspace/std/listsort.py	Tue Mar 29 11:53:02 2005
@@ -0,0 +1,575 @@
+
+## ------------------------------------------------------------------------
+## Lots of code for an adaptive, stable, natural mergesort.  There are many
+## pieces to this algorithm; read listsort.txt for overviews and details.
+## ------------------------------------------------------------------------
+##         Adapted from CPython, original code and algorithms by Tim Peters
+
+
+class TimSort:
+    """TimSort(list).sort()
+
+    Sorts the list in-place, using the overridable method lt() for comparison.
+    """
+
+    def __init__(self, list, listlength=None):
+        self.list = list
+        if listlength is None:
+            listlength = len(list)
+        self.listlength = listlength
+
+    def lt(self, a, b):
+        return a < b
+
+    def le(self, a, b):
+        return not self.lt(b, a)   # always use self.lt() as the primitive
+
+    # binarysort is the best method for sorting small arrays: it does
+    # few compares, but can do data movement quadratic in the number of
+    # elements.
+    # "a" is a contiguous slice of a list, and is sorted via binary insertion.
+    # This sort is stable.
+    # On entry, the first "sorted" elements are already sorted.
+    # Even in case of error, the output slice will be some permutation of
+    # the input (nothing is lost or duplicated).
+
+    def binarysort(self, a, sorted=1):
+        for start in xrange(a.base + sorted, a.base + a.len):
+            # set l to where list[start] belongs
+            l = a.base
+            r = start
+            pivot = a.list[r]
+            # Invariants:
+            # pivot >= all in [base, l).
+            # pivot  < all in [r, start).
+            # The second is vacuously true at the start.
+            while l < r:
+                p = l + ((r - l) >> 1)
+                if self.lt(pivot, a.list[p]):
+                    r = p
+                else:
+                    l = p+1
+            assert l == r
+            # The invariants still hold, so pivot >= all in [base, l) and
+            # pivot < all in [l, start), so pivot belongs at l.  Note
+            # that if there are elements equal to pivot, l points to the
+            # first slot after them -- that's why this sort is stable.
+            # Slide over to make room.
+            for p in xrange(start, l, -1):
+                a.list[p] = a.list[p-1]
+            a.list[l] = pivot
+
+    # Compute the length of the run in the slice "a".
+    # "A run" is the longest ascending sequence, with
+    #
+    #     a[0] <= a[1] <= a[2] <= ...
+    #
+    # or the longest descending sequence, with
+    #
+    #     a[0] > a[1] > a[2] > ...
+    #
+    # Return (run, descending) where descending is False in the former case,
+    # or True in the latter.
+    # For its intended use in a stable mergesort, the strictness of the defn of
+    # "descending" is needed so that the caller can safely reverse a descending
+    # sequence without violating stability (strict > ensures there are no equal
+    # elements to get out of order).
+
+    def count_run(self, a):
+        if a.len <= 1:
+            n = a.len
+            descending = False
+        else:
+            n = 2
+            if self.lt(a.list[a.base + 1], a.list[a.base]):
+                descending = True
+                for p in xrange(a.base + 2, a.base + a.len):
+                    if self.lt(a.list[p], a.list[p-1]):
+                        n += 1
+                    else:
+                        break
+            else:
+                descending = False
+                for p in xrange(a.base + 2, a.base + a.len):
+                    if self.lt(a.list[p], a.list[p-1]):
+                        break
+                    else:
+                        n += 1
+        return ListSlice(a.list, a.base, n), descending
+
+    # Locate the proper position of key in a sorted vector; if the vector
+    # contains an element equal to key, return the position immediately to the
+    # left of the leftmost equal element -- or to the right of the rightmost
+    # equal element if the flag "rightmost" is set.
+    #
+    # "hint" is an index at which to begin the search, 0 <= hint < a.len.
+    # The closer hint is to the final result, the faster this runs.
+    #
+    # The return value is the index 0 <= k <= a.len such that
+    #
+    #     a[k-1] < key <= a[k]      (if rightmost is False)
+    #     a[k-1] <= key < a[k]      (if rightmost is True)
+    #
+    # as long as the indices are in bound.  IOW, key belongs at index k;
+    # or, IOW, the first k elements of a should precede key, and the last
+    # n-k should follow key.
+
+    def gallop(self, key, a, hint, rightmost):
+        assert 0 <= hint < a.len
+        if rightmost:
+            lower = self.le   # search for the largest k for which a[k] <= key
+        else:
+            lower = self.lt   # search for the largest k for which a[k] < key
+
+        p = a.base + hint
+        lastofs = 0
+        ofs = 1
+        if lower(a.list[p], key):
+            # a[hint] < key -- gallop right, until
+            #     a[hint + lastofs] < key <= a[hint + ofs]
+
+            maxofs = a.len - hint     # a[a.len-1] is highest
+            while ofs < maxofs:
+                if lower(a.list[p + ofs], key):
+                    lastofs = ofs
+                    try:
+                        ofs = (ofs << 1) + 1
+                    except OverflowError:
+                        ofs = maxofs
+                else:  # key <= a[hint + ofs]
+                    break
+
+            if ofs > maxofs:
+                ofs = maxofs
+            # Translate back to offsets relative to a.
+            lastofs += hint
+            ofs += hint
+
+	else:
+            # key <= a[hint] -- gallop left, until
+            #     a[hint - ofs] < key <= a[hint - lastofs]
+            maxofs = hint + 1   # a[0] is lowest
+            while ofs < maxofs:
+                if lower(a.list[p - ofs], key):
+                    break
+                else:
+                    # key <= a[hint - ofs]
+                    lastofs = ofs
+                    try:
+			ofs = (ofs << 1) + 1
+                    except OverflowError:
+                        ofs = maxofs
+            if ofs > maxofs:
+                ofs = maxofs
+            # Translate back to positive offsets relative to a.
+            lastofs, ofs = hint-ofs, hint-lastofs
+
+	assert -1 <= lastofs < ofs <= a.len
+
+	# Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
+	# right of lastofs but no farther right than ofs.  Do a binary
+	# search, with invariant a[lastofs-1] < key <= a[ofs].
+        
+	lastofs += 1
+        while lastofs < ofs:
+            m = lastofs + ((ofs - lastofs) >> 1)
+            if lower(a.list[a.base + m], key):
+                lastofs = m+1	# a[m] < key
+            else:
+                ofs = m         # key <= a[m]
+
+	assert lastofs == ofs         # so a[ofs-1] < key <= a[ofs]
+	return ofs
+
+    # ____________________________________________________________
+
+    # When we get into galloping mode, we stay there until both runs win less
+    # often than MIN_GALLOP consecutive times.  See listsort.txt for more info.
+    MIN_GALLOP = 7
+
+    def merge_init(self):
+	# This controls when we get *into* galloping mode.  It's initialized
+        # to MIN_GALLOP.  merge_lo and merge_hi tend to nudge it higher for
+        # random data, and lower for highly structured data.
+        self.min_gallop = self.MIN_GALLOP
+
+	# A stack of n pending runs yet to be merged.  Run #i starts at
+        # address pending[i].base and extends for pending[i].len elements.
+        # It's always true (so long as the indices are in bounds) that
+        #
+        #     pending[i].base + pending[i].len == pending[i+1].base
+        #
+        # so we could cut the storage for this, but it's a minor amount,
+        # and keeping all the info explicit simplifies the code.
+        self.pending = []
+
+    # Merge the slice "a" with the slice "b" in a stable way, in-place.
+    # a.len and b.len must be > 0, and a.base + a.len == b.base.
+    # Must also have that b.list[b.base] < a.list[a.base], that
+    # a.list[a.base+a.len-1] belongs at the end of the merge, and should have
+    # a.len <= b.len.  See listsort.txt for more info.
+
+    def merge_lo(self, a, b):
+        assert a.len > 0 and b.len > 0 and a.base + a.len == b.base
+        min_gallop = self.min_gallop
+        dest = a.base
+        a = a.copyitems()
+
+        # Invariant: elements in "a" are waiting to be reinserted into the list
+        # at "dest".  They should be merged with the elements of "b".
+        # b.base == dest + a.len.
+        # We use a finally block to ensure that the elements remaining in
+        # the copy "a" are reinserted back into self.list in all cases.
+        try:
+            self.list[dest] = b.popleft()
+            dest += 1
+            if a.len == 1 or b.len == 0:
+                return
+
+            while True:
+		acount = 0   # number of times A won in a row
+		bcount = 0   # number of times B won in a row
+
+		# Do the straightforward thing until (if ever) one run
+                # appears to win consistently.
+                while True:
+                    if self.lt(b.list[b.base], a.list[a.base]):
+                        self.list[dest] = b.popleft()
+                        dest += 1
+                        if b.len == 0:
+                            return
+                        bcount += 1
+                        acount = 0
+                        if bcount >= min_gallop:
+                            break
+                    else:
+                        self.list[dest] = a.popleft()
+                        dest += 1
+                        if a.len == 1:
+                            return
+                        acount += 1
+                        bcount = 0
+                        if acount >= min_gallop:
+                            break
+
+		# One run is winning so consistently that galloping may
+                # be a huge win.  So try that, and continue galloping until
+                # (if ever) neither run appears to be winning consistently
+                # anymore.
+		min_gallop += 1
+
+                while True:
+                    min_gallop -= min_gallop > 1
+                    self.min_gallop = min_gallop
+
+                    acount = self.gallop(b.list[b.base], a, hint=0,
+                                         rightmost=True)
+                    for p in xrange(a.base, a.base + acount):
+                        self.list[dest] = a.list[p]
+                        dest += 1
+                    a.advance(acount)
+                    # a.len==0 is impossible now if the comparison
+                    # function is consistent, but we can't assume
+                    # that it is.
+                    if a.len <= 1:
+                        return
+
+                    self.list[dest] = b.popleft()
+                    dest += 1
+                    if b.len == 0:
+                        return
+
+                    bcount = self.gallop(a.list[a.base], b, hint=0,
+                                         rightmost=False)
+                    for p in xrange(b.base, b.base + bcount):
+                        self.list[dest] = b.list[p]
+                        dest += 1
+                    b.advance(bcount)
+                    if b.len == 0:
+                        return
+
+                    self.list[dest] = a.popleft()
+                    dest += 1
+                    if a.len == 1:
+                        return
+
+                    if acount < self.MIN_GALLOP and bcount < self.MIN_GALLOP:
+                        break
+
+ 		min_gallop += 1  # penalize it for leaving galloping mode
+ 		self.min_gallop = min_gallop
+
+        finally:
+            # The last element of a belongs at the end of the merge, so we copy
+            # the remaining elements of b before the remaining elements of a.
+            assert a.len >= 0 and b.len >= 0
+            for p in xrange(b.base, b.base + b.len):
+                self.list[dest] = b.list[p]
+                dest += 1
+            for p in xrange(a.base, a.base + a.len):
+                self.list[dest] = a.list[p]
+                dest += 1
+
+    # Same as merge_lo(), but should have a.len >= b.len.
+
+    def merge_hi(self, a, b):
+        assert a.len > 0 and b.len > 0 and a.base + a.len == b.base
+        min_gallop = self.min_gallop
+        dest = b.base + b.len
+        b = b.copyitems()
+
+        # Invariant: elements in "b" are waiting to be reinserted into the list
+        # before "dest".  They should be merged with the elements of "a".
+        # a.base + a.len == dest - b.len.
+        # We use a finally block to ensure that the elements remaining in
+        # the copy "b" are reinserted back into self.list in all cases.
+        try:
+            dest -= 1
+            self.list[dest] = a.popright()
+            if a.len == 0 or b.len == 1:
+                return
+
+            while True:
+		acount = 0   # number of times A won in a row
+		bcount = 0   # number of times B won in a row
+
+		# Do the straightforward thing until (if ever) one run
+                # appears to win consistently.
+                while True:
+                    nexta = a.list[a.base + a.len - 1]
+                    nextb = b.list[b.base + b.len - 1]
+                    if self.lt(nextb, nexta):
+                        dest -= 1
+                        self.list[dest] = nexta
+                        a.len -= 1
+                        if a.len == 0:
+                            return
+                        acount += 1
+                        bcount = 0
+                        if acount >= min_gallop:
+                            break
+                    else:
+                        dest -= 1
+                        self.list[dest] = nextb
+                        b.len -= 1
+                        if b.len == 1:
+                            return
+                        bcount += 1
+                        acount = 0
+                        if bcount >= min_gallop:
+                            break
+
+		# One run is winning so consistently that galloping may
+                # be a huge win.  So try that, and continue galloping until
+                # (if ever) neither run appears to be winning consistently
+                # anymore.
+		min_gallop += 1
+
+                while True:
+                    min_gallop -= min_gallop > 1
+                    self.min_gallop = min_gallop
+
+                    nextb = b.list[b.base + b.len - 1]
+                    k = self.gallop(nextb, a, hint=a.len-1, rightmost=True)
+                    acount = a.len - k
+                    for p in xrange(a.base + a.len - 1, a.base + k - 1, -1):
+                        dest -= 1
+                        self.list[dest] = a.list[p]
+                    a.len -= acount
+                    if a.len == 0:
+                        return
+
+                    dest -= 1
+                    self.list[dest] = b.popright()
+                    if b.len == 1:
+                        return
+
+                    nexta = a.list[a.base + a.len - 1]
+                    k = self.gallop(nexta, b, hint=b.len-1, rightmost=False)
+                    bcount = b.len - k
+                    for p in xrange(b.base + b.len - 1, b.base + k - 1, -1):
+                        dest -= 1
+                        self.list[dest] = b.list[p]
+                    b.len -= bcount
+                    # b.len==0 is impossible now if the comparison
+                    # function is consistent, but we can't assume
+                    # that it is.
+                    if b.len <= 1:
+                        return
+
+                    dest -= 1
+                    self.list[dest] = a.popright()
+                    if a.len == 0:
+                        return
+
+                    if acount < self.MIN_GALLOP and bcount < self.MIN_GALLOP:
+                        break
+
+ 		min_gallop += 1  # penalize it for leaving galloping mode
+ 		self.min_gallop = min_gallop
+
+        finally:
+            # The first element of b belongs at the start of the merge, so
+            # going downwards we copy the rest of a and then the rest of b.
+            assert a.len >= 0 and b.len >= 0
+            for p in xrange(a.base + a.len - 1, a.base - 1, -1):
+                dest -= 1
+                self.list[dest] = a.list[p]
+            for p in xrange(b.base + b.len - 1, b.base - 1, -1):
+                dest -= 1
+                self.list[dest] = b.list[p]
+
+    # Merge the two runs at stack indices i and i+1.
+
+    def merge_at(self, i):
+        a = self.pending[i]
+        b = self.pending[i+1]
+        assert a.len > 0 and b.len > 0
+        assert a.base + a.len == b.base
+
+	# Record the length of the combined runs and remove the run b
+        self.pending[i] = ListSlice(self.list, a.base, a.len + b.len)
+        del self.pending[i+1]
+
+	# Where does b start in a?  Elements in a before that can be
+	# ignored (already in place).
+        k = self.gallop(b.list[b.base], a, hint=0, rightmost=True)
+        a.advance(k)
+        if a.len == 0:
+            return
+
+        # Where does a end in b?  Elements in b after that can be
+        # ignored (already in place).
+	b.len = self.gallop(a.list[a.base+a.len-1], b, hint=b.len-1,
+                            rightmost=False)
+        if b.len == 0:
+            return
+
+	# Merge what remains of the runs.  The direction is chosen to
+        # minimize the temporary storage needed.
+        if a.len <= b.len:
+            self.merge_lo(a, b)
+	else:
+            self.merge_hi(a, b)
+
+    # Examine the stack of runs waiting to be merged, merging adjacent runs
+    # until the stack invariants are re-established:
+    #     1. len[-3] > len[-2] + len[-1]
+    #     2. len[-2] > len[-1]
+    # Rule 1 is also checked one run deeper (len[-4] > len[-3] + len[-2]): a
+    # merge can break it there unnoticed.  See listsort.txt for more info.
+
+    def merge_collapse(self):
+        p = self.pending
+        while len(p) > 1:
+            if ((len(p) >= 3 and p[-3].len <= p[-2].len + p[-1].len) or
+                (len(p) >= 4 and p[-4].len <= p[-3].len + p[-2].len)):
+                if p[-3].len < p[-1].len:
+                    self.merge_at(-3)
+                else:
+                    self.merge_at(-2)
+            elif p[-2].len <= p[-1].len:
+                self.merge_at(-2)
+            else:
+                break
+
+    # Regardless of invariants, merge all runs on the stack until only one
+    # remains.  This is used at the end of the mergesort.
+
+    def merge_force_collapse(self):
+        p = self.pending
+        while len(p) > 1:
+            if len(p) >= 3 and p[-3].len < p[-1].len:
+                self.merge_at(-3)
+            else:
+                self.merge_at(-2)
+
+    # Compute a good value for the minimum run length; natural runs shorter
+    # than this are boosted artificially via binary insertion.
+    #
+    # If n < 64, return n (it's too small to bother with fancy stuff).
+    # Else if n is an exact power of 2, return 32.
+    # Else return an int k, 32 <= k <= 64, such that n/k is close to, but
+    # strictly less than, an exact power of 2.
+    #
+    # See listsort.txt for more info.
+
+    def merge_compute_minrun(self, n):
+	r = 0    # becomes 1 if any 1 bits are shifted off
+        while n >= 64:
+            r |= n & 1
+            n >>= 1
+	return n + r
+
+    # ____________________________________________________________
+    # Entry point.
+
+    def sort(self):
+        remaining = ListSlice(self.list, 0, self.listlength)
+        if remaining.len < 2:
+            return
+
+	# March over the array once, left to right, finding natural runs,
+	# and extending short natural runs to minrun elements.
+        self.merge_init()
+	minrun = self.merge_compute_minrun(remaining.len)
+
+        while remaining.len > 0:
+            # Identify next run.
+            run, descending = self.count_run(remaining)
+            if descending:
+                run.reverse()
+            # If short, extend to min(minrun, remaining.len).
+            if run.len < minrun:
+                sorted = run.len
+                run.len = min(minrun, remaining.len)
+                self.binarysort(run, sorted)
+            # Advance remaining past this run.
+            remaining.advance(run.len)
+            # Push run onto pending-runs stack, and maybe merge.
+            self.pending.append(run)
+            self.merge_collapse()
+
+        assert remaining.base == self.listlength
+
+	self.merge_force_collapse()
+	assert len(self.pending) == 1
+        assert self.pending[0].base == 0
+        assert self.pending[0].len == self.listlength
+
+
+class ListSlice:
+    "A sublist of a list."
+
+    def __init__(self, list, base, len):
+        self.list = list
+        self.base = base
+        self.len  = len
+
+    def copyitems(self):
+        "Make a copy of the slice of the original list."
+        return ListSlice(self.list[self.base:self.base+self.len], 0, self.len)
+
+    def advance(self, n):
+        self.base += n
+        self.len -= n
+
+    def popleft(self):
+        result = self.list[self.base]
+        self.base += 1
+        self.len -= 1
+        return result
+
+    def popright(self):
+        self.len -= 1
+        return self.list[self.base + self.len]
+
+    def reverse(self):
+        "Reverse the slice in-place."
+        list = self.list
+        lo = self.base
+        hi = lo + self.len - 1
+        while lo < hi:
+            list[lo], list[hi] = list[hi], list[lo]
+            lo += 1
+            hi -= 1

Added: pypy/dist/pypy/objspace/std/test/test_listsort.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/objspace/std/test/test_listsort.py	Tue Mar 29 11:53:02 2005
@@ -0,0 +1,43 @@
+import autopath
+from pypy.objspace.std.listsort import TimSort
+import random, os
+
+def makeset(lst):
+    result = {}
+    for a in lst:
+        result.setdefault(id(a), []).append(True)
+    return result
+
+def sorttest(lst1):
+    lst2 = lst1[:]
+    TimSort(lst2).sort()
+    assert len(lst1) == len(lst2)
+    assert makeset(lst1) == makeset(lst2)
+    position = {}
+    i = 0
+    for a in lst1:
+        position.setdefault(id(a), []).append(i)
+        i += 1
+    for i in range(len(lst2)-1):
+        a, b = lst2[i], lst2[i+1]
+        assert a <= b, "resulting list is not sorted"
+        if a == b:
+            assert position[id(a)][0] < position[id(b)][-1], "not stable"
+
+
+class C(int):
+    pass
+
+def test_v():
+    for v in range(137):
+        up = 1 + int(v * random.random() * 2.7)
+        lst1 = [C(random.randrange(0, up)) for i in range(v)]
+        sorttest(lst1)
+
+def test_file():
+    for fn in os.listdir(autopath.this_dir):
+        if fn.endswith('.py'):
+            f = open(os.path.join(autopath.this_dir, fn), 'r')
+            lines1 = f.readlines()
+            f.close()
+            sorttest(lines1)



More information about the Pypy-commit mailing list