[Python-checkins] r53820 - in python/trunk: Doc/lib/libheapq.tex Lib/heapq.py Lib/test/test_heapq.py Misc/NEWS

raymond.hettinger python-checkins at python.org
Mon Feb 19 05:08:48 CET 2007


Author: raymond.hettinger
Date: Mon Feb 19 05:08:43 2007
New Revision: 53820

Modified:
   python/trunk/Doc/lib/libheapq.tex
   python/trunk/Lib/heapq.py
   python/trunk/Lib/test/test_heapq.py
   python/trunk/Misc/NEWS
Log:
Add merge() function to heapq.

Modified: python/trunk/Doc/lib/libheapq.tex
==============================================================================
--- python/trunk/Doc/lib/libheapq.tex	(original)
+++ python/trunk/Doc/lib/libheapq.tex	Mon Feb 19 05:08:43 2007
@@ -88,7 +88,18 @@
 >>>
 \end{verbatim}
 
-The module also offers two general purpose functions based on heaps.
+The module also offers three general purpose functions based on heaps.
+
+\begin{funcdesc}{merge}{*iterables}
+Merge multiple sorted inputs into a single sorted output (for example, merge
+timestamped entries from multiple log files).  Returns an iterator over
+over the sorted values.
+
+Similar to \code{sorted(itertools.chain(*iterables))} but returns an iterable,
+does not pull the data into memory all at once, and reduces the number of
+comparisons by assuming that each of the input streams is already sorted.            
+\versionadded{2.6}
+\end{funcdesc}
 
 \begin{funcdesc}{nlargest}{n, iterable\optional{, key}}
 Return a list with the \var{n} largest elements from the dataset defined
@@ -110,7 +121,7 @@
 \versionchanged[Added the optional \var{key} argument]{2.5}
 \end{funcdesc}
 
-Both functions perform best for smaller values of \var{n}.  For larger
+The latter two functions perform best for smaller values of \var{n}.  For larger
 values, it is more efficient to use the \function{sorted()} function.  Also,
 when \code{n==1}, it is more efficient to use the builtin \function{min()}
 and \function{max()} functions.

Modified: python/trunk/Lib/heapq.py
==============================================================================
--- python/trunk/Lib/heapq.py	(original)
+++ python/trunk/Lib/heapq.py	Mon Feb 19 05:08:43 2007
@@ -126,8 +126,8 @@
 From all times, sorting has always been a Great Art! :-)
 """
 
-__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest',
-           'nsmallest']
+__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
+           'nlargest', 'nsmallest']
 
 from itertools import islice, repeat, count, imap, izip, tee
 from operator import itemgetter, neg
@@ -308,6 +308,41 @@
 except ImportError:
     pass
 
+def merge(*iterables):
+    '''Merge multiple sorted inputs into a single sorted output.
+
+    Similar to sorted(itertools.chain(*iterables)) but returns an iterable,
+    does not pull the data into memory all at once, and reduces the number
+    of comparisons by assuming that each of the input streams is already sorted.
+
+    >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
+    [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
+
+    '''
+    _heappop, siftup, _StopIteration = heappop, _siftup, StopIteration
+
+    h = []
+    h_append = h.append
+    for it in map(iter, iterables):
+        try:
+            next = it.next
+            h_append([next(), next])
+        except _StopIteration:
+            pass
+    heapify(h)
+
+    while 1:
+        try:
+            while 1:
+                v, next = s = h[0]      # raises IndexError when h is empty
+                yield v
+                s[0] = next()           # raises StopIteration when exhausted
+                siftup(h, 0)            # restore heap condition
+        except _StopIteration:
+            _heappop(h)                  # remove empty iterator
+        except IndexError:
+            return
+
 # Extend the implementations of nsmallest and nlargest to use a key= argument
 _nsmallest = nsmallest
 def nsmallest(n, iterable, key=None):
@@ -341,3 +376,6 @@
     while heap:
         sort.append(heappop(heap))
     print sort
+
+    import doctest
+    doctest.testmod()

Modified: python/trunk/Lib/test/test_heapq.py
==============================================================================
--- python/trunk/Lib/test/test_heapq.py	(original)
+++ python/trunk/Lib/test/test_heapq.py	Mon Feb 19 05:08:43 2007
@@ -1,6 +1,6 @@
 """Unittests for heapq."""
 
-from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
+from heapq import heappush, heappop, heapify, heapreplace, merge, nlargest, nsmallest
 import random
 import unittest
 from test import test_support
@@ -103,6 +103,14 @@
             heap_sorted = [heappop(heap) for i in range(size)]
             self.assertEqual(heap_sorted, sorted(data))
 
+    def test_merge(self):
+        inputs = []
+        for i in xrange(random.randrange(5)):
+            row = sorted(random.randrange(1000) for j in range(random.randrange(10)))
+            inputs.append(row)
+        self.assertEqual(sorted(chain(*inputs)), list(merge(*inputs)))
+        self.assertEqual(list(merge()), [])
+
     def test_nsmallest(self):
         data = [(random.randrange(2000), i) for i in range(1000)]
         for f in (None, lambda x:  x[0] * 547 % 2000):

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Mon Feb 19 05:08:43 2007
@@ -128,6 +128,8 @@
 Library
 -------
 
+- Added heapq.merge() for merging sorted input streams.
+
 - Have the encoding package's search function dynamically import using absolute
   import semantics.
 


More information about the Python-checkins mailing list