[Python-checkins] r53820 - in python/trunk: Doc/lib/libheapq.tex Lib/heapq.py Lib/test/test_heapq.py Misc/NEWS
raymond.hettinger
python-checkins at python.org
Mon Feb 19 05:08:48 CET 2007
Author: raymond.hettinger
Date: Mon Feb 19 05:08:43 2007
New Revision: 53820
Modified:
python/trunk/Doc/lib/libheapq.tex
python/trunk/Lib/heapq.py
python/trunk/Lib/test/test_heapq.py
python/trunk/Misc/NEWS
Log:
Add merge() function to heapq.
Modified: python/trunk/Doc/lib/libheapq.tex
==============================================================================
--- python/trunk/Doc/lib/libheapq.tex (original)
+++ python/trunk/Doc/lib/libheapq.tex Mon Feb 19 05:08:43 2007
@@ -88,7 +88,18 @@
>>>
\end{verbatim}
-The module also offers two general purpose functions based on heaps.
+The module also offers three general purpose functions based on heaps.
+
+\begin{funcdesc}{merge}{*iterables}
+Merge multiple sorted inputs into a single sorted output (for example, merge
+timestamped entries from multiple log files). Returns an iterator over
+over the sorted values.
+
+Similar to \code{sorted(itertools.chain(*iterables))} but returns an iterable,
+does not pull the data into memory all at once, and reduces the number of
+comparisons by assuming that each of the input streams is already sorted.
+\versionadded{2.6}
+\end{funcdesc}
\begin{funcdesc}{nlargest}{n, iterable\optional{, key}}
Return a list with the \var{n} largest elements from the dataset defined
@@ -110,7 +121,7 @@
\versionchanged[Added the optional \var{key} argument]{2.5}
\end{funcdesc}
-Both functions perform best for smaller values of \var{n}. For larger
+The latter two functions perform best for smaller values of \var{n}. For larger
values, it is more efficient to use the \function{sorted()} function. Also,
when \code{n==1}, it is more efficient to use the builtin \function{min()}
and \function{max()} functions.
Modified: python/trunk/Lib/heapq.py
==============================================================================
--- python/trunk/Lib/heapq.py (original)
+++ python/trunk/Lib/heapq.py Mon Feb 19 05:08:43 2007
@@ -126,8 +126,8 @@
From all times, sorting has always been a Great Art! :-)
"""
-__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest',
- 'nsmallest']
+__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
+ 'nlargest', 'nsmallest']
from itertools import islice, repeat, count, imap, izip, tee
from operator import itemgetter, neg
@@ -308,6 +308,41 @@
except ImportError:
pass
+def merge(*iterables):
+ '''Merge multiple sorted inputs into a single sorted output.
+
+ Similar to sorted(itertools.chain(*iterables)) but returns an iterable,
+ does not pull the data into memory all at once, and reduces the number
+ of comparisons by assuming that each of the input streams is already sorted.
+
+ >>> list(merge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
+ [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
+
+ '''
+ _heappop, siftup, _StopIteration = heappop, _siftup, StopIteration
+
+ h = []
+ h_append = h.append
+ for it in map(iter, iterables):
+ try:
+ next = it.next
+ h_append([next(), next])
+ except _StopIteration:
+ pass
+ heapify(h)
+
+ while 1:
+ try:
+ while 1:
+ v, next = s = h[0] # raises IndexError when h is empty
+ yield v
+ s[0] = next() # raises StopIteration when exhausted
+ siftup(h, 0) # restore heap condition
+ except _StopIteration:
+ _heappop(h) # remove empty iterator
+ except IndexError:
+ return
+
# Extend the implementations of nsmallest and nlargest to use a key= argument
_nsmallest = nsmallest
def nsmallest(n, iterable, key=None):
@@ -341,3 +376,6 @@
while heap:
sort.append(heappop(heap))
print sort
+
+ import doctest
+ doctest.testmod()
Modified: python/trunk/Lib/test/test_heapq.py
==============================================================================
--- python/trunk/Lib/test/test_heapq.py (original)
+++ python/trunk/Lib/test/test_heapq.py Mon Feb 19 05:08:43 2007
@@ -1,6 +1,6 @@
"""Unittests for heapq."""
-from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
+from heapq import heappush, heappop, heapify, heapreplace, merge, nlargest, nsmallest
import random
import unittest
from test import test_support
@@ -103,6 +103,14 @@
heap_sorted = [heappop(heap) for i in range(size)]
self.assertEqual(heap_sorted, sorted(data))
+ def test_merge(self):
+ inputs = []
+ for i in xrange(random.randrange(5)):
+ row = sorted(random.randrange(1000) for j in range(random.randrange(10)))
+ inputs.append(row)
+ self.assertEqual(sorted(chain(*inputs)), list(merge(*inputs)))
+ self.assertEqual(list(merge()), [])
+
def test_nsmallest(self):
data = [(random.randrange(2000), i) for i in range(1000)]
for f in (None, lambda x: x[0] * 547 % 2000):
Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS (original)
+++ python/trunk/Misc/NEWS Mon Feb 19 05:08:43 2007
@@ -128,6 +128,8 @@
Library
-------
+- Added heapq.merge() for merging sorted input streams.
+
- Have the encoding package's search function dynamically import using absolute
import semantics.
More information about the Python-checkins
mailing list