[Python-checkins] r68546 - python/trunk/Lib/heapq.py

raymond.hettinger python-checkins at python.org
Mon Jan 12 11:37:32 CET 2009


Author: raymond.hettinger
Date: Mon Jan 12 11:37:32 2009
New Revision: 68546

Log:
Optimize heapq.nsmallest/nlargest for cases where n==1 or n>=size.

Modified:
   python/trunk/Lib/heapq.py

Modified: python/trunk/Lib/heapq.py
==============================================================================
--- python/trunk/Lib/heapq.py	(original)
+++ python/trunk/Lib/heapq.py	Mon Jan 12 11:37:32 2009
@@ -129,7 +129,7 @@
 __all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'merge',
            'nlargest', 'nsmallest', 'heappushpop']
 
-from itertools import islice, repeat, count, imap, izip, tee
+from itertools import islice, repeat, count, imap, izip, tee, chain
 from operator import itemgetter, neg
 import bisect
 
@@ -354,10 +354,32 @@
 
     Equivalent to:  sorted(iterable, key=key)[:n]
     """
+    # Short-cut for n==1 is to use min() when len(iterable)>0
+    if n == 1:
+        it = iter(iterable)
+        head = list(islice(it, 1))
+        if not head:
+            return []
+        if key is None:
+            return [min(chain(head, it))]
+        return [min(chain(head, it), key=key)]
+
+    # When n>=size, it's faster to use sort()
+    try:
+        size = len(iterable)
+    except (TypeError, AttributeError):
+        pass
+    else:
+        if n >= size:
+            return sorted(iterable, key=key)[:n]
+
+    # When key is None, use simpler decoration
     if key is None:
         it = izip(iterable, count())                        # decorate
         result = _nsmallest(n, it)
         return map(itemgetter(0), result)                   # undecorate
+
+    # General case, slowest method
     in1, in2 = tee(iterable)
     it = izip(imap(key, in1), count(), in2)                 # decorate
     result = _nsmallest(n, it)
@@ -369,10 +391,33 @@
 
     Equivalent to:  sorted(iterable, key=key, reverse=True)[:n]
     """
+
+    # Short-cut for n==1 is to use max() when len(iterable)>0
+    if n == 1:
+        it = iter(iterable)
+        head = list(islice(it, 1))
+        if not head:
+            return []
+        if key is None:
+            return [max(chain(head, it))]
+        return [max(chain(head, it), key=key)]
+
+    # When n>=size, it's faster to use sort()
+    try:
+        size = len(iterable)
+    except (TypeError, AttributeError):
+        pass
+    else:
+        if n >= size:
+            return sorted(iterable, key=key, reverse=True)[:n]
+
+    # When key is None, use simpler decoration
     if key is None:
         it = izip(iterable, imap(neg, count()))             # decorate
         result = _nlargest(n, it)
         return map(itemgetter(0), result)                   # undecorate
+
+    # General case, slowest method
     in1, in2 = tee(iterable)
     it = izip(imap(key, in1), imap(neg, count()), in2)      # decorate
     result = _nlargest(n, it)


More information about the Python-checkins mailing list