Selecting k smallest or largest elements from a large list in python; (benchmarking)
Dmitry Chichkov
dchichkov at gmail.com
Thu Sep 2 05:20:14 EDT 2010
By the way, improving n-ARG-smallest (that returns indexes as well as
values) is actually more desirable than just regular n-smallest:
== Result ==
1.38639092445 nargsmallest
3.1569879055 nargsmallest_numpy_argsort
1.29344892502 nargsmallest_numpy_argmin
Note that numpy array constructor eats around 0.789.
== Code ==
from operator import itemgetter
from random import randint, random
from itertools import islice
from time import time
def nargsmallest(iterable, n):
it = enumerate(iterable)
mins = sorted(islice(it, n), key = itemgetter(1))
loser = mins[-1][1] # largest of smallest
for el in it:
if el[1] <= loser: # NOTE: preserve dups
mins.append(el)
mins.sort(key = itemgetter(1))
mins.pop()
loser = mins[-1][1]
return mins
def nargsmallest_numpy_argsort(iter, k):
distances = N.asarray(iter)
return [(i, distances[i]) for i in distances.argsort()[0:k]]
def nargsmallest_numpy_argmin(iter, k):
mins = []
distances = N.asarray(iter)
for i in xrange(k):
j = distances.argmin()
mins.append((j, distances[j]))
distances[j] = float('inf')
return mins
test_data = [randint(10, 50) + random() for i in range(10000000)]
K = 10
init = time()
mins = nargsmallest(test_data, K)
print time() - init, 'nargsmallest:', mins[:2]
import numpy as N
init = time()
mins = nargsmallest_numpy_argsort(test_data, K)
print time() - init, 'nargsmallest_numpy_argsort:', mins[:2]
init = time()
mins = nargsmallest_numpy_argmin(test_data, K)
print time() - init, 'nargsmallest_numpy_argmin:', mins[:2]
More information about the Python-list
mailing list