# Selecting k smallest or largest elements from a large list in python; (benchmarking)

Dmitry Chichkov dchichkov at gmail.com
Thu Sep 2 11:20:14 CEST 2010

```By the way, improving n-ARG-smallest (that returns indexes as well as
values) is actually more desirable than just regular n-smallest:

== Result ==
1.38639092445 nargsmallest
3.1569879055 nargsmallest_numpy_argsort
1.29344892502 nargsmallest_numpy_argmin

Note that numpy array constructor eats around 0.789.

== Code ==
from operator import itemgetter
from random import randint, random
from itertools import islice
from time import time

def nargsmallest(iterable, n):
it   = enumerate(iterable)
mins = sorted(islice(it, n), key = itemgetter(1))
loser = mins[-1][1]         # largest of smallest
for el in it:
if el[1] <= loser:      # NOTE: preserve dups
mins.append(el)
mins.sort(key = itemgetter(1))
mins.pop()
loser = mins[-1][1]
return mins

def nargsmallest_numpy_argsort(iter, k):
distances = N.asarray(iter)
return [(i, distances[i]) for i in distances.argsort()[0:k]]

def nargsmallest_numpy_argmin(iter, k):
mins = []
distances = N.asarray(iter)
for i in xrange(k):
j = distances.argmin()
mins.append((j, distances[j]))
distances[j] = float('inf')
return mins

test_data = [randint(10, 50) + random() for i in range(10000000)]
K = 10

init = time()
mins = nargsmallest(test_data, K)
print time() - init, 'nargsmallest:', mins[:2]

import numpy as N
init = time()
mins = nargsmallest_numpy_argsort(test_data, K)
print time() - init, 'nargsmallest_numpy_argsort:', mins[:2]

init = time()
mins = nargsmallest_numpy_argmin(test_data, K)
print time() - init, 'nargsmallest_numpy_argmin:', mins[:2]

```