Here's my first attempt (see select_items_faster), which is about 1.4x faster than the original:
In [12]: print _11
import itertools
def select_items_original(temp_dict, n):
    """Select n items from the dictionary"""
    res = []
    sort_keys = sorted(temp_dict.keys())
    count = 0
    for key in sort_keys:
        for pair in temp_dict[key]:
            if count < n:
                res.append(pair)
                count += 1
            else:
                return res
    return res

def select_items_faster(temp_dict, n):
    """Select n items from the dictionary"""
    items = temp_dict.items()
    items.sort()
    return list(itertools.chain.from_iterable(val for (_, val) in itertools.islice(items, n)))
test_dict = dict((x, ["a"] * int(x / 500)) for x in range(1000))
test_n = 300
In [13]: %timeit select_items_original(test_dict, test_n)
1000 loops, best of 3: 293 us per loop
In [14]: %timeit select_items_faster(test_dict, test_n)
1000 loops, best of 3: 203 us per loop
Replacing the itertools.islice with a [:n] doesn't really help things:
def select_items_faster_slice(temp_dict, n):
    """Select n items from the dictionary"""
    items = temp_dict.items()
    items.sort()
    return list(itertools.chain.from_iterable(val for (_, val) in items[:n]))
In [16]: %timeit select_items_faster_slice(test_dict, test_n)
1000 loops, best of 3: 210 us per loop
And neither does sorted:
In [18]: %timeit select_items_faster_sorted(test_dict, test_n)
1000 loops, best of 3: 213 us per loop
In [19]: print _17
def select_items_faster_sorted(temp_dict, n):
    """Select n items from the dictionary"""
    return list(itertools.chain.from_iterable(val for (_, val) in itertools.islice(sorted(temp_dict.items()), n)))
But a combination of map and __getitem__ is much faster:
In [22]: %timeit select_items_faster_map_getitem(test_dict, test_n)
10000 loops, best of 3: 90.7 us per loop
In [23]: print _20
def select_items_faster_map_getitem(temp_dict, n):
    """Select n items from the dictionary"""
    keys = temp_dict.keys()
    keys.sort()
    return list(itertools.chain.from_iterable(map(temp_dict.__getitem__, keys[:n])))
Replacing the list(itertools.chain.from_iterable(...)) call with a bit of a hack (filter calls result.extend on each value list purely for its side effect and discards the None results) speeds things up quite a bit:
In [28]: %timeit select_items_faster_map_getitem_list_extend(test_dict, test_n)
10000 loops, best of 3: 74.9 us per loop
In [29]: print _27
def select_items_faster_map_getitem_list_extend(temp_dict, n):
    """Select n items from the dictionary"""
    keys = temp_dict.keys()
    keys.sort()
    result = []
    filter(result.extend, map(temp_dict.__getitem__, keys[:n]))
    return result
And replacing map and the slice with their itertools equivalents squeezes out a tiny bit more speed:
In [31]: %timeit select_items_faster_map_getitem_list_extend_iterables(test_dict, test_n)
10000 loops, best of 3: 72.8 us per loop
In [32]: print _30
def select_items_faster_map_getitem_list_extend_iterables(temp_dict, n):
    """Select n items from the dictionary"""
    keys = temp_dict.keys()
    keys.sort()
    result = []
    filter(result.extend, itertools.imap(temp_dict.__getitem__, itertools.islice(keys, n)))
    return result
And that is about as fast as I think it can get: Python function calls are rather slow in CPython, and this version minimizes the number of Python-level function calls made in the inner loop.
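For what it's worth, the code above is Python 2 specific (list-returning keys(), itertools.imap, and an eager filter). A rough Python 3 sketch of the same idea, using a plain loop over list.extend since filter is lazy there, might look like this (the function name is mine, not part of the original answer):

import itertools

def select_items_py3(temp_dict, n):
    """Sketch only: flatten the value lists of the n smallest keys (Python 3)."""
    keys = sorted(temp_dict)          # keys() is a view in Python 3, so build a sorted list
    result = []
    for key in itertools.islice(keys, n):
        result.extend(temp_dict[key])  # extend iterates each value list in C
    return result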
Note:
- The OP didn't give any hint of what the input data looks like, so I had to guess. I could be way off, and that could drastically change the meaning of "fast".
- None of my implementations returns exactly n items: they return all of the value lists attached to the n smallest keys, so they don't behave identically to select_items_original (see the sketch below for a capped variant).
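If exactly n items are needed, one option (essentially what J.F. Sebastian's version below does) is to cap the flattened stream with an outer islice. A minimal Python 2 sketch along those lines, with a made-up function name:

import itertools

def select_items_capped(temp_dict, n):
    """Sketch only: flatten value lists in key order and stop after exactly n items."""
    keys = temp_dict.keys()
    keys.sort()
    flattened = itertools.chain.from_iterable(
        itertools.imap(temp_dict.__getitem__, keys))
    return list(itertools.islice(flattened, n))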
Edit: Using the same method to profile J.F. Sebastian's code:
In [2]: %timeit select_items_heapq(test_dict, test_n)
1000 loops, best of 3: 572 us per loop
In [3]: print _1
from itertools import *
import heapq
def select_items_heapq(temp_dict, n):
    return list(islice(chain.from_iterable(imap(temp_dict.get, heapq.nsmallest(n, temp_dict))), n))
And TokenMacGuy's code:
In [5]: %timeit select_items_tokenmacguy_first(test_dict, test_n)
1000 loops, best of 3: 201 us per loop
In [6]: %timeit select_items_tokenmacguy_second(test_dict, test_n)
1000 loops, best of 3: 730 us per loop
In [7]: print _4
def select_items_tokenmacguy_first(m, n):
    k, v, r = m.keys(), m.values(), range(len(m))
    r.sort(key=k.__getitem__)
    return [v[i] for i in r[:n]]
import heapq
def select_items_tokenmacguy_second(m, n):
    k, v, r = m.keys(), m.values(), range(len(m))
    smallest = heapq.nsmallest(n, r, k.__getitem__)
    for i, ind in enumerate(smallest):
        smallest[i] = v[ind]
    return smallest