def iteruniq(iterable, key=None): memo = list() add = memo.append if key is None: def predicate(item): if item in memo: return False add(item) return True else: def predicate(actual_item): item = key(actual_item) if item in memo: return False add(item) return True return filter(predicate, iterable) def best_case_iteruniq(iterable, key=None): memo_hash = set() memo_else = list() add_hash = memo_hash.add add_else = memo_else.append if key is None: def predicate(item): try: hash(item) except TypeError: if item in memo_else: return False add_else(item) return True else: if item in memo_hash: return False add_hash(item) return True else: def predicate(actual_item): item = key(actual_item) try: hash(item) except TypeError: if item in memo_else: return False add_else(item) return True else: if item in memo_hash: return False add_hash(item) return True return filter(predicate, iterable) import numpy from collections import deque from timeit import timeit def print_line(name, a1, a2, a3, a4, a5): s1 = "{:.4f}".format(a1/base)[:4] s2 = "{:.4f}".format(a2/base)[:4] s3 = "{:.4f}".format(a3/base)[:4] s4 = "{:.4f}".format(a4/base)[:4] s5 = "{:.4f}".format(a5/base)[:4] print("{:<14}{} {} {} {} {}".format(name, s1, s2, s3, s4, s5)) for number_of_elements in (4**r for r in range(2, 8)): print("Using {} elements".format(number_of_elements)) print() nsA = [int(x) for x in numpy.random.randint(number_of_elements/5, size=number_of_elements)] str_map_B = numpy.random.randint(10, size=1000000) nsB = [(n if x else [n]) for (n, x) in zip(nsA, str_map_B)] str_map_C = numpy.random.randint(3, size=1000000) nsC = [(n if x else [n]) for (n, x) in zip(nsA, str_map_C)] str_map_D = numpy.random.randint(3, size=1000000) nsD = [([n] if x else n) for (n, x) in zip(nsA, str_map_D)] nsE = [[n] for n in nsA] print("% Hashable: 100 90 66 33 0") a1 = timeit("deque(iteruniq(nsA), maxlen=0)", setup="from __main__ import deque, nsA, iteruniq", number=1) a2 = timeit("deque(iteruniq(nsB), maxlen=0)", setup="from __main__ import deque, nsB, iteruniq", number=1) a3 = timeit("deque(iteruniq(nsC), maxlen=0)", setup="from __main__ import deque, nsC, iteruniq", number=1) a4 = timeit("deque(iteruniq(nsD), maxlen=0)", setup="from __main__ import deque, nsD, iteruniq", number=1) a5 = timeit("deque(iteruniq(nsE), maxlen=0)", setup="from __main__ import deque, nsE, iteruniq", number=1) b1 = timeit("deque(iteruniq(nsA, key=str), maxlen=0)", setup="from __main__ import deque, nsA, iteruniq", number=1) b2 = timeit("deque(iteruniq(nsB, key=str), maxlen=0)", setup="from __main__ import deque, nsB, iteruniq", number=1) b3 = timeit("deque(iteruniq(nsC, key=str), maxlen=0)", setup="from __main__ import deque, nsC, iteruniq", number=1) b4 = timeit("deque(iteruniq(nsD, key=str), maxlen=0)", setup="from __main__ import deque, nsD, iteruniq", number=1) b5 = timeit("deque(iteruniq(nsE, key=str), maxlen=0)", setup="from __main__ import deque, nsE, iteruniq", number=1) c1 = timeit("deque(best_case_iteruniq(nsA), maxlen=0)", setup="from __main__ import deque, nsA, best_case_iteruniq", number=1) c2 = timeit("deque(best_case_iteruniq(nsB), maxlen=0)", setup="from __main__ import deque, nsB, best_case_iteruniq", number=1) c3 = timeit("deque(best_case_iteruniq(nsC), maxlen=0)", setup="from __main__ import deque, nsC, best_case_iteruniq", number=1) c4 = timeit("deque(best_case_iteruniq(nsD), maxlen=0)", setup="from __main__ import deque, nsD, best_case_iteruniq", number=1) c5 = timeit("deque(best_case_iteruniq(nsE), maxlen=0)", setup="from __main__ import deque, nsE, best_case_iteruniq", number=1) d1 = timeit("deque(best_case_iteruniq(nsA, key=str), maxlen=0)", setup="from __main__ import deque, nsA, best_case_iteruniq", number=1) d2 = timeit("deque(best_case_iteruniq(nsB, key=str), maxlen=0)", setup="from __main__ import deque, nsB, best_case_iteruniq", number=1) d3 = timeit("deque(best_case_iteruniq(nsC, key=str), maxlen=0)", setup="from __main__ import deque, nsC, best_case_iteruniq", number=1) d4 = timeit("deque(best_case_iteruniq(nsD, key=str), maxlen=0)", setup="from __main__ import deque, nsD, best_case_iteruniq", number=1) d5 = timeit("deque(best_case_iteruniq(nsE, key=str), maxlen=0)", setup="from __main__ import deque, nsE, best_case_iteruniq", number=1) base = min( a1, a2, a3, a4, a5, b1, b2, b3, b4, b5, c1, c2, c3, c4, c5, d1, d2, d3, d4, d5 ) print_line("Original:", a1, a2, a3, a4, a5) print_line("w/ key=str:", b1, b2, b3, b4, b5) print_line("New:", c1, c2, c3, c4, c5) print_line("w/ key=str:", d1, d2, d3, d4, d5) print() print("Testing for correctness") def check_iter_equal(*iterables): for items in zip(*iterables): first = items[0] if not all(i==first for i in items): return False return True assert check_iter_equal(iteruniq(nsA), iteruniq(nsA, key=str), best_case_iteruniq(nsA), best_case_iteruniq(nsA, key=str)) assert check_iter_equal(iteruniq(nsB), iteruniq(nsB, key=str), best_case_iteruniq(nsB), best_case_iteruniq(nsB, key=str)) assert check_iter_equal(iteruniq(nsC), iteruniq(nsC, key=str), best_case_iteruniq(nsC), best_case_iteruniq(nsC, key=str)) assert check_iter_equal(iteruniq(nsD), iteruniq(nsD, key=str), best_case_iteruniq(nsD), best_case_iteruniq(nsD, key=str)) assert check_iter_equal(iteruniq(nsE), iteruniq(nsE, key=str), best_case_iteruniq(nsE), best_case_iteruniq(nsE, key=str)) print("Tests complete")