longest sequence

Mark McEahern marklists at mceahern.com
Mon Feb 17 19:53:55 EST 2003


[Tim Peters]
> A cheap solution in both cases is to inject another element into the
> tuples that never compares equal, and is known to be cheap to
> compare.  The most natural thingie to inject is the index in the
> original sequence, like so in Python 2.3:
>
> def longest(*args):
>      return max([(len(s), i, s) for i, s in enumerate(args)])[-1]

Thanks to everyone who replied.  The following test code generates the
following results on my Windows machine under cygwin's Python 2.2.1.  I've
included all suggestions made in the thread so far and modified the
longest_dsu and longest_max functions with Tim's advice above:

         longest_for --> 0.14 sec
         longest_dsu --> 0.25 sec
         longest_max --> 0.30 sec
             longest --> 0.40 sec

#!/usr/bin/env python

from time import clock

def longest(*args):
    sorted = list(args)
    sorted.sort(lambda x, y: cmp(len(x), len(y)))
    return sorted[-1]

def longest_dsu(*args):
    decorated = [(len(args[i]), i, args[i]) for i in range(len(args))]
    decorated.sort()
    return decorated[-1][-1]

def longest_max(*args):
    return max([(len(args[i]), i, args[i]) for i in range(len(args))])[-1]

def longest_for(*args):
    max_length = -1
    max_item = None
    for item in args:
        item_length = len(item)
        if item_length > max_length:
            max_item = item
            max_length = item_length
    return max_item

def get_test_functions():
    return [globals()[x] for x in globals() if x.startswith('longest')]

def time_it(f, *args, **kwargs):
    time_in = clock()
    result = f(*args, **kwargs)
    time_out = clock()
    return result, time_out - time_in

def make_data(count):
    return ['a' * x for x in range(count)]

class ResultError(Exception):pass
class InconsistentResultError(ResultError):pass

class Result:

    def __init__(self, func):
        self.func = func
        self.elapsed = 0
        self.result = None

    def run(self, count, *args, **kwargs):
        for x in range(count):
            result, elapsed = time_it(self.func, *args, **kwargs)
            self.elapsed += elapsed
            if self.result is None:
                self.result = result
            else:
                if result != self.result:
                    template = 'Expected %s, got %s with %s(%s, %s)'
                    message = template % (self.result, result,
                                          self.func.func_name, args, kwargs)
                    raise InconsistentResultError(message)

    def __repr__(self):
        return '%20s --> %0.2f sec' % (self.func.func_name, self.elapsed)

    def __cmp__(self, other):
        return cmp(self.elapsed, other.elapsed)

number_of_lists = 100
number_of_runs = 1000
data = make_data(number_of_lists)
tests = [Result(x) for x in get_test_functions()]

for t in tests:
    t.run(number_of_runs, *data)

tests.sort()

expected = tests[0].result
for t in tests:
    assert t.result == expected
    print t

-






More information about the Python-list mailing list