Refactor a buffered class...
Michael Spencer
mahs at telcopartners.com
Thu Sep 7 00:52:12 EDT 2006
George Sakkis wrote:
> Michael Spencer wrote:
>
>> Here's a small update to the generator that allows optional handling of the head
>> and the tail:
>>
>> def chunker(s, chunk_size=3, sentry=".", keep_first = False, keep_last = False):
>> buffer=[]
...
>
> And here's a (probably) more efficient version, using a deque as a
> buffer:
>
Perhaps the deque-based solution is more efficient under some conditions, but
it's significantly slower for all the cases I tested:
Here are some typical results:
Using George's deque buffer:
>>> time_chunkers(chunkerGS, groups=1000, words_per_group=1000, chunk_size=300)
'get_chunks(...) 30 iterations, 16.70msec per call'
>>> time_chunkers(chunkerGS, groups=1000, words_per_group=1000, chunk_size=30)
'get_chunks(...) 35 iterations, 14.56msec per call'
>>> time_chunkers(chunkerGS, groups=1000, words_per_group=1000, chunk_size=3)
'get_chunks(...) 35 iterations, 14.41msec per call'
Using the list buffer:
>>> time_chunkers(chunker, groups=1000, words_per_group=1000, chunk_size=300)
'get_chunks(...) 85 iterations, 5.88msec per call'
>>> time_chunkers(chunker, groups=1000, words_per_group=1000, chunk_size=30)
'get_chunks(...) 85 iterations, 5.89msec per call'
>>> time_chunkers(chunker, groups=1000, words_per_group=1000, chunk_size=3)
'get_chunks(...) 83 iterations, 6.03msec per call'
>>>
Test functions follow:
def make_seq(groups=1000, words_per_group=3, word_length=76, sentry="."):
    """Build a flat list of test input for a chunker function.

    The list is ``groups`` repetitions of ``words_per_group`` identical
    words (the letter "W" repeated ``word_length`` times) followed by a
    single ``sentry`` marker.

    >>> make_seq(groups=2, words_per_group=2, word_length=2, sentry="%")
    ['WW', 'WW', '%', 'WW', 'WW', '%']
    """
    filler = "W" * word_length
    sequence = []
    for _ in range(groups):
        # One group: the filler words, then the terminating sentry.
        sequence.extend([filler] * words_per_group)
        sequence.append(sentry)
    return sequence
def time_chunkers(chunk_func, groups = 1000, words_per_group=10, chunk_size=3):
    """Test harness for chunker functions.

    Builds a test sequence with make_seq, then times how long it takes to
    exhaust ``chunk_func`` over that sequence.

    chunk_func      -- callable invoked as chunk_func(seq, chunk_size=...);
                       its result is consumed with list().
    groups          -- number of sentry-terminated groups in the input.
    words_per_group -- number of words in each group of the input.
    chunk_size      -- forwarded to chunk_func as the ``chunk_size`` keyword.

    Returns the report string produced by timefunc.
    """
    # Forward words_per_group: it used to be dropped, so every run was timed
    # against make_seq's default group size regardless of the argument.
    seq = make_seq(groups, words_per_group)

    # The nested function keeps the name get_chunks because timefunc puts
    # func.__name__ into its report string.
    def get_chunks(chunk_func, seq, chunk_size):
        # Pass chunk_size through: it used to be ignored, so chunk_func
        # always ran with its own default chunk size.
        return list(chunk_func(seq, chunk_size=chunk_size))

    return timefunc(get_chunks, chunk_func, seq, chunk_size)
def _get_timer():
    """Return a zero-argument clock function suitable for timing calls.

    Prefers time.perf_counter where the interpreter provides it.  On
    interpreters without it, falls back to the historical choice:
    time.clock on win32 and time.time elsewhere.
    """
    import sys
    import time

    # time.clock was removed in Python 3.8, so returning it unconditionally
    # on win32 raises AttributeError there.  perf_counter is its documented
    # high-resolution replacement.
    high_res = getattr(time, "perf_counter", None)
    if high_res is not None:
        return high_res

    # Legacy interpreters only: keep the original platform-based selection.
    if sys.platform == "win32":
        return time.clock
    return time.time
def timefunc(func, *args, **kwds):
    """Repeatedly call func(*args, **kwds) and report the mean time per call.

    The call is repeated until the accumulated measured time reaches at
    least 0.5 (in the units of the clock returned by _get_timer); it always
    runs at least once.  The result is a string of the form
    "<name>(...) <count> iterations, <time><unit> per call", where the unit
    is "usec" when more than 1000 iterations were made and "msec" otherwise.
    """
    clock = _get_timer()
    calls = 0
    elapsed = 0
    while True:
        started = clock()
        _ = func(*args, **kwds)
        finished = clock()
        elapsed += finished - started
        calls += 1
        if elapsed >= 0.5:
            break

    # Many iterations imply very short calls, so report in microseconds.
    if calls > 1000:
        scale, unit = 1000000, "usec"
    else:
        scale, unit = 1000, "msec"
    per_call = elapsed * scale / calls

    return "%s(...) %s iterations, %.2f%s per call" % (
        func.__name__, calls, per_call, unit)
More information about the Python-list
mailing list