performance of tight loop
Peter Otten
__peter__ at web.de
Tue Dec 14 06:48:37 EST 2010
gry wrote:
> [python-2.4.3, rh CentOS release 5.5 linux, 24 xeon cpu's, 24GB ram]
> I have a little data generator that I'd like to go faster... any
> suggestions?
> maxint is usually 9223372036854775808(max 64bit int), but could
> occasionally be 99.
> width is usually 500 or 1600, rows ~ 5000.
>
> from random import randint
>
> def row(i, wd, mx):
> first = ['%d' % i]
> rest = ['%d' % randint(1, mx) for i in range(wd - 1)]
> return first + rest
> ...
> while True:
> print "copy %s from stdin direct delimiter ',';" % table_name
> for i in range(i,i+rows):
> print ','.join(row(i, width, maxint))
> print '\.'
I see the biggest potential in inlining randint. Unfortunately you did not
provide an executable script, so I had to make one up:
$ cat gry.py
from random import randint
import sys
def row(i, wd, mx):
    """Return one output row as a list of decimal strings.

    The first cell is ``i`` formatted with ``%d``.  It is followed by
    ``wd - 1`` cells, each an independent ``randint(1, mx)`` draw formatted
    the same way.  When ``wd`` is 1 or less only the first cell is returned.
    """
    cells = ['%d' % i]
    # The loop variable is deliberately not called ``i``: Python 2 list
    # comprehensions leak their loop variable into the enclosing scope, so
    # reusing the name would overwrite the ``i`` parameter.
    cells.extend(['%d' % randint(1, mx) for _ in range(wd - 1)])
    return cells
def main():
    """Write one generated COPY batch to standard output.

    Emits the ``copy ... from stdin`` header for the hard-coded table name,
    then ``rows`` comma-separated lines produced by ``row`` (numbered from
    ``offset``), and finally the backslash-dot terminator line.
    """
    table_name = "unknown"
    width, rows, offset = 500, 1000, 0
    maxint = sys.maxint

    # Each print takes a single parenthesised expression, so the output is
    # the same as with the bare Python 2 print statement.
    print("copy %s from stdin direct delimiter ',';" % table_name)
    for row_id in range(offset, offset + rows):
        line = ','.join(row(row_id, width, maxint))
        print(line)
    print('\.')


if __name__ == "__main__":
    main()
$ time python gry.py > /dev/null
real 0m5.280s
user 0m5.230s
sys 0m0.050s
$
$ cat gry_inline.py
import random
import math
import sys
def make_rand(n):
    """Return a zero-argument function that draws an integer from 1..n.

    The choice of algorithm is made once, here, so that each later draw
    costs a single function call:

    * ``n`` below ``1 << random.BPF``: scale one ``random.random()`` float
      by ``n`` and truncate.
    * larger ``n``: draw ``k`` random bits with ``random.getrandbits`` and
      retry until the value is below ``n`` (rejection sampling).

    Raises ValueError if ``n`` is less than 1, because 1..n is then empty.
    """
    if n < 1:
        # Without this check the float branch would quietly return values
        # outside 1..n (for example always 1 when n == 0).
        raise ValueError("empty range for make_rand(%r)" % (n,))

    if n < 1 << random.BPF:
        # The module function is bound as a default argument so the call
        # inside rand() is a local-name lookup.
        def rand(random=random.random):
            return int(n * random()) + 1
    else:
        # Number of bits to request per draw; computed once, not per call.
        k = int(1.00001 + math.log(n - 1, 2.0))

        def rand(getrandbits=random.getrandbits):
            r = getrandbits(k)
            while r >= n:
                # Out of range: discard and draw again.
                r = getrandbits(k)
            # r is in 0..n-1 here; shift it into 1..n.
            return r + 1
    return rand
def row(i, wd, rand):
    """Return one output row as a list of decimal strings.

    The first cell is ``i`` formatted with ``%d``.  It is followed by
    ``wd - 1`` cells, each the result of one call to ``rand()`` formatted
    the same way.  When ``wd`` is 1 or less only the first cell is returned
    and ``rand`` is never called.
    """
    cells = ['%d' % i]
    # The loop variable is deliberately not called ``i``: Python 2 list
    # comprehensions leak their loop variable into the enclosing scope, so
    # reusing the name would overwrite the ``i`` parameter.
    cells.extend(['%d' % rand() for _ in range(wd - 1)])
    return cells
def main():
    """Write one generated COPY batch to standard output.

    Builds the random-number function once with ``make_rand``, emits the
    ``copy ... from stdin`` header for the hard-coded table name, then
    ``rows`` comma-separated lines produced by ``row`` (numbered from
    ``offset``), and finally the backslash-dot terminator line.
    """
    table_name = "unknown"
    width, rows, offset = 500, 1000, 0
    maxint = sys.maxint
    rand = make_rand(maxint)

    # Each print takes a single parenthesised expression, so the output is
    # the same as with the bare Python 2 print statement.
    print("copy %s from stdin direct delimiter ',';" % table_name)
    for row_id in range(offset, offset + rows):
        line = ','.join(row(row_id, width, rand))
        print(line)
    print('\.')


if __name__ == "__main__":
    main()
$ time python gry_inline.py > /dev/null
real 0m2.004s
user 0m2.000s
sys 0m0.000s
$
Disclaimer: the code in random.py is complex enough that I cannot guarantee
I snatched the right pieces.
Peter
More information about the Python-list
mailing list