performance of tight loop

Peter Otten __peter__ at web.de
Tue Dec 14 06:48:37 EST 2010


gry wrote:

> [python-2.4.3, rh CentOS release 5.5 linux, 24 xeon cpu's, 24GB ram]
> I have a little data generator that I'd like to go faster... any
> suggestions?
> maxint is usually 9223372036854775808(max 64bit int), but could
> occasionally be 99.
> width is usually 500 or 1600, rows ~ 5000.
> 
> from random import randint
> 
> def row(i, wd, mx):
>     first = ['%d' % i]
>     rest =  ['%d' % randint(1, mx) for i in range(wd - 1)]
>     return first + rest
> ...
>     while True:
>         print "copy %s from stdin direct delimiter ',';" % table_name
>         for i in range(i,i+rows):
>             print ','.join(row(i, width, maxint))
>         print '\.'

I see the biggest potential in inlining randint. Unfortunately you did not 
provide an executable script and I had to make it up:

$ cat gry.py
from random import randint
import sys

def row(i, wd, mx):
    """Build one output row as a list of decimal strings.

    The first field is the row number ``i``; it is followed by ``wd - 1``
    fields, each a fresh ``randint(1, mx)`` value formatted with ``%d``.
    """
    head = '%d' % i
    # Use a throwaway loop name so the parameter ``i`` is not rebound.
    tail = ['%d' % randint(1, mx) for _ in range(wd - 1)]
    return [head] + tail

def main():
    table_name = "unknown"
    maxint = sys.maxint
    width = 500
    rows = 1000
    offset = 0

    print "copy %s from stdin direct delimiter ',';" % table_name
    for i in range(offset, offset+rows):
        print ','.join(row(i, width, maxint))
    print '\.'

if __name__ == "__main__":
    main()
$ time python gry.py > /dev/null

real    0m5.280s
user    0m5.230s
sys     0m0.050s
$

$ cat gry_inline.py
import random
import math
import sys

def make_rand(n):
    """Return a zero-argument function producing random integers in [1, n].

    Intended as a hand-inlined replacement for ``random.randint(1, n)``
    that avoids the per-call overhead of going through ``randint``.

    Two strategies are used, chosen once up front:

    * ``n < 2**random.BPF``: scale a ``random.random()`` float by ``n``.
    * otherwise: draw ``k`` random bits with ``random.getrandbits`` and
      retry until the value is below ``n`` (rejection sampling).

    Raises ValueError if ``n < 1``; ``random.randint(1, n)`` likewise
    rejects an empty range instead of returning out-of-range values.
    """
    # Validate once here, outside the hot path, so rand() stays minimal.
    if n < 1:
        raise ValueError(
            "empty range for make_rand(): n must be >= 1, got %r" % (n,))
    if n < 1 << random.BPF:
        # The callable is bound as a default argument so that the lookup
        # inside rand() is a fast local-variable access.
        def rand(random=random.random):
            # random() is in [0.0, 1.0), so int(n*random()) is in [0, n-1].
            return int(n*random())+1
    else:
        # Number of bits to draw; the 1.00001 offset presumably guards
        # against log() landing just below an exact integer.
        k = int(1.00001 + math.log(n-1, 2.0))
        def rand(getrandbits=random.getrandbits):
            r = getrandbits(k)
            # Discard draws outside [0, n-1] and try again.
            while r >= n:
                r = getrandbits(k)
            return r+1
    return rand

def row(i, wd, rand):
    """Build one output row as a list of decimal strings.

    The first field is the row number ``i``; it is followed by ``wd - 1``
    fields, each obtained by calling ``rand()`` and formatting the result
    with ``%d``.
    """
    head = '%d' % i
    # Use a throwaway loop name so the parameter ``i`` is not rebound.
    tail = ['%d' % rand() for _ in range(wd - 1)]
    return [head] + tail

def main():
    table_name = "unknown"
    maxint = sys.maxint
    width = 500
    rows = 1000
    offset = 0

    rand = make_rand(maxint)

    print "copy %s from stdin direct delimiter ',';" % table_name
    for i in range(offset, offset+rows):
        print ','.join(row(i, width, rand))
    print '\.'

if __name__ == "__main__":
    main()
$ time python gry_inline.py > /dev/null

real    0m2.004s
user    0m2.000s
sys     0m0.000s
$

Disclaimer: the code in random.py is complex enough that I cannot guarantee 
I snatched the right pieces.

Peter



More information about the Python-list mailing list