Help with script with performance problems
Peter Otten
__peter__ at web.de
Sun Nov 23 18:36:55 EST 2003
Peter Otten wrote:
> However, it took 143 seconds to process 10**7 lines generated by
I just downloaded psycho -- oops, I keep misspelling the name :-) -- I mean
psyco, and it brings the time down to 92 seconds, almost for free. I must say
I'm impressed; the psyco-logist(s) did an excellent job.
Peter
#!/usr/bin/python -u
import sys

# psyco is an optional accelerator: enable it when it is installed, but let
# the script still run (just more slowly) where it is not available.
try:
    import psyco
except ImportError:
    psyco = None
else:
    psyco.full()
def _print_frequent(counts, threshold):
    """Print a "key,count" line for each key counted more than threshold times."""
    # Iterate the keys directly: this stays lazy on both Python 2 and 3.
    for key in counts:
        count = counts[key]
        if count > threshold:
            print("%s,%s" % (key, count))


def main(filename=None, threshold=100, pointmod=100000):
    """Count sources and queries in a log file and print the frequent ones.

    Every line must split on whitespace into exactly nine fields:
    month, day, timestr, stype, source, qtype, query, ctype, record.
    The source is counted with everything from its first '#' removed.

    filename  -- path of the file to read; defaults to sys.argv[1].
    threshold -- only keys seen more than this many times are printed.
    pointmod  -- a progress dot is written once every `pointmod` lines.

    Raises ValueError when a line does not have exactly nine fields; the
    message carries the zero-based line index and the offending line.
    """
    if filename is None:
        filename = sys.argv[1]
    clients = {}
    queries = {}
    lineNo = -1  # stays -1 for an empty file, so the summary reports 0 lines
    f = open(filename)
    try:
        print("Each dot is %d lines..." % pointmod)
        for lineNo, line in enumerate(f):
            if lineNo % pointmod == 0:
                sys.stdout.write(".")
            fields = line.split()
            # Checking the length up front replaces the nine-name unpacking,
            # of which only `source` and `query` were ever used.
            if len(fields) != 9:
                raise ValueError(
                    "problem splitting line %d\n%s" % (lineNo, line))
            source = fields[4].split('#', 1)[0]
            query = fields[6]
            clients[source] = clients.get(source, 0) + 1
            queries[query] = queries.get(query, 0) + 1
    finally:
        f.close()
    print("")  # terminate the row of progress dots
    print("%d lines processed" % (lineNo + 1))
    _print_frequent(clients, threshold)
    _print_frequent(queries, threshold)
import time

# Run and time the report only when executed as a script, so that importing
# this module does not start processing sys.argv[1].
if __name__ == "__main__":
    starttime = time.time()
    main()
    # Single-argument form prints the same text on Python 2 and Python 3.
    print("time: %s" % (time.time() - starttime))
More information about the Python-list
mailing list