speed problems
Hans-Peter Jansen
hpj at urpla.net
Thu Jun 10 14:27:22 EDT 2004
Hi Axel & Pythoneers,
I played around with your scripts, and my winner got a bit longer than
usual. I hope the important part didn't suffer too much, but thanks
to a cheat, this one is faster than your original perl script, even with
profiling and annotations enabled! Talking 'bout the latter: shamelessly
stolen from a Zope check in by our master master, because I couldn't get
hotshot to produce useful per line statistics out of the box.
Well, although I have to admit, that perl seems faster on this specific
task (since the grep cheat would work for perl too), I would never
consider such a move, just try to do this with perl:
---8<--- [virstat.py] ---8<---
import os
import re
# Mail logs to scan, in order. Names ending in '.gz' / '.bz2' are piped
# through the matching decompressor below; anything else is opened directly.
# NOTE(review): the list entries are missing from this copy of the script --
# fill in the real log paths before running.
maillogs = [
]

# Decompress-to-stdout commands. Equivalent alternatives:
#   gzip = "/usr/bin/gzip -dc"
#   bzip2 = "/usr/bin/bzip2 -dc"
gzip = "/usr/bin/zcat"
bzip2 = "/usr/bin/bzcat"

# Running tallies filled in by dovirstat(): virus name -> number of hits,
# and the overall number of hits used as the percentage base.
virstat = {}
total = 0

# Non-zero: run dovirstat() under the hotshot profiler and print per-line
# statistics afterwards.
doprof = 1

# Captures the comma-separated virus list inside "INFECTED (...)".
# Raw string so the escaped parentheses reach the regex engine unchanged.
pat = re.compile(r"INFECTED \((.*)\)")
def dovirstat():
    """Tally virus names from every existing log listed in ``maillogs``.

    Each line matching ``pat`` contributes its comma-separated virus names:
    the per-name count in the module-level ``virstat`` dict and the
    module-level ``total`` counter are both incremented once per name.
    Log files that do not exist are skipped silently.  Returns nothing.
    """
    global virstat, total
    for logfile in maillogs:
        if not os.path.isfile(logfile):
            # Missing logs are simply skipped.
            continue
        # is it compressed?
        if logfile.endswith('.gz'):
            #XXX: cheating -- grep pre-filters the stream so the Python loop
            # only ever sees candidate lines.  os.popen() yields just the
            # child's stdout, so no unused stdin pipe is left open.
            lfd = os.popen("%s %s | grep INFECTED" % (gzip, logfile))
        elif logfile.endswith('.bz2'):
            #XXX: cheating (same trick as above)
            lfd = os.popen("%s %s | grep INFECTED" % (bzip2, logfile))
        else:
            # uncompressed
            lfd = open(logfile, "r")
        try:
            # hot loop
            for line in lfd:
                mo = pat.search(line)
                if mo:
                    for vnam in mo.group(1).split(", "):
                        virstat[vnam] = virstat.get(vnam, 0) + 1
                        # NOTE(review): counted per virus name so that the
                        # percentages printed later add up to 100 -- confirm.
                        total += 1
        finally:
            # Release the file / pipe even if parsing fails midway.
            lfd.close()
def load_line_info(log):
    """Aggregate profiler events into per-location time and hit counts.

    ``log`` is an iterable of ``(what, place, tdelta)`` triples.  The
    ``tdelta`` of each event is charged to the location of the *previous*
    recorded event, because it is read here as the time spent getting from
    there to the current event.

    Returns a dict mapping ``place`` to ``(accumulated_tdelta, hit_count)``.
    The key ``None`` collects whatever was charged before any location had
    been recorded.
    """
    byline = {}
    prevloc = None
    for what, place, tdelta in log:
        if tdelta > 0:
            t, nhits = byline.get(prevloc, (0, 0))
            byline[prevloc] = (tdelta + t), (nhits + 1)
            # NOTE(review): events with tdelta <= 0 are ignored entirely and
            # do not move prevloc; the nesting of this line was restored from
            # a copy without indentation -- confirm against the original.
            prevloc = place
    return byline
def basename(path, cache={}):
    """Return the last component of ``path``, memoising the result.

    ``cache`` maps full paths to their base names.  The shared mutable
    default is deliberate: it acts as a process-wide memo, so repeated
    lookups of the same path skip ``os.path.split``.  Pass your own dict to
    use a separate cache.
    """
    try:
        return cache[path]
    except KeyError:
        fn = os.path.split(path)[1]
        cache[path] = fn
        return fn
def print_results(results):
    """Print one summary row per profiled source line.

    ``results`` is an iterable of ``(info, place)`` pairs.  ``info`` is a
    pair of integers printed in two 8-wide columns; ``place`` is a
    ``(filename, line, funcname)`` triple.  Entries whose ``place`` is falsy
    cannot be unpacked, so they are reported as 'Bad unpack' and skipped.
    """
    for info, place in results:
        if not place:
            print('Bad unpack: %s %s' % (info, place))
            # Without a place there is nothing to unpack below.
            continue
        filename, line, funcname = place
        # Single pre-formatted argument: identical output whether print is
        # a statement (Python 2) or a function (Python 3).
        print('%s %s %s' % ('%8d %8d' % info, basename(filename), line))
def annotate_results(results):
    """Print an annotated listing of every profiled file that exists on disk.

    ``results`` is an iterable of ``(stats, place)`` pairs, where ``stats``
    is ``(time, hits)`` and ``place`` is ``(filename, line, funcname)``.
    Entries with a falsy ``place`` are ignored.  The remaining entries are
    grouped per file and handed to ``annotate`` as ``(line, hits, time)``
    tuples; files that no longer exist are skipped.
    """
    files = {}
    for stats, place in results:
        if not place:
            # No location recorded for this sample; nothing to annotate.
            continue
        elapsed, hits = stats
        filename, lineno, _funcname = place
        files.setdefault(filename, []).append((lineno, hits, elapsed))
    # Sorted file order keeps the report stable from run to run.
    for filename in sorted(files):
        if os.path.exists(filename):
            # annotate() walks the source top to bottom, so it needs the
            # entries in ascending line order.
            annotate(filename, sorted(files[filename]))
def annotate(file, lines):
    """Print the source of ``file`` with profile figures in the left margin.

    ``lines`` is a sequence of ``(line_number, hits, time)`` tuples in
    ascending line-number order (line numbers start at 1).  Lines with an
    entry are prefixed with the hit count and time; all other lines get a
    blank margin of the same width.  ``lines`` is only read, never modified,
    and may be empty.
    """
    import sys  # local import: the block must not rely on a file-level one

    # Writing directly avoids the Python 2 print statement's trailing-comma
    # syntax while producing the same bytes; each source line already
    # carries its own newline.
    write = sys.stdout.write
    rule = "-" * 60
    write("%s\n%s\n%s\n" % (rule, file, rule))

    count = len(lines)
    pos = 0  # index of the next entry in `lines` still waiting for its line
    f = open(file)
    try:
        lineno = 0
        for text in f:
            lineno += 1
            # Drop entries that can no longer match (duplicates, or line
            # numbers already passed) so later entries are not blocked.
            while pos < count and lines[pos][0] < lineno:
                pos += 1
            if pos < count and lines[pos][0] == lineno:
                write("%6d %8d  %s" % (lines[pos][1], lines[pos][2], text))
                pos += 1
            else:
                write("%s %s" % (" " * 16, text))
    finally:
        f.close()
# --- script driver ---------------------------------------------------------
# NOTE(review): several statements of this section are missing from this copy
# of the script (nothing called dovirstat(), the profiler was created but
# never run or closed, and `results` was never used).  The calls marked
# "restored" below are a best-effort reconstruction -- confirm against the
# original script.
if not doprof:
    dovirstat()  # restored
else:
    import hotshot
    prof = hotshot.Profile("virstat.prof", lineevents=1)
    try:
        prof.runcall(dovirstat)  # restored
    finally:
        # Closing flushes the log so LogReader below can read it.
        prof.close()  # restored

# Share of each virus in the overall number of hits, in name order.
vlist = list(virstat.keys())
vlist.sort()
for vname in vlist:
    p = (virstat[vname] / float(total)) * 100
    print("%-30s %5.2f%%" % (vname, p))

if doprof:
    from hotshot.log import LogReader
    log = LogReader("virstat.prof")
    byline = load_line_info(log)
    # Keep only samples recorded for this script itself.
    results = [(v, k) for k, v in byline.items() if k and k[0] == 'virstat.py']
    # Most expensive lines first.
    results.sort()  # restored
    results.reverse()  # restored
    print_results(results)  # restored
    annotate_results(results)  # restored
Python programming is not only an easy way to get necessary work done,
at its best it combines art and science in an aesthetic manner.
More information about the Python-list
mailing list