Python example: possible speedup?
Hrvoje Niksic
hniksic at srce.hr
Wed Sep 8 14:01:27 EDT 1999
Hrvoje Niksic <hniksic at srce.hr> writes:
> But I'll try it [slurp all the lines at once and iterate over them],
> for the sake of argument, and see if the speedup is worth it.
Yes, this does speed things up. Instead of 27 CPU seconds, the
program now spends 19.5 seconds of CPU time. Along with:
> > Also, instead of calling next_header on a line-by-line basis, how
> > about calling it once per package and returning a dict containing
> > the header names as keys?
This buys me an additional 2.5 CPU seconds.
The problem is that the thing is still twice as slow as the
equivalent Perl program, which I haven't bothered to optimize at all.
Oh well.
In case anyone is still interested, after these two optimizations, the
code looks like this:
#!/usr/bin/python
import string
class Dpkg_Reader:
    """Sequential reader for dpkg-style control files.

    The file is treated as a series of stanzas separated by blank lines.
    Each stanza holds ``Name: value`` fields; a line that starts with a
    space or a tab continues the value of the field above it.
    """

    def __init__(self, file):
        """Read every line of the file at path *file* into memory.

        The file is closed again before the constructor returns; the
        actual parsing is done stanza by stanza in next_package().
        """
        with open(file) as handle:
            self.__lines = handle.readlines()
        self.__index = 0

    def next_package(self):
        """Return the next stanza as a dict, or None at end of file.

        Keys are field names and values are the field text with the
        final newline removed.  Multi-line values keep their embedded
        newlines and the leading whitespace of continuation lines.

        Blank lines before a stanza are skipped, and a final stanza that
        is not followed by a blank line is still returned.

        Raises ValueError if a field contains no ':' separator.
        """
        package = {}
        field = ''          # raw text of the field currently being collected
        lines = self.__lines
        total = len(lines)
        while self.__index < total:
            line = lines[self.__index]
            self.__index += 1
            if line == "\n":
                if field:
                    break   # a blank line terminates the current stanza
                continue    # nothing collected yet: skip stray blank lines
            if field and line[0] not in " \t":
                # A non-continuation line starts a new field, so the one
                # collected so far is complete.
                name, value = self._split_field(field)
                package[name] = value
                field = ''
            field += line
        if field:
            # Flush the last field of the stanza (also reached at EOF).
            name, value = self._split_field(field)
            package[name] = value
        return package or None

    def _split_field(self, text):
        """Split the raw text of one field into a (name, value) pair."""
        name, colon, value = text.partition(':')
        if not colon:
            raise ValueError("malformed field (no ':'): %r" % (text,))
        # Drop the single space that conventionally follows the colon.
        if value[:1] == ' ':
            value = value[1:]
        # Drop the trailing newline only if present; the last line of a
        # file may legitimately lack one.
        if value[-1:] == "\n":
            value = value[:-1]
        return name, value

    def close(self):
        """Do nothing; kept so existing callers can still call close()."""
        pass
def process_status(file):
    """Collect the names of installed packages from a dpkg status file.

    *file* is the path handed to Dpkg_Reader.  The result is a dict that
    maps each package name to 1 for every stanza whose ``Status`` field
    has 'installed' as its third word.

    Stanzas with a missing or shorter ``Status`` field are treated as not
    installed.  A matching stanza without a ``Package`` field raises
    KeyError.
    """
    reader = Dpkg_Reader(file)
    installed = {}
    try:
        while True:
            package = reader.next_package()
            if package is None:
                break
            # split() without an argument tolerates repeated whitespace
            # and yields [] for an absent field.
            status = package.get('Status', '').split()
            if len(status) > 2 and status[2] == 'installed':
                installed[package['Package']] = 1
    finally:
        # Release the reader even if a stanza turns out to be malformed.
        reader.close()
    return installed
def process_available(file, installed):
    """Look up the installed size of every installed package.

    *file* is the path handed to Dpkg_Reader and *installed* is a
    container of package names supporting ``in`` (for example the dict
    returned by process_status).  The result maps package name to the
    integer value of that stanza's ``Installed-Size`` field.

    Installed packages whose stanza has no ``Installed-Size`` field are
    left out of the result.  A stanza without a ``Package`` field raises
    KeyError, and a non-numeric size raises ValueError.
    """
    reader = Dpkg_Reader(file)
    sizes = {}
    try:
        while True:
            package = reader.next_package()
            if package is None:
                break
            pname = package['Package']
            if pname not in installed:
                continue
            size = package.get('Installed-Size')
            if size is None:
                continue    # no size recorded for this package
            sizes[pname] = int(size)
    finally:
        # Release the reader even if a stanza turns out to be malformed.
        reader.close()
    return sizes
def main(status_file='/var/lib/dpkg/status',
         available_file='/var/lib/dpkg/available'):
    """Print installed packages and their sizes, largest first.

    *status_file* is read to find which packages are installed and
    *available_file* to find their sizes.  Both default to the paths the
    script originally hard-coded.  Each output line is ``name: size``.
    """
    installed = process_status(status_file)
    sizes = process_available(available_file, installed)
    # Descending by size; ties keep their original relative order, as
    # they did with the former cmp-based sort.
    for pack in sorted(sizes, key=sizes.get, reverse=True):
        print("%s: %d" % (pack, sizes[pack]))
# Produce the report only when run as a script, not when imported.
if __name__ == '__main__':
    main()
More information about the Python-list
mailing list