Python example: possible speedup?
Gordon McMillan
gmcm at hypernet.com
Wed Sep 8 17:40:34 EDT 1999
> Hrvoje Niksic <hniksic at srce.hr> writes:
>
> > But I'll try it [slurp all the lines at once and iterate over them],
> > for the sake of argument, and see if the speedup is worth it.
>
> Yes, this does speed things up. Instead of 27 CPU seconds, the
> program now spends 19.5 seconds of CPU time. Along with:
>
> > > Also, instead of calling next_header on a line-by-line basis, how
> > > about calling it once per package and returning a dict containing
> > > the header names as keys?
>
> This buys me an additional 2.5 CPU seconds.
>
> The problem is that the thing is still twice as slow as the
> equivalent Perl program, which I haven't bothered to optimize at
> all. Oh well.
I'd guess you'd find this getting quite a bit closer to Perl speeds:
#!/usr/bin/python
import string
def _join_words(words, sep):
    """Join *words* with *sep* (same argument order as the old string.join)."""
    return sep.join(words)


class Dpkg_Reader:
    """Hand out the packages of a dpkg-style database one at a time.

    The whole database text is split into per-package chunks up front;
    next_package() then parses one chunk per call.
    """

    def __init__(self, file):
        """Split the database text into one chunk per package.

        file -- despite its name, the complete *text* of the database,
                not a path or an open file object.
        """
        # Packages are separated from each other by a blank line.
        self.__pkgs = file.split("\n\n")
        self.__index = 0

    def next_package(self, split=str.split, join=_join_words, find=str.find):
        """Return the next package as a dict, or None when none are left.

        The dict maps each header name to its value.  Continuation lines
        (lines starting with a space or a tab) are folded into the value
        of the preceding header, separated from it by a single space.
        Chunks that contain only whitespace are skipped.

        split, join, find -- string helpers bound as defaults so that they
            are fast local lookups; overrides must follow the calling
            conventions split(s, sep, maxsplit), join(words, sep) and
            find(s, sub).

        Raises ValueError if a non-empty line contains no colon.
        """
        while self.__index < len(self.__pkgs):
            chunk = self.__pkgs[self.__index]
            self.__index = self.__index + 1
            if not chunk.strip():
                # E.g. the empty chunk after the final blank line.
                continue
            # Fold continuation lines into the header they belong to.  The
            # find() test avoids the split/join work when there are none.
            for marker in ('\n ', '\n\t'):
                if find(chunk, marker) > -1:
                    chunk = join(split(chunk, marker), ' ')
            package = {}
            # Every line is parsed: splitting on "\n\n" already removed the
            # chunk's trailing newline, so there is no dummy last element.
            for line in split(chunk, '\n'):
                if not line:
                    continue
                try:
                    name, value = split(line, ': ', 1)
                except ValueError:
                    # Header with an empty value, e.g. "Conffiles:".
                    name, value = split(line, ':', 1)
                package[name] = value
            return package
        return None

    def close(self):
        """Do nothing; kept so callers can treat the reader like a file."""
        pass
def process_status(file):
    """Return a dict whose keys are the names of the installed packages.

    file -- path of the status database to read.

    A package counts as installed when the third word of its Status
    header is 'installed'.  Records without a Package header or with a
    shorter Status header are ignored.  Every value in the result is 1;
    the dict is used as a set.
    """
    # Dpkg_Reader expects the database text, not a path, so read it here.
    with open(file) as handle:
        reader = Dpkg_Reader(handle.read())
    installed = {}
    while 1:
        package = reader.next_package()
        if package is None:
            break
        status = package.get('Status', '').split()
        if len(status) < 3 or 'Package' not in package:
            # Incomplete record (e.g. an empty one after a trailing blank
            # line): nothing to record.
            continue
        if status[2] == 'installed':
            installed[package['Package']] = 1
    reader.close()
    return installed
def process_available(file, installed):
    """Return a dict mapping installed package names to their sizes.

    file      -- path of the database of available packages to read.
    installed -- container of installed package names (anything that
                 supports the `in` operator, e.g. the dict returned by
                 process_status).

    The size is the integer value of the Installed-Size header.  Records
    without a Package or Installed-Size header are ignored.

    Raises ValueError if an Installed-Size value is not an integer.
    """
    # Dpkg_Reader expects the database text, not a path, so read it here.
    with open(file) as handle:
        reader = Dpkg_Reader(handle.read())
    sizes = {}
    while 1:
        package = reader.next_package()
        if package is None:
            break
        pname = package.get('Package')
        if pname is None or pname not in installed:
            continue
        if 'Installed-Size' in package:
            sizes[pname] = int(package['Installed-Size'])
    reader.close()
    return sizes
# Sample package record used by test().  Continuation lines of the
# description start with a space, and the record ends with a blank line so
# that repeated copies (txt * n) stay separate packages.
txt = """\
Package: telnet
Status: install ok installed
Priority: standard
Section: net
Installed-Size: 130
Maintainer: Herbert Xu <herbert at debian.org>
Source: netkit-telnet
Version: 0.14-4
Replaces: netstd
Depends: libc6 (>= 2.1), libncurses4 (>= 4.2-3.1)
Description: The telnet client.
 The telnet command is used for interactive communication with another
 host using the TELNET protocol.

"""
def test(rounds=100, copies=100):
    """Time how long it takes to parse the sample record repeatedly.

    rounds -- how many times a fresh reader is created and drained.
    copies -- how many copies of the sample record each reader is given.

    Prints the elapsed wall-clock time in seconds.
    """
    import time
    # Build the input once; it is the same for every round.
    sample = txt * copies
    start = time.time()
    for _ in range(rounds):
        reader = Dpkg_Reader(sample)
        # Drain the reader; the parsed packages themselves are discarded.
        while reader.next_package() is not None:
            pass
    print("Took %4.2f secs" % (time.time() - start))
def main(status_path='/var/lib/dpkg/status',
         available_path='/var/lib/dpkg/available'):
    """Print the installed packages and their sizes, largest first.

    status_path    -- database passed to process_status.
    available_path -- database passed to process_available.

    Each output line has the form "name: size".
    """
    installed = process_status(status_path)
    sizes = process_available(available_path, installed)
    # Stable descending sort by size; ties keep the dict's own order.
    for pack in sorted(sizes, key=sizes.get, reverse=True):
        print("%s: %d" % (pack, sizes[pack]))
# Running the file as a script runs the benchmark, not main().
if __name__ == '__main__':
    test()
- Gordon
More information about the Python-list
mailing list