Python example: possible speedup?

Gordon McMillan gmcm at hypernet.com
Wed Sep 8 17:40:34 EDT 1999


> Hrvoje Niksic <hniksic at srce.hr> writes:
> 
> > But I'll try it [slurp all the lines at once and iterate over them],
> > for the sake of argument, and see if the speedup is worth it.
> 
> Yes, this does speed things up.  Instead of 27 CPU seconds, the
> program now spends 19.5 seconds of CPU time.  Along with:
> 
> > > Also, instead of calling next_header on a line-by-line basis, how
> > > about calling it once per package and returning a dict containing
> > > the header names as keys?
> 
> This buys me an additional 2.5 CPU seconds.
> 
> The problem is that the thing is still twice as slow as the
> equivalent Perl program, which I haven't bothered to optimize at
> all. Oh well.

I'd guess you'd find this getting quite a bit closer to Perl speeds:

#!/usr/bin/python

import string

def _split(text, sep, maxsplit=-1):
    """Split *text* on *sep*, with the call shape of the old ``string.split``."""
    return text.split(sep, maxsplit)


def _join(words, sep=' '):
    """Join *words* with *sep*, with the call shape of the old ``string.join``."""
    return sep.join(words)


def _find(text, sub):
    """Return the index of *sub* in *text* (or -1), like the old ``string.find``."""
    return text.find(sub)


class Dpkg_Reader:
    """Step through the stanzas of dpkg-style control text held in memory.

    The whole text is split into stanzas on blank lines ("\\n\\n") up front;
    each call to next_package() then parses one stanza into a dict.
    """

    def __init__(self, file):
        """Split the control text into stanzas.

        file -- despite its name, this is the control *text* itself, not a
                path or file object: the reader never opens anything, so the
                caller is responsible for reading the data in.
        """
        self.__current = ''  # never read by this class; kept from the original
        data = file
        self.__pkgs = _split(data, "\n\n")
        self.__index = 0

    def next_package(self, split=_split, join=_join, find=_find):
        """Return the next stanza as a {header name: value} dict.

        Returns None once every stanza has been consumed (and on every call
        after that).  Blank stanzas -- such as the empty chunk that follows
        the final blank line of the input -- are skipped rather than being
        reported as empty dicts.

        split, join, find -- string helpers called as split(s, sep[, max]),
            join(words, sep) and find(s, sub).  They exist so the helpers are
            bound as fast local names; callers normally leave them alone.

        Raises ValueError if a non-blank line contains no ':' at all.
        """
        # Advance to the next stanza that actually has content.
        data = ''
        while not data.strip():
            if self.__index >= len(self.__pkgs):
                return None
            data = self.__pkgs[self.__index]
            self.__index = self.__index + 1

        # Fold continuation lines (newline followed by a space or a tab)
        # into the header line they belong to, separated by one space.
        for marker in ('\n ', '\n\t'):
            if find(data, marker) > -1:
                data = join(split(data, marker), ' ')

        package = {}
        # Every line is a header: the stanza has no trailing newline after
        # the "\n\n" split, so dropping the last line would lose a field.
        for line in split(data, '\n'):
            if not line.strip():
                continue
            try:
                name, value = split(line, ': ', 1)
            except ValueError:
                # No ": " separator, e.g. "Name:" with an empty value.
                name, value = split(line, ':', 1)
            package[name] = value
        return package

    def close(self):
        """Do nothing; there is no underlying resource to release."""
        pass

def process_status(file):
    """Return a dict keyed by the names of the installed packages.

    file -- the status data, handed straight to Dpkg_Reader.

    A package counts as installed when the third space-separated word of
    its 'Status' header is 'installed' (as in "install ok installed").
    Records without a 'Package' header, or whose 'Status' header is missing
    or has fewer than three words, are skipped instead of raising
    KeyError/IndexError.  Every value in the returned dict is 1; the dict
    is used as a set.
    """
    reader = Dpkg_Reader(file)
    installed = {}
    package = reader.next_package()
    while package is not None:
        name = package.get('Package')
        words = package.get('Status', '').split(' ')
        if name is not None and len(words) > 2 and words[2] == 'installed':
            installed[name] = 1
        package = reader.next_package()
    reader.close()
    return installed

def process_available(file, installed):
    """Return {package name: installed size} for the installed packages.

    file      -- the available-packages data, handed straight to Dpkg_Reader.
    installed -- mapping whose keys are the installed package names, as
                 returned by process_status().

    The size is the integer value of the 'Installed-Size' header.  Records
    with no 'Package' header, and installed packages with no
    'Installed-Size' header, are skipped instead of raising KeyError.
    Raises ValueError if an 'Installed-Size' value is not an integer.
    """
    reader = Dpkg_Reader(file)
    sizes = {}
    package = reader.next_package()
    while package is not None:
        pname = package.get('Package')
        if pname in installed and 'Installed-Size' in package:
            sizes[pname] = int(package['Installed-Size'])
        package = reader.next_package()
    reader.close()
    return sizes

# Sample control stanza used as benchmark input by test().  It ends with a
# blank line, so repeated copies (txt*100) are separated by "\n\n", which is
# the delimiter Dpkg_Reader splits stanzas on.
txt = """\
Package: telnet
Status: install ok installed
Priority: standard
Section: net
Installed-Size: 130
Maintainer: Herbert Xu <herbert at debian.org>
Source: netkit-telnet
Version: 0.14-4
Replaces: netstd
Depends: libc6 (>= 2.1), libncurses4 (>= 4.2-3.1)
Description: The telnet client.
 The telnet command is used for interactive communication with another
 host using the TELNET protocol.

"""


def test():
    """Time 100 passes of parsing 100 copies of the sample stanza.

    Each pass builds a fresh Dpkg_Reader over txt*100 and drains it with
    next_package(); the elapsed wall-clock time is printed at the end.
    """
    import time
    start = time.time()
    # range() and the parenthesised print behave the same on Python 2 and 3.
    for i in range(100):
        reader = Dpkg_Reader(txt * 100)
        while reader.next_package() is not None:
            pass
    print("Took %4.2f secs" % (time.time() - start))

def main():
    """Print every installed package with its installed size, largest first.

    Reads /var/lib/dpkg/status to find the installed packages and
    /var/lib/dpkg/available to look up their sizes, then prints one
    "name: size" line per package.
    """
    # Dpkg_Reader parses the text it is given and never opens a file, so
    # the file contents -- not the paths -- must be passed down.
    with open('/var/lib/dpkg/status') as handle:
        installed = process_status(handle.read())
    with open('/var/lib/dpkg/available') as handle:
        sizes = process_available(handle.read(), installed)
    # Descending by size; packages of equal size keep the dict's order.
    lst = sorted(sizes, key=sizes.get, reverse=True)
    for pack in lst:
        print("%s: %d" % (pack, sizes[pack]))

# When run as a script, execute the timing benchmark (test()), not main().
if __name__ == '__main__':
    test()


- Gordon




More information about the Python-list mailing list