Fri, 06 Jul 2001 10:08:51 -0700

Update of /cvsroot/python/python/dist/src/Tools/scripts
In directory usw-pr-cvs1:/tmp/cvs-serv3084

Added Files:
      Tag: descr-branch
	dutree.doc dutree.py eptags.py findlinksto.py fixcid.py 
	fixheader.py fixnotice.py fixps.py ftpmirror.py gencodec.py 
	h2py.py ifdef.py lfcr.py linktree.py lll.py logmerge.py 
	mailerdaemon.py md5sum.py methfix.py mkreal.py ndiff.py 
	nm2def.py objgraph.py parseentities.py pathfix.py pdeps.py 
	pindent.py ptags.py pydoc.pyw redemo.py reindent.py rgrep.py 
	suff.py sum5.py texi2html.py trace.py treesync.py untabify.py 
	which.py xxci.py 
Log Message:
Adding "the rest" of Tools/scripts to descr-branch.

--- NEW FILE: dutree.doc ---
Path: cwi.nl!sun4nl!mcsun!uunet!cs.utexas.edu!convex!usenet
From: tchrist@convex.COM (Tom Christiansen)
Newsgroups: comp.lang.perl
Subject: Re: The problems of Perl (Re: Question (silly?))
Message-ID: <1992Jan17.053115.4220@convex.com>
Date: 17 Jan 92 05:31:15 GMT
References: <17458@ector.cs.purdue.edu> <1992Jan16.165347.25583@cherokee.uswest.com> <=#Hues+4@cs.psu.edu>
Sender: usenet@convex.com (news access account)
Reply-To: tchrist@convex.COM (Tom Christiansen)
Organization: CONVEX Realtime Development, Colorado Springs, CO
Lines: 83
Nntp-Posting-Host: pixel.convex.com

>From the keyboard of flee@cs.psu.edu (Felix Lee):
:And Perl is definitely awkward with data types.  I haven't yet found a
:pleasant way of shoving non-trivial data types into Perl's grammar.

Yes, it's pretty awful at that, alright.  Sometimes I write perl programs
that need them, and sometimes it just takes a little creativity.  But
sometimes it's not worth it.  I actually wrote a C program the other day
(gasp) because I didn't want to deal with a game matrix with six links per node.

:Here's a very simple problem that's tricky to express in Perl: process
:the output of "du" to produce output that's indented to reflect the
:tree structure, and with each subtree sorted by size.  Something like:
:    434 /etc
:      |     344 .
:      |      50 install
:      |      35 uucp
:      |       3 nserve
:      |       |       2 .
:      |       |       1 auth.info
:      |       1 sm
:      |       1 sm.bak

At first I thought I could just keep one local list around
at once, but this seems inherently recursive.  Which means 
I need a real recursive data structure.  Maybe you could
do it with one of the %assoc arrays Larry uses in the begat
programs, but I broke down and got dirty.  I think the hardest
part was matching Felix's desired output exactly.  It's not 
blazingly fast: I should probably inline the &childof routine,
but it *was* faster to write than I could have written the 
equivalent C program.

--tom

--
"GUIs normally make it simple to accomplish simple actions and impossible
to accomplish complex actions."   --Doug Gwyn  (22/Jun/91 in comp.unix.wizards)

     Tom Christiansen           tchrist@convex.com      convex!tchrist

--- NEW FILE: dutree.py ---
#! /usr/bin/env python
# Format du output in a tree shape

import os, string, sys, errno

def main():
    p = os.popen('du ' + string.join(sys.argv[1:]), 'r')
    total, d = None, {}
    for line in p.readlines():
        i = 0
        while line[i] in '0123456789': i = i+1
        size = eval(line[:i])
        while line[i] in ' \t': i = i+1
        file = line[i:-1]
        comps = string.splitfields(file, '/')
        if comps[0] == '': comps[0] = '/'
        if comps[len(comps)-1] == '': del comps[len(comps)-1]
        total, d = store(size, comps, total, d)
    try:
        display(total, d)
    except IOError, e:
        if e.errno != errno.EPIPE:
            raise

def store(size, comps, total, d):
    """Record `size` for the path `comps` in the tree rooted at (total, d).

    A tree node is a pair (size, children) where children maps a path
    component to the node below it; size is None until a size has been
    stored for exactly that path.

    size  -- the size to record
    comps -- list of path components leading to the entry
    total -- size currently recorded for this node (or None)
    d     -- children dictionary of this node; updated in place

    Returns the updated (total, d) pair for this node.
    """
    if not comps:
        # We have arrived at the node itself: its size replaces `total`.
        return size, d
    head = comps[0]
    # One lookup with a default instead of has_key() plus indexing; this
    # also works on Python versions where dict.has_key() is gone.
    t1, d1 = d.get(head, (None, {}))
    d[head] = store(size, comps[1:], t1, d1)
    return total, d

def display(total, d):
    """Print the complete tree (total, d), starting at the left margin."""
    show(total, d, prefix='')

def show(total, d, prefix):
    """Print the children in `d`, largest first, each line after `prefix`.

    d maps a path component to a (size, children) pair.  A component
    whose size is None gets no line of its own; its children are printed
    at the current indentation instead.  `total` is not used here; it is
    part of the signature because callers pass the node's own size.
    """
    if not d: return
    entries = [(d[key][0], key) for key in d.keys()]
    # Largest size first, ties broken by name (descending), entries
    # without a size last.  The order is spelled out as a key because it
    # must not depend on how the interpreter compares None with a number.
    entries.sort(key=lambda e: (e[0] is not None, e[0] or 0, e[1]),
                 reverse=True)
    # All sizes are right-justified to the width of the first (largest).
    width = len(repr(entries[0][0]))
    for tsub, key in entries:
        if tsub is None:
            psub = prefix
        else:
            print(prefix + repr(tsub).rjust(width) + ' ' + key)
            # Children hang off a '|' placed under the last digit.
            psub = prefix + ' '*(width-1) + '|' + ' '*(len(key)+1)
        show(tsub, d[key][1], psub)

# Only run when executed as a script, so the module can be imported
# (e.g. to reuse store()/show()) without starting du.
if __name__ == '__main__':
    main()

--- NEW FILE: eptags.py ---
#! /usr/bin/env python
"""Create a TAGS file for Python programs, usable with GNU Emacs.

usage: eptags pyfiles...

The output TAGS file is usable with Emacs version 18, 19, 20.
Tagged are:
 - functions (even inside other defs or classes)
 - classes

eptags warns about files it cannot open.
eptags will not give warnings about duplicate tags.

BUGS:
   Because of tag duplication (methods with the same name in different
   classes), TAGS files are not very useful for most object-oriented
   python projects.
"""
import sys,re

# A line that starts (after optional indentation) with "def" or "class",
# followed by an identifier and then "(" or ":".  Group 2 is the name.
expr = r'^[ \t]*(def|class)[ \t]+([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*[:\(]'
matcher = re.compile(expr)

def treat_file(file, outfp):
    """Append tags found in file named 'file' to the open file 'outfp'

    One tag is produced for every line matching `matcher`: the matched
    text, a DEL character (octal 177), then "lineno,charno" where charno
    is the offset of the start of that line in the file.  The tags are
    preceded by a form feed and a "filename,size" header, size being the
    total length of the tag lines.  A file that cannot be opened is
    reported on stderr and skipped.
    """
    try:
        fp = open(file, 'r')
    except IOError:
        # Only a failure to open is expected here; anything else (for
        # instance KeyboardInterrupt) must not be swallowed.
        sys.stderr.write('Cannot open %s\n'%file)
        return
    charno = 0
    lineno = 0
    tags = []
    size = 0
    try:
        while 1:
            line = fp.readline()
            if not line:
                break
            lineno = lineno + 1
            m = matcher.search(line)
            if m:
                tag = m.group(0) + '\177%d,%d\n'%(lineno,charno)
                tags.append(tag)
                size = size + len(tag)
            charno = charno + len(line)
    finally:
        fp.close()
    outfp.write('\f\n%s,%d\n'%(file,size))
    for tag in tags:
        outfp.write(tag)

def main():
    """Write tags for every file named on the command line to ./TAGS."""
    outfp = open('TAGS', 'w')
    try:
        for file in sys.argv[1:]:
            treat_file(file, outfp)
    finally:
        # Close explicitly so the TAGS file is complete on disk even if
        # one of the inputs makes treat_file() raise.
        outfp.close()

if __name__=="__main__":
    main()

--- NEW FILE: findlinksto.py ---
#! /usr/bin/env python

# findlinksto
#
# find symbolic links to a path matching a regular expression

import os
import sys
import regex
import getopt

def main():
    """Parse "pattern directory ..." and walk each directory with visit().

    With fewer than two arguments (or a bad option) a usage message is
    printed on stderr and the program exits with status 2.
    """
    try:
        options, operands = getopt.getopt(sys.argv[1:], '')
        if not operands[1:]:
            raise getopt.error, 'not enough arguments'
    except getopt.error, problem:
        sys.stdout = sys.stderr
        print problem
        print 'usage: findlinksto pattern directory ...'
        sys.exit(2)
    matcher = regex.compile(operands[0])
    for top in operands[1:]:
        os.path.walk(top, visit, matcher)

def visit(prog, dirname, names):
    if os.path.islink(dirname):
        names[:] = []
        return
    if os.path.ismount(dirname):
        print 'descend into', dirname
    for name in names:
        name = os.path.join(dirname, name)
        try:
            linkto = os.readlink(name)
            if prog.search(linkto) >= 0:
                print name, '->', linkto
        except os.error:
            pass

# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()

--- NEW FILE: fixcid.py ---
#! /usr/bin/env python

# Perform massive identifier substitution on C source files.
# This actually tokenizes the files (to some extent) so it can
# avoid making substitutions inside strings or comments.
# Inside strings, substitutions are never made; inside comments,
# it is a user option (off by default).
#
# The substitutions are read from one or more files whose lines,
# when not empty, after stripping comments starting with #,
# must contain exactly two words separated by whitespace: the
# old identifier and its replacement.
#
# The option -r reverses the sense of the substitutions (this may be
# useful to undo a particular substitution).
#
# If the old identifier is prefixed with a '*' (with no intervening
# whitespace), then it will not be substituted inside comments.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a C file (ends in .h or .c).  The special filename '-' means
# operate in filter mode: read stdin, write stdout.
#
# Symbolic links are always ignored (except as explicit directory
# arguments).
#
# The original files are kept as back-up with a "~" suffix.
#
# Changes made are reported to stdout in a diff-like format.
#
# NB: by changing only the function fixline() you can turn this
# into a program for different changes to C source files; by
# changing the function wanted() you can make a different selection of
# files.

import sys
import regex
import string
import os
from stat import *
import getopt

# Output helpers: error messages and debug traces are written to stderr,
# the report of changes made is written to stdout.
err = sys.stderr.write
dbg = err
rep = sys.stdout.write

def usage():
    """Write the command-line help text to stderr, one piece at a time."""
    help_text = (
        'Usage: ' + sys.argv[0] +
              ' [-c] [-r] [-s file] ... file-or-directory ...\n',
        '\n',
        '-c           : substitute inside comments\n',
        '-r           : reverse direction for following -s options\n',
        '-s substfile : add a file of substitutions\n',
        '\n',
        'Each non-empty non-comment line in a substitution file must\n',
        'contain exactly two words: an identifier and its replacement.\n',
        'Comments start with a # character and end at end of line.\n',
        'If an identifier is preceded with a *, it is not substituted\n',
        'inside a comment even when -c is specified.\n',
    )
    for piece in help_text:
        err(piece)

def main():
    """Process the command line and exit with 0 (all fine) or 1 (problems).

    Options are applied in the order given, so -r affects only the -s
    options that follow it.  Each remaining argument is either a
    directory (searched recursively), a symbolic link (refused) or a
    file to fix.  Bad options or no arguments at all give exit status 2.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'crs:')
    except getopt.error, msg:
        err('Options error: ' + str(msg) + '\n')
        usage()
        sys.exit(2)
    if not args: # No arguments
        usage()
        sys.exit(2)
    switches = {'-c': setdocomments, '-r': setreverse}
    for flag, value in opts:
        if flag == '-s':
            addsubst(value)
        elif switches.has_key(flag):
            switches[flag]()
    failed = 0
    for target in args:
        if os.path.isdir(target):
            status = recursedown(target)
        elif os.path.islink(target):
            err(target + ': will not process symbolic links\n')
            status = 1
        else:
            status = fix(target)
        if status: failed = 1
    sys.exit(failed)

# Change this regular expression to select a different set of files
Wanted = '^[a-zA-Z0-9_]+\.[ch]$'
def wanted(name):
    """Tell whether the bare file name `name` matches the Wanted pattern."""
    matched = regex.match(Wanted, name)
    # regex.match() yields a negative number when there is no match.
    return matched >= 0

def recursedown(dirname):
    dbg('recursedown(' + `dirname` + ')\n')
    bad = 0
    try:
        names = os.listdir(dirname)
    except os.error, msg:
        err(dirname + ': cannot list directory: ' + str(msg) + '\n')
        return 1
    names.sort()
    subdirs = []
    for name in names:
        if name in (os.curdir, os.pardir): continue
        fullname = os.path.join(dirname, name)
        if os.path.islink(fullname): pass
        elif os.path.isdir(fullname):
            subdirs.append(fullname)
        elif wanted(name):
            if fix(fullname): bad = 1
    for fullname in subdirs:
        if recursedown(fullname): bad = 1
    return bad

def fix(filename):
##  dbg('fix(' + `filename` + ')\n')
    if filename == '-':
        # Filter mode
        f = sys.stdin
        g = sys.stdout
    else:
        # File replacement mode
        try:
            f = open(filename, 'r')
        except IOError, msg:
            err(filename + ': cannot open: ' + str(msg) + '\n')
            return 1
        head, tail = os.path.split(filename)
        tempname = os.path.join(head, '@' + tail)
        g = None
    # If we find a match, we rewind the file and start over but
    # now copy everything to a temp file.
    lineno = 0
    initfixline()
    while 1:
        line = f.readline()
        if not line: break
        lineno = lineno + 1
        while line[-2:] == '\\\n':
            nextline = f.readline()
            if not nextline: break
            line = line + nextline
            lineno = lineno + 1
        newline = fixline(line)
        if newline != line:
            if g is None:
                try:
                    g = open(tempname, 'w')
                except IOError, msg:
                    f.close()
                    err(tempname+': cannot create: '+
                        str(msg)+'\n')
                    return 1
                f.seek(0)
                lineno = 0
                initfixline()
                rep(filename + ':\n')
                continue # restart from the beginning
            rep(`lineno` + '\n')
            rep('< ' + line)
            rep('> ' + newline)
        if g is not None:
            g.write(newline)

    # End of file
    if filename == '-': return 0 # Done in filter mode
    f.close()
    if not g: return 0 # No changes

    # Finishing touch -- move files

    # First copy the file's mode to the temp file
    try:
        statbuf = os.stat(filename)
        os.chmod(tempname, statbuf[ST_MODE] & 07777)
    except os.error, msg:
        err(tempname + ': warning: chmod failed (' + str(msg) + ')\n')
    # Then make a backup of the original file as filename~
    try:
        os.rename(filename, filename + '~')
    except os.error, msg:
        err(filename + ': warning: backup failed (' + str(msg) + ')\n')
    # Now move the temp file to the original file
    try:
        os.rename(tempname, filename)
    except os.error, msg:
        err(filename + ': rename failed (' + str(msg) + ')\n')
        return 1
    # Return succes
    return 0

# Tokenizing ANSI C (partly)

# The patterns below are compiled with the old `regex` module; in its
# syntax \( \) delimit a group and \| separates alternatives.

# An identifier, optionally preceded by "struct " so that struct tags
# can be substituted as a unit.
# NOTE(review): the trailing '+' means a name needs at least two
# characters to match, so one-letter identifiers are never seen --
# confirm this is intended.
Identifier = '\(struct \)?[a-zA-Z_][a-zA-Z0-9_]+'
# String and character literals, with backslash escapes.
String = '"\([^\n\\"]\|\\\\.\)*"'
Char = '\'\([^\n\\\']\|\\\\.\)*\''
CommentStart = '/\*'
CommentEnd = '\*/'

# Numbers are recognized only so that letters inside them (0x1f, 1e10,
# 10UL) are not mistaken for identifiers.
Hexnumber = '0[xX][0-9a-fA-F]*[uUlL]*'
Octnumber = '0[0-7]*[uUlL]*'
Decnumber = '[1-9][0-9]*[uUlL]*'
Intnumber = Hexnumber + '\|' + Octnumber + '\|' + Decnumber
Exponent = '[eE][-+]?[0-9]+'
Pointfloat = '\([0-9]+\.[0-9]*\|\.[0-9]+\)\(' + Exponent + '\)?'
Expfloat = '[0-9]+' + Exponent
Floatnumber = Pointfloat + '\|' + Expfloat
Number = Floatnumber + '\|' + Intnumber

# Anything else is an operator -- don't list this explicitly because of '/*'

# Tokens looked for outside a comment; finding CommentStart switches
# fixline() over to the inside-comment pattern.
OutsideComment = (Identifier, Number, String, Char, CommentStart)
OutsideCommentPattern = '\(' + string.joinfields(OutsideComment, '\|') + '\)'
OutsideCommentProgram = regex.compile(OutsideCommentPattern)

# Tokens looked for inside a comment; string and character literals are
# not recognized there, and CommentEnd switches back.
InsideComment = (Identifier, Number, CommentEnd)
InsideCommentPattern = '\(' + string.joinfields(InsideComment, '\|') + '\)'
InsideCommentProgram = regex.compile(InsideCommentPattern)

def initfixline():
    """Reset the tokenizer to the "outside a comment" state.

    Must be called before the first line of a file is given to fixline().
    """
    global Program
    Program = OutsideCommentProgram

def fixline(line):
    """Return `line` with all applicable identifier substitutions made.

    The global Program is the pattern for the current state (inside or
    outside a comment).  It is carried over from one call to the next,
    so a comment opened on one line is still open on the following one.
    Substitutions come from Dict; inside a comment they are made only
    when Docomments is set, and never for names listed in NotInComment.
    """
    global Program
##  print '-->', `line`
    i = 0
    while i < len(line):
        # search() returns the position of the next token, or a negative
        # number when there is none left on this line.
        i = Program.search(line, i)
        if i < 0: break
        found = Program.group(0)
##      if Program is InsideCommentProgram: print '...',
##      else: print '   ',
##      print found
        if len(found) == 2:
            # Comment delimiters switch the tokenizer state.
            if found == '/*':
                Program = InsideCommentProgram
            elif found == '*/':
                Program = OutsideCommentProgram
        n = len(found)
        if Dict.has_key(found):
            subst = Dict[found]
            if Program is InsideCommentProgram:
                if not Docomments:
                    print 'Found in comment:', found
                    i = i + n
                    continue
                if NotInComment.has_key(found):
##                  print 'Ignored in comment:',
##                  print found, '-->', subst
##                  print 'Line:', line,
                    # Replace the name by itself, i.e. leave it alone.
                    subst = found
##              else:
##                  print 'Substituting in comment:',
##                  print found, '-->', subst
##                  print 'Line:', line,
            line = line[:i] + subst + line[i+n:]
            # Continue scanning after the replacement text, so that it
            # is not itself examined for substitutions.
            n = len(subst)
        i = i + n
    return line

# Non-zero when substitutions are to be made inside comments as well.
Docomments = 0
def setdocomments():
    """Enable substitution inside comments (the -c option)."""
    global Docomments
    Docomments = 1

# True when the two words of a substitution line are to be swapped.
Reverse = 0
def setreverse():
    """Toggle the direction of substitutions read from now on (-r)."""
    global Reverse
    if Reverse:
        flipped = not 1
    else:
        flipped = not 0
    Reverse = flipped

Dict = {}
NotInComment = {}
def addsubst(substfile):
    try:
        fp = open(substfile, 'r')
    except IOError, msg:
        err(substfile + ': cannot read substfile: ' + str(msg) + '\n')
        sys.exit(1)
    lineno = 0
    while 1:
        line = fp.readline()
        if not line: break
        lineno = lineno + 1
        try:
            i = string.index(line, '#')
        except string.index_error:
            i = -1          # Happens to delete trailing \n
        words = string.split(line[:i])
        if not words: continue
        if len(words) == 3 and words[0] == 'struct':
            words[:2] = [words[0] + ' ' + words[1]]
        elif len(words) <> 2:
            err(substfile + ':' + `lineno` +
                      ': warning: bad line: ' + line)
            continue
        if Reverse:
            [value, key] = words
        else:
            [key, value] = words
        if value[0] == '*':
            value = value[1:]
        if key[0] == '*':
            key = key[1:]
            NotInComment[key] = value
        if Dict.has_key(key):
            err(substfile + ':' + `lineno` +
                      ': warning: overriding: ' +
                      key + ' ' + value + '\n')
            err(substfile + ':' + `lineno` +
                      ': warning: previous: ' + Dict[key] + '\n')
        Dict[key] = value
    fp.close()

# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()

--- NEW FILE: fixheader.py ---
#! /usr/bin/env python

# Add some standard cpp magic to a header file

import sys
import string

def main():
    """Add the cpp magic to every header file named on the command line."""
    for header in sys.argv[1:]:
        process(header)

def process(file):
    """Wrap the contents of header `file` in an include guard, in place.

    The guard macro is 'Py_' followed by the file name as given, with
    letters upper-cased and every character that is not an ASCII letter
    or digit replaced by '_'.  An 'extern "C"' section for C++ is added
    inside the guard.  Files that cannot be opened, or that do not start
    with a C comment, are reported on stderr and left untouched.
    """
    try:
        f = open(file, 'r')
    except IOError:
        msg = sys.exc_info()[1]
        sys.stderr.write('%s: can\'t open: %s\n' % (file, str(msg)))
        return
    data = f.read()
    f.close()
    if data[:2] != '/*':
        sys.stderr.write('%s does not begin with C comment\n' % file)
        return
    try:
        f = open(file, 'w')
    except IOError:
        msg = sys.exc_info()[1]
        sys.stderr.write('%s: can\'t write: %s\n' % (file, str(msg)))
        return
    sys.stderr.write('Processing %s ...\n' % file)
    # Only ASCII letters and digits may appear in a C macro name.
    idchars = string.ascii_letters + string.digits
    magic = 'Py_'
    for c in file:
        if c in idchars:
            magic = magic + c.upper()
        else: magic = magic + '_'
    # Write to the file directly.  Redirecting sys.stdout and using
    # print would leave stdout pointing at the header after we return.
    try:
        f.write('#ifndef %s\n' % magic)
        f.write('#define %s\n' % magic)
        f.write('#ifdef __cplusplus\n')
        f.write('extern "C" {\n')
        f.write('#endif\n')
        f.write('\n')
        f.write(data)
        f.write('\n')
        f.write('#ifdef __cplusplus\n')
        f.write('}\n')
        f.write('#endif\n')
        f.write('#endif /* !%s */\n' % magic)
    finally:
        f.close()

# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()

--- NEW FILE: fixnotice.py ---
#! /usr/bin/env python

# The exact text searched for in each file; it has to match character
# for character, including the trailing newline.
OLD_NOTICE = """/***********************************************************
Copyright (c) 2000, BeOpen.com.
Copyright (c) 1995-2000, Corporation for National Research Initiatives.
Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
All rights reserved.

See the file "Misc/COPYRIGHT" for information on usage and
redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
******************************************************************/
"""

# The text put in its place; empty, so the old notice is simply removed.
NEW_NOTICE = ""

# " <-- Help Emacs

import os, sys, string

def main():
    args = sys.argv[1:]
    if not args:
        print "No arguments."
    for arg in args:
        process(arg)

def process(arg):
    f = open(arg)
    data = f.read()
    f.close()
    i = string.find(data, OLD_NOTICE)
    if i < 0:
##      print "No old notice in", arg
        return
    data = data[:i] + NEW_NOTICE + data[i+len(OLD_NOTICE):]
    new = arg + ".new"
    backup = arg + ".bak"
    print "Replacing notice in", arg, "...",
    sys.stdout.flush()
    f = open(new, "w")
    f.write(data)
    f.close()
    os.rename(arg, backup)
    os.rename(new, arg)
    print "done"

if __name__ == '__main__':
    main()

--- NEW FILE: fixps.py ---
#!/usr/bin/env python

# Fix Python script(s) to reference the interpreter via /usr/bin/env python.
# Warning: this overwrites the file without making a backup.

import sys
import re

def main():
    for file in sys.argv[1:]:
        try:
            f = open(file, 'r')
        except IOError, msg:
            print file, ': can\'t open :', msg
            continue
        line = f.readline()
        if not re.match('^#! */usr/local/bin/python', line):
            print file, ': not a /usr/local/bin/python script'
            f.close()
            continue
        rest = f.read()
        f.close()
        line = re.sub('/usr/local/bin/python',
                      '/usr/bin/env python', line)
        print file, ':', `line`
        f = open(file, "w")
        f.write(line)
        f.write(rest)
        f.close()

# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()

--- NEW FILE: ftpmirror.py ---
#! /usr/bin/env python

"""Mirror a remote ftp subtree into a local directory tree.

usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
                 [-l username [-p passwd [-a account]]]
                 hostname [remotedir [localdir]]
-v: verbose
-q: quiet
-i: interactive mode
-m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
-n: don't log in
-r: remove local files/directories no longer pertinent
-l username [-p passwd [-a account]]: login info (default .netrc or anonymous)
-s pat: skip files matching pattern
hostname: remote host
remotedir: remote directory (default initial)
localdir: local directory (default current)
"""

import os
import sys
import time
import getopt
import string
import ftplib
import netrc
from fnmatch import fnmatch

# Print usage message and exit
def usage(*args):
    sys.stdout = sys.stderr
    for msg in args: print msg
    print __doc__
    sys.exit(2)

# Global settings; main() changes them according to the command line.
verbose = 1 # 0 for -q, 2 for -v
interactive = 0 # set by -i: ask before each retrieval
mac = 0 # set by -m: parse listings as bare file names
rmok = 0 # set by -r: local files may be removed
nologin = 0 # set by -n (and -m): skip the login step
# fnmatch patterns of names to ignore; -s (and -m) append to this list
skippats = ['.', '..', '.mirrorinfo']

# Main program: parse command line and start processing
def main():
    """Parse the command line, connect and log in, then start mirroring.

    Login data is looked up in the user's .netrc first; -l, -p and -a
    override what was found there.  After the optional change to the
    remote directory, mirrorsubdir() does the actual work.
    """
    global verbose, interactive, mac, rmok, nologin
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
    except getopt.error, msg:
        usage(msg)
    login = passwd = account = ''
    if not args: usage('hostname missing')
    host = args[0]
    try:
        found = netrc.netrc().authenticators(host)
        if found is not None:
            login, account, passwd = found
    except (netrc.NetrcParseError, IOError):
        # No usable .netrc: carry on with empty login data.
        pass
    for flag, value in opts:
        if flag == '-l':
            login = value
        elif flag == '-p':
            passwd = value
        elif flag == '-a':
            account = value
        elif flag == '-v':
            verbose = verbose + 1
        elif flag == '-q':
            verbose = 0
        elif flag == '-i':
            interactive = 1
        elif flag == '-m':
            mac = 1
            nologin = 1
            skippats.append('*.o')
        elif flag == '-n':
            nologin = 1
        elif flag == '-r':
            rmok = 1
        elif flag == '-s':
            skippats.append(value)
    if len(args) > 3: usage('too many arguments')
    remotedir = ''
    localdir = ''
    if len(args) > 1: remotedir = args[1]
    if len(args) > 2: localdir = args[2]
    #
    f = ftplib.FTP()
    if verbose: print 'Connecting to %s...' % repr(host)
    f.connect(host)
    if not nologin:
        if verbose:
            print 'Logging in as %s...' % repr(login or 'anonymous')
        f.login(login, passwd, account)
    if verbose: print 'OK.'
    where = f.pwd()
    if verbose > 1: print 'PWD =', repr(where)
    if remotedir:
        if verbose > 1: print 'cwd(%s)' % repr(remotedir)
        f.cwd(remotedir)
        if verbose > 1: print 'OK.'
        where = f.pwd()
        if verbose > 1: print 'PWD =', repr(where)
    #
    mirrorsubdir(f, localdir)

# Core logic: mirror one subdirectory (recursively)
def mirrorsubdir(f, localdir):
    pwd = f.pwd()
    if localdir and not os.path.isdir(localdir):
        if verbose: print 'Creating local directory', `localdir`
        try:
            makedir(localdir)
        except os.error, msg:
            print "Failed to establish local directory", `localdir`
            return
    infofilename = os.path.join(localdir, '.mirrorinfo')
    try:
        text = open(infofilename, 'r').read()
    except IOError, msg:
        text = '{}'
    try:
        info = eval(text)
    except (SyntaxError, NameError):
        print 'Bad mirror info in %s' % `infofilename`
        info = {}
    subdirs = []
    listing = []
    if verbose: print 'Listing remote directory %s...' % `pwd`
    f.retrlines('LIST', listing.append)
    filesfound = []
    for line in listing:
        if verbose > 1: print '-->', `line`
        if mac:
            # Mac listing has just filenames;
            # trailing / means subdirectory
            filename = string.strip(line)
            mode = '-'
            if filename[-1:] == '/':
                filename = filename[:-1]
                mode = 'd'
            infostuff = ''
        else:
            # Parse, assuming a UNIX listing
            words = string.split(line, None, 8)
            if len(words) < 6:
                if verbose > 1: print 'Skipping short line'
                continue
            filename = string.lstrip(words[-1])
            i = string.find(filename, " -> ")
            if i >= 0:
                # words[0] had better start with 'l'...
                if verbose > 1:
                    print 'Found symbolic link %s' % `filename`
                linkto = filename[i+4:]
                filename = filename[:i]
            infostuff = words[-5:-1]
            mode = words[0]
        skip = 0
        for pat in skippats:
            if fnmatch(filename, pat):
                if verbose > 1:
                    print 'Skip pattern', `pat`,
                    print 'matches', `filename`
                skip = 1
                break
        if skip:
            continue
        if mode[0] == 'd':
            if verbose > 1:
                print 'Remembering subdirectory', `filename`
            subdirs.append(filename)
            continue
        filesfound.append(filename)
        if info.has_key(filename) and info[filename] == infostuff:
            if verbose > 1:
                print 'Already have this version of',`filename`
            continue
        fullname = os.path.join(localdir, filename)
        tempname = os.path.join(localdir, '@'+filename)
        if interactive:
            doit = askabout('file', filename, pwd)
            if not doit:
                if not info.has_key(filename):
                    info[filename] = 'Not retrieved'
                continue
        try:
            os.unlink(tempname)
        except os.error:
            pass
        if mode[0] == 'l':
            if verbose:
                print "Creating symlink %s -> %s" % (
                        `filename`, `linkto`)
            try:
                os.symlink(linkto, tempname)
            except IOError, msg:
                print "Can't create %s: %s" % (
                        `tempname`, str(msg))
                continue
        else:
            try:
                fp = open(tempname, 'wb')
            except IOError, msg:
                print "Can't create %s: %s" % (
                        `tempname`, str(msg))
                continue
            if verbose:
                print 'Retrieving %s from %s as %s...' % \
                          (`filename`, `pwd`, `fullname`)
            if verbose:
                fp1 = LoggingFile(fp, 1024, sys.stdout)
            else:
                fp1 = fp
            t0 = time.time()
            try:
                f.retrbinary('RETR ' + filename,
                             fp1.write, 8*1024)
            except ftplib.error_perm, msg:
                print msg
            t1 = time.time()
            bytes = fp.tell()
            fp.close()
            if fp1 != fp:
                fp1.close()
        try:
            os.unlink(fullname)
        except os.error:
            pass            # Ignore the error
        try:
            os.rename(tempname, fullname)
        except os.error, msg:
            print "Can't rename %s to %s: %s" % (`tempname`,
                                                 `fullname`,
                                                 str(msg))
            continue
        info[filename] = infostuff
        writedict(info, infofilename)
        if verbose and mode[0] != 'l':
            dt = t1 - t0
            kbytes = bytes / 1024.0
            print int(round(kbytes)),
            print 'Kbytes in',
            print int(round(dt)),
            print 'seconds',
            if t1 > t0:
                print '(~%d Kbytes/sec)' % \
                          int(round(kbytes/dt),)
            print
    #
    # Remove files from info that are no longer remote
    deletions = 0
    for filename in info.keys():
        if filename not in filesfound:
            if verbose:
                print "Removing obsolete info entry for",
                print `filename`, "in", `localdir or "."`
            del info[filename]
            deletions = deletions + 1
    if deletions:
        writedict(info, infofilename)
    #
    # Remove local files that are no longer in the remote directory
    try:
        if not localdir: names = os.listdir(os.curdir)
        else: names = os.listdir(localdir)
    except os.error:
        names = []
    for name in names:
        if name[0] == '.' or info.has_key(name) or name in subdirs:
            continue
        skip = 0
        for pat in skippats:
            if fnmatch(name, pat):
                if verbose > 1:
                    print 'Skip pattern', `pat`,
                    print 'matches', `name`
                skip = 1
                break
        if skip:
            continue
        fullname = os.path.join(localdir, name)
        if not rmok:
            if verbose:
                print 'Local file', `fullname`,
                print 'is no longer pertinent'
            continue
        if verbose: print 'Removing local file/dir', `fullname`
        remove(fullname)
    #
    # Recursively mirror subdirectories
    for subdir in subdirs:
        if interactive:
            doit = askabout('subdirectory', subdir, pwd)
            if not doit: continue
        if verbose: print 'Processing subdirectory', `subdir`
        localsubdir = os.path.join(localdir, subdir)
        pwd = f.pwd()
        if verbose > 1:
            print 'Remote directory now:', `pwd`
            print 'Remote cwd', `subdir`
        try:
            f.cwd(subdir)
        except ftplib.error_perm, msg:
            print "Can't chdir to", `subdir`, ":", `msg`
        else:
            if verbose: print 'Mirroring as', `localsubdir`
            mirrorsubdir(f, localsubdir)
            if verbose > 1: print 'Remote cwd ..'
            f.cwd('..')
        newpwd = f.pwd()
        if newpwd != pwd:
            print 'Ended up in wrong directory after cd + cd ..'
            print 'Giving up now.'
            break
        else:
            if verbose > 1: print 'OK.'

# Helper to remove a file or directory tree
def remove(fullname):
    if os.path.isdir(fullname) and not os.path.islink(fullname):
        try:
            names = os.listdir(fullname)
        except os.error:
            names = []
        ok = 1
        for name in names:
            if not remove(os.path.join(fullname, name)):
                ok = 0
        if not ok:
            return 0
        try:
            os.rmdir(fullname)
        except os.error, msg:
            print "Can't remove local directory %s: %s" % \
                  (`fullname`, str(msg))
            return 0
    else:
        try:
            os.unlink(fullname)
        except os.error, msg:
            print "Can't remove local file %s: %s" % \
                  (`fullname`, str(msg))
            return 0
    return 1

# Wrapper around a file for writing to write a hash sign every block.
class LoggingFile:
    """File-like wrapper that shows progress while data is written.

    All data is passed on to fp; for every complete `blocksize` bytes
    written so far one '#' is written (and flushed) to outfp.  close()
    only ends the line of hash signs; closing fp is left to the caller.
    """
    def __init__(self, fp, blocksize, outfp):
        self.fp = fp
        self.bytes = 0          # number of bytes written so far
        self.hashes = 0         # number of hash signs shown so far
        self.blocksize = blocksize
        self.outfp = outfp
    def write(self, data):
        self.bytes = self.bytes + len(data)
        # Explicit floor division: only complete blocks count, whatever
        # the meaning of "/" is for the running interpreter.
        hashes = int(self.bytes) // self.blocksize
        while hashes > self.hashes:
            self.outfp.write('#')
            self.outfp.flush()
            self.hashes = self.hashes + 1
        self.fp.write(data)
    def close(self):
        self.outfp.write('\n')

# Ask permission to download a file.
def askabout(filetype, filename, pwd):
    prompt = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
    while 1:
        reply = string.lower(string.strip(raw_input(prompt)))
        if reply in ['y', 'ye', 'yes']:
            return 1
        if reply in ['', 'n', 'no', 'nop', 'nope']:
            return 0
        print 'Please answer yes or no.'

# Create a directory if it doesn't exist.  Recursively create the
# parent directory as well if needed.
def makedir(pathname):
    """Create the directory pathname, and any missing parents, if needed.

    Nothing is done when pathname already is a directory.  os.error is
    raised when a directory cannot be created.
    """
    if os.path.isdir(pathname):
        return
    dirname = os.path.dirname(pathname)
    if dirname:
        makedir(dirname)
        # With a trailing separator ('a/b/') dirname names the same
        # directory as pathname, so it exists by now and a second
        # mkdir() would fail.
        if os.path.isdir(pathname):
            return
    # os.mkdir() defaults to mode 0777 (restricted further by the umask).
    os.mkdir(pathname)

# Write a dictionary to a file in a way that can be read back using
# rval() but is still somewhat readable (i.e. not a single long line).
# Also creates a backup file.
def writedict(dict, filename):
    """Write dict to filename as a dictionary display, one item per line.

    The text is first written to a temporary file ('@' + name) in the
    same directory.  Any previous file is kept as a backup (name + '~')
    and the temporary file is then renamed to filename.
    """
    head, tail = os.path.split(filename)
    tempname = os.path.join(head, '@' + tail)
    backup = os.path.join(head, tail + '~')
    # Get rid of an old backup first; ignore the error if there is none.
    try:
        os.unlink(backup)
    except os.error:
        pass
    out = open(tempname, 'w')
    out.write('{\n')
    for item in dict.items():
        out.write('%s: %s,\n' % (repr(item[0]), repr(item[1])))
    out.write('}\n')
    out.close()
    # The current file becomes the backup; it need not exist yet.
    try:
        os.rename(filename, backup)
    except os.error:
        pass
    os.rename(tempname, filename)

# Call main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

--- NEW FILE: gencodec.py ---
""" Unicode Mapping Parser and Codec Generator.

This script parses Unicode mapping files as available from the Unicode
site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
modules from them. The codecs use the standard character mapping codec
to actually apply the mapping.

Synopsis: gencodec.py dir codec_prefix

All files in dir are scanned and those producing non-empty mappings
will be written to <codec_prefix><mapname>.py with <mapname> being the
first part of the map's filename ('a' in a.b.c.txt) converted to
lowercase with hyphens replaced by underscores.

The tool also writes marshalled versions of the mapping tables to the
same location (with .mapping extension).

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright Guido van Rossum, 2000.

"""#"

import string,re,os,time,marshal

# Create numeric tables or character based ones ?
numeric = 1

# Matches one line of a mapping file.  The three groups are:
#   1. the encoded value: one or more hex codes (0x...) joined by '+'
#   2. the Unicode value: zero or more hex codes or <meta> codes,
#      joined by '+'
#   3. an optional trailing comment, starting at the '#'
# NOTE(review): the hex character class in group 2 ends in 'A-Z' where
# group 1 uses 'A-F' -- this looks like a typo; confirm before changing.
mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'
                   '\s+'
                   '((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
                   '\s*'
                   '(#.+)?')

def parsecodes(codes,

               split=string.split,atoi=string.atoi,len=len,
               filter=filter,range=range):

    """ Converts code combinations to either a single code integer
        or a tuple of integers.

        meta-codes (in angular brackets, e.g. <LR> and <RL>) are
        ignored.

        Empty codes or illegal ones are returned as None.

    """
    if not codes:
        return None
    l = split(codes,'+')
    if len(l) == 1:
        return atoi(l[0],16)
    for i in range(len(l)):
        try:
            l[i] = atoi(l[i],16)
        except ValueError:
            l[i] = None
    l = filter(lambda x: x is not None, l)
    if len(l) == 1:
        return l[0]
    else:
        return tuple(l)

def readmap(filename,

            strip=string.strip):

    """ Reads the mapping file filename and returns a dictionary that
        maps each encoded value to a tuple (unicode value, comment).

        Blank lines, comment lines and lines that do not match mapRE
        are skipped.  When a code occurs more than once, the last
        occurrence wins.

        If the codes below 256 contain at least as many identity
        mappings as unmapped codes, the identity entries are left out,
        the unmapped codes are mapped to None and the special key
        'IDENTITY' is set to 256 to request an identity dictionary as
        the starting point.

        ValueError from parsecodes() is passed on to the caller.

    """
    f = open(filename,'r')
    try:
        lines = f.readlines()
    finally:
        f.close()
    enc2uni = {}
    for line in lines:
        line = strip(line)
        if not line or line[0] == '#':
            continue
        m = mapRE.match(line)
        if not m:
            #print '* not matched: %s' % repr(line)
            continue
        enc,uni,comment = m.groups()
        enc = parsecodes(enc)
        uni = parsecodes(uni)
        if not comment:
            comment = ''
        else:
            # drop the leading '#'
            comment = comment[1:]
        # Every mapping is recorded, including identity mappings, so
        # that nothing is lost if the identity shortcut is not used.
        enc2uni[enc] = (uni,comment)
    # Classify the single byte codes.
    identity = []
    unmapped = []
    for enc in range(256):
        entry = enc2uni.get(enc)
        if entry is None:
            unmapped.append(enc)
        elif entry[0] == enc:
            identity.append(enc)
    # If there are more identity-mapped entries than unmapped entries,
    # it pays to generate an identity dictionary first, and add explicit
    # mappings to None for the rest
    if len(identity)>=len(unmapped):
        for enc in identity:
            del enc2uni[enc]
        for enc in unmapped:
            enc2uni[enc] = (None, "")
        enc2uni['IDENTITY'] = 256

    return enc2uni

def hexrepr(t,

            join=string.join):

    if t is None:
        return 'None'
    try:
        len(t)
    except:
        return '0x%04x' % t
    return '(' + join(map(lambda t: '0x%04x' % t, t),', ') + ')'

def unicoderepr(t,

                join=string.join):

    """ Returns the source text for the Unicode side of a mapping:
        hex notation if the global numeric is true, otherwise the
        repr() of the corresponding Unicode string.

    """
    if t is None:
        return 'None'
    if numeric:
        return hexrepr(t)
    try:
        len(t)
    except:
        # no length: a single code
        return repr(unichr(t))
    chars = []
    for code in t:
        chars.append(unichr(code))
    return repr(join(chars,''))

def keyrepr(t,

            join=string.join):

    """ Returns the source text for the encoded side (the key) of a
        mapping: hex notation if the global numeric is true, otherwise
        the repr() of the corresponding character(s).

    """
    if t is None:
        return 'None'
    if numeric:
        return hexrepr(t)
    try:
        len(t)
    except:
        # no length: a single code; codes from 256 up need unichr()
        if t >= 256:
            return repr(unichr(t))
        return repr(chr(t))
    chars = []
    for code in t:
        chars.append(chr(code))
    return repr(join(chars,''))

def codegen(name,map,comments=1):

    """ Returns Python source for the given map.

        name is inserted into the docstring of the generated module.
        map is a dictionary from encoded values to (unicode, comment)
        tuples or to plain unicode values.

        Comments are included in the source, if comments is true (default).

        Note that a special 'IDENTITY' key, if present, is deleted from
        map, i.e. the dictionary passed in is modified.

    """
    # l collects the pieces of the module; they are joined with
    # newlines at the end.  The first piece is the fixed module header.
    l = [
        '''\
""" Python Character Mapping Codec generated from '%s' with gencodec.py.

Written by Marc-Andre Lemburg (mal@lemburg.com).

"""#"

import codecs

### Codec APIs

class Codec(codecs.Codec):

    def encode(self,input,errors='strict'):

        return codecs.charmap_encode(input,errors,encoding_map)

    def decode(self,input,errors='strict'):

        return codecs.charmap_decode(input,errors,decoding_map)

class StreamWriter(Codec,codecs.StreamWriter):
    pass

class StreamReader(Codec,codecs.StreamReader):
    pass

### encodings module API

def getregentry():

    return (Codec().encode,Codec().decode,StreamReader,StreamWriter)

### Decoding Map
''' % name,
        ]

    # splits is 0 while the plain "decoding_map = {" literal is open
    # and non-zero once entries are written inside an update({...}) call;
    # it decides which closing text is needed.
    if map.has_key("IDENTITY"):
        l.append("decoding_map = codecs.make_identity_dict(range(%d))"
                 % map["IDENTITY"])
        l.append("decoding_map.update({")
        splits = 1
        # remove the marker so that it is not written as a mapping
        del map["IDENTITY"]
    else:
        l.append("decoding_map = {")
        splits = 0

    mappings = map.items()
    mappings.sort()
    append = l.append
    i = 0       # number of entries in the dictionary display being written
    for e,value in mappings:
        try:
            (u,c) = value
        except TypeError:
            # not a (unicode, comment) pair: use the value as is,
            # without a comment
            u = value
            c = ''
        key = keyrepr(e)
        if c and comments:
            append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
        else:
            append('\t%s: %s,' % (key,unicoderepr(u)))
        i += 1
        if i == 4096:
            # Split the definition into parts so that the Python
            # parser doesn't dump core
            if splits == 0:
                append('}')
            else:
                append('})')
            append('decoding_map.update({')
            i = 0
            splits = splits + 1
    # close the dictionary display that is still open
    if splits == 0:
        append('}')
    else:
        append('})')
    append('''
### Encoding Map

encoding_map = codecs.make_encoding_map(decoding_map)
''')
    return string.join(l,'\n')

def pymap(name,map,pyfile,comments=1):

    """ Writes the source generated by codegen() for map to the
        file pyfile.

    """
    # Generate first: pyfile is not touched if codegen() fails.
    source = codegen(name,map,comments)
    out = open(pyfile,'w')
    out.write(source)
    out.close()

def marshalmap(name,map,marshalfile):

    """ Writes a marshalled copy of map to the file marshalfile.

        Every value of map must be a (unicode, comment) pair; name
        is not used.

    """
    table = {}
    for code, entry in map.items():
        target, comment = entry
        table[code] = (target, comment)
    out = open(marshalfile,'wb')
    marshal.dump(table,out)
    out.close()

def convertdir(dir,prefix='',comments=1):

    mapnames = os.listdir(dir)
    for mapname in mapnames:
        name = os.path.split(mapname)[1]
        name = string.replace(name,'-','_')
        name = string.split(name, '.')[0]
        name = string.lower(name)
        codefile = name + '.py'
        marshalfile = name + '.mapping'
        print 'converting %s to %s and %s' % (mapname,
                                              prefix + codefile,
                                              prefix + marshalfile)
        try:
            map = readmap(os.path.join(dir,mapname))
            if not map:
                print '* map is empty; skipping'
            else:
                pymap(mapname, map, prefix + codefile,comments)
                marshalmap(mapname, map, prefix + marshalfile)
        except ValueError:
            print '* conversion failed'

def rewritepythondir(dir,prefix='',comments=1):

    mapnames = os.listdir(dir)
    for mapname in mapnames:
        if not mapname.endswith('.mapping'):
            continue
        codefile = mapname[:-len('.mapping')] + '.py'
        print 'converting %s to %s' % (mapname,
                                       prefix + codefile)
        try:
            map = marshal.load(open(os.path.join(dir,mapname),
                               'rb'))
            if not map:
                print '* map is empty; skipping'
            else:
                pymap(mapname, map, prefix + codefile,comments)
        except ValueError, why:
            print '* conversion failed: %s' % why

if __name__ == '__main__':

    import sys
    # The command line arguments are passed on unchanged as the
    # positional arguments of the conversion function.  Change the
    # constant below to regenerate modules from existing .mapping
    # files instead of parsing mapping files.
    if 1:
        apply(convertdir,tuple(sys.argv[1:]))
    else:
        apply(rewritepythondir,tuple(sys.argv[1:]))

--- NEW FILE: h2py.py ---
#! /usr/bin/env python

# Read #define's and translate to Python code.
# Handle #include statements.
# Handle #define macros with one argument.
# Anything that isn't recognized or doesn't translate into valid
# Python is ignored.

# Without filename arguments, acts as a filter.
# If one or more filenames are given, output is written to corresponding
# filenames in the local directory, translated to all uppercase, with
# the extension replaced by ".py".

# By passing one or more options of the form "-i regular_expression"
# you can specify additional strings to be ignored.  This is useful
# e.g. to ignore casts to u_long: simply specify "-i '(u_long)'".

# XXX To do:
# - turn trailing C comments into Python comments
# - turn C Boolean operators "&& || !" into Python "and or not"
# - what to do about #if(def)?
# - what to do about macros with multiple parameters?

import sys, regex, regsub, string, getopt, os

# All patterns are compiled with the regex module; \( and \) delimit
# groups in the pattern strings below.

# "#define NAME <whitespace>"; group 1 is NAME.
p_define = regex.compile('^[\t ]*#[\t ]*define[\t ]+\([a-zA-Z0-9_]+\)[\t ]+')

# "#define NAME(ARG) <whitespace>"; group 1 is NAME, group 2 is ARG.
p_macro = regex.compile(
  '^[\t ]*#[\t ]*define[\t ]+'
  '\([a-zA-Z0-9_]+\)(\([_a-zA-Z][_a-zA-Z0-9]*\))[\t ]+')

# "#include <FILE"; group 1 is FILE.
p_include = regex.compile('^[\t ]*#[\t ]*include[\t ]+<\([a-zA-Z0-9_/\.]+\)')

# C comments (the closing "*/" is optional) and C++ "//" comments.
p_comment = regex.compile('/\*\([^*]+\|\*+[^/]\)*\(\*+/\)?')
p_cpp_comment = regex.compile('//.*')

# Patterns that are replaced by a space in the body of a definition;
# the -i option appends to this list.
ignores = [p_comment, p_cpp_comment]

# Character literal in single quotes.
p_char = regex.compile("'\(\\\\.[^\\\\]*\|[^\\\\]\)'")

# Names of the include files handled so far; used to process each
# include file only once.
filedict = {}

# Directories searched for include files: taken from the ';'-separated
# environment variable 'include' or 'INCLUDE', on BeOS from
# 'BEINCLUDES', and otherwise /usr/include.
try:
    searchdirs=string.splitfields(os.environ['include'],';')
except KeyError:
    try:
        searchdirs=string.splitfields(os.environ['INCLUDE'],';')
    except KeyError:
        try:
            if string.find( sys.platform, "beos" ) == 0:
                searchdirs=string.splitfields(os.environ['BEINCLUDES'],';')
            else:
                raise KeyError
        except KeyError:
            searchdirs=['/usr/include']

def main():
    """Parse the command line and translate stdin or each file named."""
    global filedict
    opts, args = getopt.getopt(sys.argv[1:], 'i:')
    for flag, value in opts:
        if flag == '-i':
            ignores.append(regex.compile(value))
    if not args:
        args = ['-']
    for filename in args:
        if filename == '-':
            # act as a filter
            sys.stdout.write('# Generated by h2py from stdin\n')
            process(sys.stdin, sys.stdout)
            continue
        fp = open(filename, 'r')
        # The output goes to the current directory: the base name in
        # upper case with the extension replaced by '.py'.
        stem = os.path.basename(filename)
        dot = stem.rfind('.')
        if dot > 0:
            stem = stem[:dot]
        outfile = stem.upper() + '.py'
        outfp = open(outfile, 'w')
        outfp.write('# Generated by h2py from %s\n' % filename)
        # Start a new record of seen include files and enter the file
        # itself, by its name relative to the search directory.
        filedict = {}
        for dir in searchdirs:
            if filename.startswith(dir):
                filedict[filename[len(dir)+1:]] = None  # no '/' trailing
                break
        process(fp, outfp)
        outfp.close()
        fp.close()

def process(fp, outfp, env = {}):
    lineno = 0
    while 1:
        line = fp.readline()
        if not line: break
        lineno = lineno + 1
        n = p_define.match(line)
        if n >= 0:
            # gobble up continuation lines
            while line[-2:] == '\\\n':
                nextline = fp.readline()
                if not nextline: break
                lineno = lineno + 1
                line = line + nextline
            name = p_define.group(1)
            body = line[n:]
            # replace ignored patterns by spaces
            for p in ignores:
                body = regsub.gsub(p, ' ', body)
            # replace char literals by ord(...)
            body = regsub.gsub(p_char, 'ord(\\0)', body)
            stmt = '%s = %s\n' % (name, string.strip(body))
            ok = 0
            try:
                exec stmt in env
            except:
                sys.stderr.write('Skipping: %s' % stmt)
            else:
                outfp.write(stmt)
        n =p_macro.match(line)
        if n >= 0:
            macro, arg = p_macro.group(1, 2)
            body = line[n:]
            for p in ignores:
                body = regsub.gsub(p, ' ', body)
            body = regsub.gsub(p_char, 'ord(\\0)', body)
            stmt = 'def %s(%s): return %s\n' % (macro, arg, body)
            try:
                exec stmt in env
            except:
                sys.stderr.write('Skipping: %s' % stmt)
            else:
                outfp.write(stmt)
        if p_include.match(line) >= 0:
            regs = p_include.regs
            a, b = regs[1]
            filename = line[a:b]
            if not filedict.has_key(filename):
                filedict[filename] = None
                inclfp = None
                for dir in searchdirs:
                    try:
                        inclfp = open(dir + '/' + filename, 'r')
                        break
                    except IOError:
                        pass
                if inclfp:
                    outfp.write(
                            '\n# Included from %s\n' % filename)
                    process(inclfp, outfp, env)
                else:
                    sys.stderr.write('Warning - could not find file %s' % filename)

# Not guarded by a __name__ test: this also runs when the file is imported.
main()

--- NEW FILE: ifdef.py ---
#! /usr/bin/env python

# Selectively preprocess #ifdef / #ifndef statements.
# Usage:
# ifdef [-Dname] ... [-Uname] ... [file] ...
#
# This scans the file(s), looking for #ifdef and #ifndef preprocessor
# commands that test for one of the names mentioned in the -D and -U
# options.  On standard output it writes a copy of the input file(s)
# minus those code sections that are suppressed by the selected
# combination of defined/undefined symbols.  The #if(n)def/#else/#endif
# lines themselves (if the #if(n)def tests for one of the mentioned
# names) are removed as well.

# Features: Arbitrary nesting of recognized and unrecognized
# preprocessor statements works correctly.  Unrecognized #if* commands
# are left in place, so it will never remove too much, only too
# little.  It does accept whitespace around the '#' character.

# Restrictions: There should be no comments or other symbols on the
# #if(n)def lines.  The effect of #define/#undef commands in the input
# file or in included files is not taken into account.  Tests using
# #if and the defined() pseudo function are not recognized.  The #elif
# command is not recognized.  Improper nesting is not detected.
# Lines that look like preprocessor commands but which are actually
# part of comments or string literals will be mistaken for
# preprocessor commands.

import sys
import regex
import getopt
import string

defs = []
undefs = []

def main():
    """Collect the -D and -U names, then filter stdin or each file named."""
    opts, args = getopt.getopt(sys.argv[1:], 'D:U:')
    for flag, name in opts:
        if flag == '-D':
            defs.append(name)
        if flag == '-U':
            undefs.append(name)
    # Without file arguments, act as a filter.
    for filename in args or ['-']:
        if filename == '-':
            process(sys.stdin, sys.stdout)
            continue
        f = open(filename, 'r')
        process(f, sys.stdout)
        f.close()

def process(fpi, fpo):
    """Copy fpi to fpo, resolving #ifdef/#ifndef tests on known names.

    Names listed in the globals defs and undefs are taken to be defined
    resp. undefined: the sections they suppress are left out, together
    with the #ifdef/#ifndef, #else and #endif lines of those tests.
    All other lines, including preprocessor commands that are not
    recognized, are copied unchanged.
    """
    keywords = ('if', 'ifdef', 'ifndef', 'else', 'endif')
    ok = 1      # true while the current lines are to be copied
    # One entry (ok, ko, word) per open #if*: ok is the value to restore
    # at the #endif, ko tells whether the section up to the #else is
    # kept (1), dropped (0), or the test is not ours to resolve (-1).
    stack = []
    while 1:
        line = fpi.readline()
        if not line: break
        # join continuation lines
        while line[-2:] == '\\\n':
            nextline = fpi.readline()
            if not nextline: break
            line = line + nextline
        tmp = line.strip()
        if tmp[:1] != '#':
            if ok: fpo.write(line)
            continue
        words = tmp[1:].split()
        # A lone '#' (null directive) has no keyword at all; treat it
        # like any other command that is not recognized.
        if not words or words[0] not in keywords:
            if ok: fpo.write(line)
            continue
        keyword = words[0]
        if keyword in ('ifdef', 'ifndef') and len(words) == 2:
            if keyword == 'ifdef':
                ko = 1
            else:
                ko = 0
            word = words[1]
            if word in defs:
                stack.append((ok, ko, word))
                if not ko: ok = 0
            elif word in undefs:
                stack.append((ok, not ko, word))
                if ko: ok = 0
            else:
                stack.append((ok, -1, word))
                if ok: fpo.write(line)
        elif keyword == 'if':
            stack.append((ok, -1, ''))
            if ok: fpo.write(line)
        elif keyword == 'else' and stack:
            s_ok, s_ko, s_word = stack[-1]
            if s_ko < 0:
                if ok: fpo.write(line)
            else:
                # switch to the other branch of a resolved test
                s_ko = not s_ko
                ok = s_ok
                if not s_ko: ok = 0
                stack[-1] = s_ok, s_ko, s_word
        elif keyword == 'endif' and stack:
            s_ok, s_ko, s_word = stack[-1]
            if s_ko < 0:
                if ok: fpo.write(line)
            del stack[-1]
            ok = s_ok
        else:
            sys.stderr.write('Unknown keyword %s\n' % keyword)
    if stack:
        sys.stderr.write('stack: %s\n' % stack)

# Not guarded by a __name__ test: this also runs when the file is imported.
main()

--- NEW FILE: lfcr.py ---
#! /usr/bin/env python

"Replace LF with CRLF in argument files.  Print names of changed files."

import sys, re, os
for file in sys.argv[1:]:
    if os.path.isdir(file):
        print file, "Directory!"
        continue
    data = open(file, "rb").read()
    if '\0' in data:
        print file, "Binary!"
        continue
    newdata = re.sub("\r?\n", "\r\n", data)
    if newdata != data:
        print file
        f = open(file, "wb")
        f.write(newdata)
        f.close()

--- NEW FILE: linktree.py ---
#! /usr/bin/env python

# linktree
#
# Make a copy of a directory tree with symbolic links to all files in the
# original tree.
# All symbolic links go to a special symbolic link at the top, so you
# can easily fix things if the original source tree moves.
# See also "mkreal".
#
# usage: linktree oldtree newtree [linkto]

import sys, os

LINK = '.LINK' # Name of special symlink at the top.

debug = 0

def main():
    if not 3 <= len(sys.argv) <= 4:
        print 'usage:', sys.argv[0], 'oldtree newtree [linkto]'
        return 2
    oldtree, newtree = sys.argv[1], sys.argv[2]
    if len(sys.argv) > 3:
        link = sys.argv[3]
        link_may_fail = 1
    else:
        link = LINK
        link_may_fail = 0
    if not os.path.isdir(oldtree):
        print oldtree + ': not a directory'
        return 1
    try:
        os.mkdir(newtree, 0777)
    except os.error, msg:
        print newtree + ': cannot mkdir:', msg
        return 1
    linkname = os.path.join(newtree, link)
    try:
        os.symlink(os.path.join(os.pardir, oldtree), linkname)
    except os.error, msg:
        if not link_may_fail:
            print linkname + ': cannot symlink:', msg
            return 1
        else:
            print linkname + ': warning: cannot symlink:', msg
    linknames(oldtree, newtree, link)
    return 0

def linknames(old, new, link):
    if debug: print 'linknames', (old, new, link)
    try:
        names = os.listdir(old)
    except os.error, msg:
        print old + ': warning: cannot listdir:', msg
        return
    for name in names:
        if name not in (os.curdir, os.pardir):
            oldname = os.path.join(old, name)
            linkname = os.path.join(link, name)
            newname = os.path.join(new, name)
            if debug > 1: print oldname, newname, linkname
            if os.path.isdir(oldname) and \
               not os.path.islink(oldname):
                try:
                    os.mkdir(newname, 0777)
                    ok = 1
                except:
                    print newname + \
                          ': warning: cannot mkdir:', msg
                    ok = 0
                if ok:
                    linkname = os.path.join(os.pardir,
                                            linkname)
                    linknames(oldname, newname, linkname)
            else:
                os.symlink(linkname, newname)

# The return value of main() becomes the exit status of the program.
sys.exit(main())

--- NEW FILE: lll.py ---
#! /usr/bin/env python

# Find symbolic links and show where they point to.
# Arguments are directories to search; default is current directory.
# No recursion.
# (This is a totally different program from "findsymlinks.py"!)

import sys, os

def lll(dirname):
    for name in os.listdir(dirname):
        if name not in (os.curdir, os.pardir):
            full = os.path.join(dirname, name)
            if os.path.islink(full):
                print name, '->', os.readlink(full)

# List the links of each directory argument (default: the current
# directory).  With more than one argument, every listing is preceded
# by a 'dirname:' header and listings are separated by an empty line.
args = sys.argv[1:]
if not args: args = [os.curdir]
first = 1
for arg in args:
    if len(args) > 1:
        if not first: print
        first = 0
        print arg + ':'
    lll(arg)

--- NEW FILE: logmerge.py ---
#! /usr/bin/env python

"""Consolidate a bunch of CVS or RCS logs read from stdin.

Input should be the output of a CVS or RCS logging command, e.g.

    cvs log -rrelease14:

which dumps all log messages from release1.4 upwards (assuming that
release 1.4 was tagged with tag 'release14').  Note the trailing
colon!

This collects all the revision records and outputs them sorted by date
rather than by file, collapsing duplicate revision records, i.e.,
records with the same message for different files.

The -t option causes it to truncate (discard) the last revision log
entry; this is useful when using something like the above cvs log
command, which shows the revisions including the given tag, while you
probably want everything *since* that tag.

XXX This code was created by reverse engineering CVS 1.9 and RCS 5.7
from their output.

"""

import os, sys, getopt, string, re

sep1 = '='*77 + '\n'                    # file separator
sep2 = '-'*28 + '\n'                    # revision separator

def main():
    """Main program

    Reads the log from stdin and prints the merged revision records,
    newest first.  Options:

    -t  discard the last revision record of each file
    -r  reverse the output order (oldest first)
    """
    truncate_last = 0
    reverse = 0
    opts, args = getopt.getopt(sys.argv[1:], "tr")
    for o, a in opts:
        if o == '-t':
            truncate_last = 1
        elif o == '-r':
            reverse = 1
    database = []
    while 1:
        chunk = read_chunk(sys.stdin)
        if not chunk:
            break
        records = digest_chunk(chunk)
        if truncate_last:
            # A slice is used because a file may have no revision
            # records at all; "del records[-1]" would then fail.
            del records[-1:]
        database.extend(records)
    database.sort()
    if not reverse:
        database.reverse()
    format_output(database)

def read_chunk(fp):
    """Read a chunk -- data for one file, ending with sep1.

    Return the chunk as a list of parts, split at the sep2 lines; each
    part is a non-empty list of lines.  Lines that are followed by end
    of file instead of a separator are not returned.

    """
    parts = []
    current = []
    line = fp.readline()
    while line and line != sep1:
        if line != sep2:
            current.append(line)
        elif current:
            parts.append(current)
            current = []
        line = fp.readline()
    # line is empty at end of file, and sep1 otherwise
    if line and current:
        parts.append(current)
    return parts

def digest_chunk(chunk):
    """Digest a chunk -- extract working file name and revisions

    chunk is a list of parts as returned by read_chunk(): the first
    part is the file header, each further part one revision record.

    Return a list with one tuple (date, working_file, rev, author,
    text) per revision record.  Items that cannot be found are None;
    text is the list of message lines.  A revision or date line that
    cannot be parsed is kept as part of the text.
    """
    lines = chunk[0]
    key = 'Working file:'
    keylen = len(key)
    for line in lines:
        if line[:keylen] == key:
            working_file = line[keylen:].strip()
            break
    else:
        working_file = None
    records = []
    for lines in chunk[1:]:
        revline = lines[0]
        # A record may consist of the revision line only.
        if len(lines) > 1:
            dateline = lines[1]
        else:
            dateline = ''
        text = lines[2:]
        words = dateline.split()
        author = None
        if len(words) >= 3 and words[0] == 'date:':
            dateword = words[1]
            timeword = words[2]
            if timeword[-1:] == ';':
                timeword = timeword[:-1]
            date = dateword + ' ' + timeword
            if len(words) >= 5 and words[3] == 'author:':
                author = words[4]
                if author[-1:] == ';':
                    author = author[:-1]
        else:
            # Not a date line, so it belongs to the text.
            date = None
            if dateline:
                text.insert(0, dateline)
        words = revline.split()
        if len(words) >= 2 and words[0] == 'revision':
            rev = words[1]
        else:
            # Not a revision line, so it belongs to the text as well.
            rev = None
            text.insert(0, revline)
        records.append((date, working_file, rev, author, text))
    return records

def format_output(database):
    prevtext = None
    prev = []
    database.append((None, None, None, None, None)) # Sentinel
    for (date, working_file, rev, author, text) in database:
        if text != prevtext:
            if prev:
                print sep2,
                for (p_date, p_working_file, p_rev, p_author) in prev:
                    print p_date, p_author, p_working_file, p_rev
                sys.stdout.writelines(prevtext)
            prev = []
        prev.append((date, working_file, rev, author))
        prevtext = text

# Not guarded by a __name__ test: this also runs when the file is imported.
main()

--- NEW FILE: mailerdaemon.py ---
"""mailerdaemon - classes to parse mailer-daemon messages"""

import string
import rfc822
import calendar
import re
import os
import sys

# String exception, raised when a message cannot be parsed.
Unparseable = 'mailerdaemon.Unparseable'

class ErrorMessage(rfc822.Message):
    """A mailer-daemon message read from a file."""

    def __init__(self, fp):
        rfc822.Message.__init__(self, fp)
        # Lower-cased subject; set by is_warning() when it returns 0.
        self.sub = ''

    def is_warning(self):
        """Return 1 if the subject marks the message as a mere warning.

        Otherwise return 0; the lower-cased subject, if there is one,
        is then saved in self.sub.
        """
        sub = self.getheader('Subject')
        if not sub:
            return 0
        sub = sub.lower()
        if sub.startswith('waiting mail') or sub.find('warning') >= 0:
            return 1
        self.sub = sub
        return 0

    def get_errors(self):
        """Return the result of the first parser that accepts the body.

        Every parser in EMPARSERS is called with the file, positioned
        at the start of the body, and self.sub.  Unparseable is raised
        if all of them fail.
        """
        for parser in EMPARSERS:
            self.rewindbody()
            try:
                errors = parser(self.fp, self.sub)
            except Unparseable:
                continue
            return errors
        raise Unparseable

# List of re's or tuples of re's.
# If a re, it should contain at least a group (?P<email>...) which
# should refer to the email address.  The re can also contain a group
# (?P<reason>...) which should refer to the reason (error message).
# If no reason is present, the emparse_list_reason list is used to
# find a reason.
# If a tuple, the tuple should contain 2 re's.  The first re finds a
# location, the second re is repeated one or more times to find
# multiple email addresses.  The second re is matched (not searched)
# where the previous match ended.
# The re's are compiled using the re module.
emparse_list_list = [
    'error: (?P<reason>unresolvable): (?P<email>.+)',
    ('----- The following addresses had permanent fatal errors -----\n',
     '(?P<email>[^ \n].*)\n( .*\n)?'),
    'remote execution.*\n.*rmail (?P<email>.+)',
    ('The following recipients did not receive your message:\n\n',
     ' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
    '------- Failure Reasons  --------\n\n(?P<reason>.*)\n(?P<email>.*)',
    '^<(?P<email>.*)>:\n(?P<reason>.*)',
    '^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
    '^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
    '^Original-Recipient: rfc822;(?P<email>.*)',
    '^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
    '^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
    '^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
    '^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
    '^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
    ]
# compile the re's in the list and store them in-place.
# A string becomes a compiled re, a tuple of strings a tuple of
# compiled re's; all are compiled with re.MULTILINE.
for i in range(len(emparse_list_list)):
    x = emparse_list_list[i]
    if type(x) is type(''):
        x = re.compile(x, re.MULTILINE)
    else:
        xl = []
        # the loop variable reuses the name x; x is rebound to the
        # resulting tuple right after the loop
        for x in x:
            xl.append(re.compile(x, re.MULTILINE))
        x = tuple(xl)
        del xl
    emparse_list_list[i] = x
    del x
# remove the loop variable from the module namespace
del i

# list of re's used to find reasons (error messages).
# if a string, "<>" is replaced by a copy of the email address.
# The expressions are searched for in order.  After the first match,
# no more expressions are searched for.  So, order is important.
# The strings are compiled in emparse_list(), once per email address;
# the compiled re's are used as they are.
emparse_list_reason = [
    r'^5\d{2} <>\.\.\. (?P<reason>.*)',
    '<>\.\.\. (?P<reason>.*)',
    re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
    re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
    re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
    ]
# Start of a "From:" line; emparse_list() searches for email addresses
# only in the text in front of the first one.
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
    """Parse the message text read from fp with the emparse_list_list re's.

    fp  -- file object; everything up to end of file is read
    sub -- subject of the message, used as the reason if nothing
           better is found

    Returns a list of strings 'email: reason' in which runs of
    whitespace are replaced by a single space.  Raises Unparseable if
    no email address is found.
    """
    data = fp.read()
    # Only the text in front of the first "From:" line is searched for
    # addresses.
    res = emparse_list_from.search(data)
    if res is None:
        from_index = len(data)
    else:
        from_index = res.start(0)
    errors = []
    emails = []
    reason = None
    # Only the first expression that matches is used.
    for regexp in emparse_list_list:
        if type(regexp) is type(()):
            # Tuple: the first re finds the location, the second one is
            # matched repeatedly from there, once per address.
            res = regexp[0].search(data, 0, from_index)
            if res is not None:
                try:
                    reason = res.group('reason')
                except IndexError:
                    # the re has no 'reason' group
                    pass
                while 1:
                    res = regexp[1].match(data, res.end(0), from_index)
                    if res is None:
                        break
                    emails.append(res.group('email'))
                break
        else:
            res = regexp.search(data, 0, from_index)
            if res is not None:
                emails.append(res.group('email'))
                try:
                    reason = res.group('reason')
                except IndexError:
                    # the re has no 'reason' group
                    pass
                break
    if not emails:
        raise Unparseable
    if not reason:
        # Fall back on the subject, then try the emparse_list_reason
        # expressions in order.
        reason = sub
        if reason[:15] == 'returned mail: ':
            reason = reason[15:]
        for regexp in emparse_list_reason:
            if type(regexp) is type(''):
                # A string is tried for each address separately, with
                # '<>' replaced by the escaped address.  The addresses
                # are handled from the back so that deleting one does
                # not affect the indexes still to come.
                for i in range(len(emails)-1,-1,-1):
                    email = emails[i]
                    exp = re.compile(string.join(string.split(regexp, '<>'), re.escape(email)), re.MULTILINE)
                    res = exp.search(data)
                    if res is not None:
                        errors.append(string.join(string.split(string.strip(email)+': '+res.group('reason'))))
                        del emails[i]
                continue
            # A compiled re gives one reason for all remaining addresses.
            res = regexp.search(data)
            if res is not None:
                reason = res.group('reason')
                break
    for email in emails:
        errors.append(string.join(string.split(string.strip(email)+': '+reason)))
    return errors

# The parsers tried, in order, by ErrorMessage.get_errors().
EMPARSERS = [emparse_list, ]

def sort_numeric(a, b):
    """Comparison function that orders strings of digits by their value.

    Returns -1, 0 or 1, as required by list.sort().
    """
    x = int(a, 10)
    y = int(b, 10)
    return (x > y) - (x < y)

def parsedir(dir, modify):
    """Parse every numerically named message file in directory dir.

    Changes the working directory to dir, reads each file whose name
    consists only of digits (in numeric order) as an ErrorMessage and
    prints one status line per file.  Afterwards prints totals and a
    table of the distinct errors found: occurrence count, first and last
    file (with date) and the error text.

    If modify is true, every file that was handled (warning-only or
    successfully parsed) is renamed to ',' + name; unparseable files are
    left untouched.
    """
    os.chdir(dir)
    pat = re.compile('^[0-9]*$')
    errordict = {}      # error text -> number of occurrences
    errorfirst = {}     # error text -> 'file (date)' of first occurrence
    errorlast = {}      # error text -> 'file (date)' of last occurrence
    nok = nwarn = nbad = 0

    # find all numeric file names and sort them
    files = filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.'))
    files.sort(sort_numeric)

    for fn in files:
        # Let's try to parse the file.
        fp = open(fn)
        m = ErrorMessage(fp)
        sender = m.getaddr('From')
        # Trailing comma: the verdict printed below lands on the same line
        print '%s\t%-40s\t'%(fn, sender[1]),

        if m.is_warning():
            fp.close()
            print 'warning only'
            nwarn = nwarn + 1
            if modify:
                os.rename(fn, ','+fn)
##              os.unlink(fn)
            continue

        try:
            errors = m.get_errors()
        except Unparseable:
            print '** Not parseable'
            nbad = nbad + 1
            fp.close()
            continue
        print len(errors), 'errors'

        # Remember them
        for e in errors:
            try:
                # Elements 1 and 2 of the date tuple are month and day
                mm, dd = m.getdate('date')[1:1+2]
                date = '%s %02d' % (calendar.month_abbr[mm], dd)
            except:
                # Any failure to obtain the date falls back to a placeholder
                date = '??????'
            if not errordict.has_key(e):
                errordict[e] = 1
                errorfirst[e] = '%s (%s)' % (fn, date)
            else:
                errordict[e] = errordict[e] + 1
            errorlast[e] = '%s (%s)' % (fn, date)

        fp.close()
        nok = nok + 1
        if modify:
            os.rename(fn, ','+fn)
##          os.unlink(fn)

    print '--------------'
    print nok, 'files parsed,',nwarn,'files warning-only,',
    print nbad,'files unparseable'
    print '--------------'
    # Sorting the tuples orders the report by ascending occurrence count
    list = []
    for e in errordict.keys():
        list.append((errordict[e], errorfirst[e], errorlast[e], e))
    list.sort()
    for num, first, last, e in list:
        print '%d %s - %s\t%s' % (num, first, last, e)

def main():
    """Command-line entry point: [-d] [folder ...].

    A leading -d turns on modify mode (and is removed from sys.argv).
    Each remaining argument is processed with parsedir(); with no folder
    arguments the built-in default mail folder is used.
    """
    argv = sys.argv
    modify = 0
    if argv[1:2] == ['-d']:
        modify = 1
        del argv[1]
    folders = argv[1:]
    if not folders:
        folders = ['/ufs/jack/Mail/errorsinbox']
    for folder in folders:
        parsedir(folder, modify)

# Run main() when executed as a script; the second test also triggers it
# when argv[0] equals this module's name.
if __name__ == '__main__' or sys.argv[0] == __name__:
    main()

--- NEW FILE: md5sum.py ---
#! /usr/bin/env python

"""Python utility to print MD5 checksums of argument files.

Works with Python 1.5.2 and later.
"""

import sys, md5

# Number of bytes requested per read() call while checksumming a file
BLOCKSIZE = 1024*1024

def hexify(s):
    """Return the two-digit lowercase hex encoding of every character of s."""
    digits = ""
    for ch in s:
        digits = digits + "%02x" % ord(ch)
    return digits

def main():
    args = sys.argv[1:]
    if not args:
        sys.stderr.write("usage: %s file ...\n" % sys.argv[0])
        sys.exit(2)
    for file in sys.argv[1:]:
        f = open(file, "rb")
        sum = md5.new()
        while 1:
            block = f.read(BLOCKSIZE)
            if not block:
                break
            sum.update(block)
        f.close()
        print hexify(sum.digest()), file

# Only run when executed as a script, not when imported.
if __name__ == "__main__":
    main()

--- NEW FILE: methfix.py ---
#! /usr/bin/env python

# Fix Python source files to avoid using
#       def method(self, (arg1, ..., argn)):
# instead of the more rational
#       def method(self, arg1, ..., argn):
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a python module.
# Symbolic links are always ignored (except as explicit directory
# arguments).  Of course, the original file is kept as a back-up
# (with a "~" attached to its name).
# It complains about binaries (files containing null bytes)
# and about files that are ostensibly not Python files: if the first
# line starts with '#!' and does not contain the string 'python'.
#
# Changes made are reported to stdout in a diff-like format.
#
# Undoubtedly you can do this using find and sed or perl, but this is
# a nice example of Python code that recurses down a directory tree
# and uses regular expressions.  Also note several subtleties like
# preserving the file's mode and avoiding to even write a temp file
# when no changes are needed for a file.
#
# NB: by changing only the function fixline() you can turn this
# into a program for a different change to Python programs...

import sys
import regex
import os
from stat import *
import string

# Output shorthands: err() for error messages and dbg() for trace output
# both write to stderr; rep() reports the changes made on stdout.
err = sys.stderr.write
dbg = err
rep = sys.stdout.write

def main():
    """Process every file or directory named on the command line.

    Directories are handled by recursedown(), plain files by fix();
    symbolic links that are not directories are refused.  Exits with
    status 2 when no arguments are given, otherwise with 1 if anything
    failed and 0 if not.
    """
    args = sys.argv[1:]
    if not args: # No arguments
        err('usage: ' + sys.argv[0] + ' file-or-directory ...\n')
        sys.exit(2)
    bad = 0
    for arg in args:
        if os.path.isdir(arg):
            failed = recursedown(arg)
        elif os.path.islink(arg):
            err(arg + ': will not process symbolic links\n')
            failed = 1
        else:
            failed = fix(arg)
        if failed:
            bad = 1
    sys.exit(bad)

# The obsolete 'regex' module is replaced by 're' here; the import is kept
# next to its only user so this definition is self-contained.
import re

# Matches file names made of letters, digits and underscores plus '.py'
ispythonprog = re.compile(r'^[a-zA-Z0-9_]+\.py$')
def ispython(name):
    """Return true if name (a bare file name) looks like a Python module."""
    return ispythonprog.match(name) is not None

def recursedown(dirname):
    dbg('recursedown(' + `dirname` + ')\n')
    bad = 0
    try:
        names = os.listdir(dirname)
    except os.error, msg:
        err(dirname + ': cannot list directory: ' + `msg` + '\n')
        return 1
    names.sort()
    subdirs = []
    for name in names:
        if name in (os.curdir, os.pardir): continue
        fullname = os.path.join(dirname, name)
        if os.path.islink(fullname): pass
        elif os.path.isdir(fullname):
            subdirs.append(fullname)
        elif ispython(name):
            if fix(fullname): bad = 1
    for fullname in subdirs:
        if recursedown(fullname): bad = 1
    return bad

def fix(filename):
##  dbg('fix(' + `filename` + ')\n')
    try:
        f = open(filename, 'r')
    except IOError, msg:
        err(filename + ': cannot open: ' + `msg` + '\n')
        return 1
    head, tail = os.path.split(filename)
    tempname = os.path.join(head, '@' + tail)
    g = None
    # If we find a match, we rewind the file and start over but
    # now copy everything to a temp file.
    lineno = 0
    while 1:
        line = f.readline()
        if not line: break
        lineno = lineno + 1
        if g is None and '\0' in line:
            # Check for binary files
            err(filename + ': contains null bytes; not fixed\n')
            f.close()
            return 1
        if lineno == 1 and g is None and line[:2] == '#!':
            # Check for non-Python scripts
            words = string.split(line[2:])
            if words and regex.search('[pP]ython', words[0]) < 0:
                msg = filename + ': ' + words[0]
                msg = msg + ' script; not fixed\n'
                err(msg)
                f.close()
                return 1
        while line[-2:] == '\\\n':
            nextline = f.readline()
            if not nextline: break
            line = line + nextline
            lineno = lineno + 1
        newline = fixline(line)
        if newline != line:
            if g is None:
                try:
                    g = open(tempname, 'w')
                except IOError, msg:
                    f.close()
                    err(tempname+': cannot create: '+\
                        `msg`+'\n')
                    return 1
                f.seek(0)
                lineno = 0
                rep(filename + ':\n')
                continue # restart from the beginning
            rep(`lineno` + '\n')
            rep('< ' + line)
            rep('> ' + newline)
        if g is not None:
            g.write(newline)

    # End of file
    f.close()
    if not g: return 0 # No changes

    # Finishing touch -- move files

    # First copy the file's mode to the temp file
    try:
        statbuf = os.stat(filename)
        os.chmod(tempname, statbuf[ST_MODE] & 07777)
    except os.error, msg:
        err(tempname + ': warning: chmod failed (' + `msg` + ')\n')
    # Then make a backup of the original file as filename~
    try:
        os.rename(filename, filename + '~')
    except os.error, msg:
        err(filename + ': warning: backup failed (' + `msg` + ')\n')
    # Now move the temp file to the original file
    try:
        os.rename(tempname, filename)
    except os.error, msg:
        err(filename + ': rename failed (' + `msg` + ')\n')
        return 1
    # Return succes
    return 0

# The obsolete 'regex' module is replaced by 're' here; the import is kept
# next to its only user so this definition is self-contained.
import re

# Matches an indented "def name(self, (args)):" header.  Group 1 is the
# parenthesized tuple parameter including its parentheses, group 2 is the
# text between those parentheses.
fixpat = r'^[ \t]+def +[a-zA-Z0-9_]+ *\( *self *, *(\( *(.*) *\)) *\) *:'
fixprog = re.compile(fixpat)

def fixline(line):
    """Return line with "def m(self, (a, b)):" rewritten as "def m(self, a, b):".

    Lines that do not match fixpat are returned unchanged.
    """
    m = fixprog.match(line)
    if m is not None:
        (a, b), (c, d) = m.span(1), m.span(2)
        # Replace the parenthesized group by its contents
        line = line[:a] + line[c:d] + line[b:]
    return line

# Runs unconditionally: there is no __name__ guard, so importing this
# file also executes main().
main()

--- NEW FILE: mkreal.py ---
#! /usr/bin/env python

# mkreal
#
# turn a symlink to a directory into a real directory

import sys
import os
from stat import *

# Shorthand used by mkrealdir()
join = os.path.join

# String exception value; nothing in this script raises it.
error = 'mkreal error'

# Number of bytes copied per read()/write() call
BUFSIZE = 32*1024

def mkrealfile(name):
    """Replace the symlink name by a real copy of the file it points to.

    The copy gets the permission bits of the link's target.  Raises
    os.error if name is not a symlink, and IOError if the target cannot
    be opened as a file.
    """
    st = os.stat(name) # Get the mode
    mode = S_IMODE(st[ST_MODE])
    os.readlink(name) # Make sure again it's a symlink
    # Binary mode: the contents must be copied byte for byte
    f_in = open(name, 'rb') # This ensures it's a file
    try:
        # The target stays reachable through f_in after the link is removed
        os.unlink(name)
        f_out = open(name, 'wb')
        try:
            while 1:
                buf = f_in.read(BUFSIZE)
                if not buf: break
                f_out.write(buf)
        finally:
            # Close explicitly so the data is on disk before the chmod
            f_out.close()
    finally:
        f_in.close()
    os.chmod(name, mode)

def mkrealdir(name):
    """Replace the symlink name by a real directory of symlinks.

    The new directory gets the mode of the link's target and holds one
    symlink per entry of the original target directory, each pointing
    into that target through os.pardir joined with the old link value.
    """
    mode = S_IMODE(os.stat(name)[ST_MODE]) # Mode of the link's target
    target = os.readlink(name)
    entries = os.listdir(name)
    os.unlink(name)
    os.mkdir(name, mode)
    os.chmod(name, mode)
    # The new links live one level deeper than the old one did
    target = join(os.pardir, target)
    skip = (os.curdir, os.pardir)
    for entry in entries:
        if entry in skip:
            continue
        os.symlink(join(target, entry), join(name, entry))

def main():
    sys.stdout = sys.stderr
    progname = os.path.basename(sys.argv[0])
    if progname == '-c': progname = 'mkreal'
    args = sys.argv[1:]
    if not args:
        print 'usage:', progname, 'path ...'
        sys.exit(2)
    status = 0
    for name in args:
        if not os.path.islink(name):
            print progname+':', name+':', 'not a symlink'
            status = 1
        else:
            if os.path.isdir(name):
                mkrealdir(name)
            else:
                mkrealfile(name)
    sys.exit(status)

# Runs unconditionally: there is no __name__ guard, so importing this
# file also executes main().
main()

--- NEW FILE: ndiff.py ---
#! /usr/bin/env python

# Module ndiff version 1.6.0
# Released to the public domain 08-Dec-2000,
# by Tim Peters (tim.one@home.com).

# Provided as-is; use at your own risk; no warranty; no promises; enjoy!

"""ndiff [-q] file1 file2
    or
ndiff (-r1 | -r2) < ndiff_output > file1_or_file2

Print a human-friendly file difference report to stdout.  Both inter-
and intra-line differences are noted.  In the second form, recreate file1
(-r1) or file2 (-r2) on stdout, from an ndiff report on stdin.

In the first form, if -q ("quiet") is not specified, the first two lines
of output are

-: file1
+: file2

Each remaining line begins with a two-letter code:

    "- "    line unique to file1
    "+ "    line unique to file2
    "  "    line common to both files
    "? "    line not present in either input file

Lines beginning with "? " attempt to guide the eye to intraline
differences, and were not present in either input file.  These lines can be
confusing if the source files contain tab characters.

The first file can be recovered by retaining only lines that begin with
"  " or "- ", and deleting those 2-character prefixes; use ndiff with -r1.

The second file can be recovered similarly, but by retaining only "  " and
"+ " lines; use ndiff with -r2; or, on Unix, the second file can be
recovered by piping the output through

    sed -n '/^[+ ] /s/^..//p'

See module comments for details and programmatic interface.
"""

# Kept in step with the "Module ndiff version" line at the top of the file.
__version__ = 1, 6, 0

# SequenceMatcher tries to compute a "human-friendly diff" between
# two sequences (chiefly picturing a file as a sequence of lines,
# and a line as a sequence of characters, here).  Unlike e.g. UNIX(tm)
# diff, the fundamental notion is the longest *contiguous* & junk-free
# matching subsequence.  That's what catches peoples' eyes.  The
# Windows(tm) windiff has another interesting notion, pairing up elements
# that appear uniquely in each sequence.  That, and the method here,
# appear to yield more intuitive difference reports than does diff.  This
# method appears to be the least vulnerable to synching up on blocks
# of "junk lines", though (like blank lines in ordinary text files,
# or maybe "<P>" lines in HTML files).  That may be because this is
# the only method of the 3 that has a *concept* of "junk" <wink>.
#
# Note that ndiff makes no claim to produce a *minimal* diff.  To the
# contrary, minimal diffs are often counter-intuitive, because they
# synch up anywhere possible, sometimes accidental matches 100 pages
# apart.  Restricting synch points to contiguous matches preserves some
# notion of locality, at the occasional cost of producing a longer diff.
#
# With respect to junk, an earlier version of ndiff simply refused to
# *start* a match with a junk element.  The result was cases like this:
#     before: private Thread currentThread;
#     after:  private volatile Thread currentThread;
# If you consider whitespace to be junk, the longest contiguous match
# not starting with junk is "e Thread currentThread".  So ndiff reported
# that "e volatil" was inserted between the 't' and the 'e' in "private".
# While an accurate view, to people that's absurd.  The current version
# looks for matching blocks that are entirely junk-free, then extends the
# longest one of those as far as possible but only with matching junk.
# So now "currentThread" is matched, then extended to suck up the
# preceding blank; then "private" is matched, and extended to suck up the
# following blank; then "Thread" is matched; and finally ndiff reports
# that "volatile " was inserted before "Thread".  The only quibble
# remaining is that perhaps it was really the case that " volatile"
# was inserted after "private".  I can live with that <wink>.
#
# NOTE on junk:  the module-level names
#    IS_LINE_JUNK
#    IS_CHARACTER_JUNK
# can be set to any functions you like.  The first one should accept
# a single string argument, and return true iff the string is junk.
# The default is whether the regexp r"\s*#?\s*$" matches (i.e., a
# line without visible characters, except for at most one splat).
# The second should accept a string of length 1 etc.  The default is
# whether the character is a blank or tab (note: bad idea to include
# newline in this!).
#
# After setting those, you can call fcompare(f1name, f2name) with the
# names of the files you want to compare.  The difference report
# is sent to stdout.  Or you can call main(args), passing what would
# have been in sys.argv[1:] had the cmd-line form been used.

from difflib import SequenceMatcher

import string
# Set to a true value to make fancy_replace() print debugging output
TRACE = 0

# define what "junk" means
import re

def IS_LINE_JUNK(line, pat=re.compile(r"\s*#?\s*$").match):
    """Return true iff line is junk: only whitespace plus at most one '#'.

    pat is the pattern's bound match method, captured at definition time.
    """
    return not (pat(line) is None)

def IS_CHARACTER_JUNK(ch, ws=" \t"):
    """Return true iff the character ch is junk, i.e. a blank or a tab."""
    is_junk = ch in ws
    return is_junk

del re

# meant for dumping lines
def dump(tag, x, lo, hi):
    for i in xrange(lo, hi):
        print tag, x[i],

def plain_replace(a, alo, ahi, b, blo, bhi):
    """Report a[alo:ahi] as deleted and b[blo:bhi] as inserted.

    Both ranges must be non-empty.  The shorter block is dumped first,
    which reduces the burden on short-term memory if the blocks are of
    very different sizes; on a tie the deletions come first.
    """
    assert alo < ahi and blo < bhi
    deletions_first = (bhi - blo >= ahi - alo)
    if deletions_first:
        dump('-', a, alo, ahi)
    dump('+', b, blo, bhi)
    if not deletions_first:
        dump('-', a, alo, ahi)

# When replacing one block of lines with another, this guy searches
# the blocks for *similar* lines; the best-matching pair (if any) is
# used as a synch point, and intraline difference marking is done on
# the similar pair.  Lots of work, but often worth it.

def fancy_replace(a, alo, ahi, b, blo, bhi):
    """Report the replacement of a[alo:ahi] by b[blo:bhi], with intraline marks.

    Searches both blocks for the most similar pair of non-identical lines
    and uses it as a synch point: the parts before and after the pair are
    handled recursively via fancy_helper(), and the pair itself is
    printed with '?' guide lines by printq().  If no pair is similar
    enough, an identical pair is used instead when one exists; otherwise
    the whole thing is handed to plain_replace().
    """
    if TRACE:
        print '*** fancy_replace', alo, ahi, blo, bhi
        dump('>', a, alo, ahi)
        dump('<', b, blo, bhi)

    # don't synch up unless the lines have a similarity score of at
    # least cutoff; best_ratio tracks the best score seen so far
    best_ratio, cutoff = 0.74, 0.75
    cruncher = SequenceMatcher(IS_CHARACTER_JUNK)
    eqi, eqj = None, None   # 1st indices of equal lines (if any)

    # search for the pair that matches best without being identical
    # (identical lines must be junk lines, & we don't want to synch up
    # on junk -- unless we have to)
    for j in xrange(blo, bhi):
        bj = b[j]
        cruncher.set_seq2(bj)
        for i in xrange(alo, ahi):
            ai = a[i]
            if ai == bj:
                if eqi is None:
                    eqi, eqj = i, j
                continue
            cruncher.set_seq1(ai)
            # computing similarity is expensive, so use the quick
            # upper bounds first -- have seen this speed up messy
            # compares by a factor of 3.
            # note that ratio() is only expensive to compute the first
            # time it's called on a sequence pair; the expensive part
            # of the computation is cached by cruncher
            if cruncher.real_quick_ratio() > best_ratio and \
                  cruncher.quick_ratio() > best_ratio and \
                  cruncher.ratio() > best_ratio:
                best_ratio, best_i, best_j = cruncher.ratio(), i, j
    # best_ratio starts just below cutoff, so it is still below it here
    # iff no pair scored above the initial value
    if best_ratio < cutoff:
        # no non-identical "pretty close" pair
        if eqi is None:
            # no identical pair either -- treat it as a straight replace
            plain_replace(a, alo, ahi, b, blo, bhi)
            return
        # no close pair, but an identical pair -- synch up on that
        best_i, best_j, best_ratio = eqi, eqj, 1.0
    else:
        # there's a close pair, so forget the identical pair (if any)
        eqi = None

    # a[best_i] very similar to b[best_j]; eqi is None iff they're not
    # identical
    if TRACE:
        print '*** best_ratio', best_ratio, best_i, best_j
        dump('>', a, best_i, best_i+1)
        dump('<', b, best_j, best_j+1)

    # pump out diffs from before the synch point
    fancy_helper(a, alo, best_i, b, blo, best_j)

    # do intraline marking on the synch pair
    aelt, belt = a[best_i], b[best_j]
    if eqi is None:
        # pump out a '-', '?', '+', '?' quad for the synched lines
        # atags/btags get one marker character per character of aelt/belt
        atags = btags = ""
        cruncher.set_seqs(aelt, belt)
        for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
            la, lb = ai2 - ai1, bj2 - bj1
            if tag == 'replace':
                atags += '^' * la
                btags += '^' * lb
            elif tag == 'delete':
                atags += '-' * la
            elif tag == 'insert':
                btags += '+' * lb
            elif tag == 'equal':
                atags += ' ' * la
                btags += ' ' * lb
            else:
                raise ValueError, 'unknown tag ' + `tag`
        printq(aelt, belt, atags, btags)
    else:
        # the synch pair is identical
        print ' ', aelt,

    # pump out diffs from after the synch point
    fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)

def fancy_helper(a, alo, ahi, b, blo, bhi):
    """Report a[alo:ahi] versus b[blo:bhi] when either range may be empty.

    Two non-empty ranges go to fancy_replace(); a single non-empty range
    is dumped as pure deletions or insertions; two empty ranges produce
    no output.
    """
    have_a = alo < ahi
    have_b = blo < bhi
    if have_a and have_b:
        fancy_replace(a, alo, ahi, b, blo, bhi)
    elif have_a:
        dump('-', a, alo, ahi)
    elif have_b:
        dump('+', b, blo, bhi)

# Crap to deal with leading tabs in "?" output.  Can hurt, but will
# probably help most of the time.

def printq(aline, bline, atags, btags):
    common = min(count_leading(aline, "\t"),
                 count_leading(bline, "\t"))
    common = min(common, count_leading(atags[:common], " "))
    print "-", aline,
    if count_leading(atags, " ") < len(atags):
        print "?", "\t" * common + atags[common:]
    print "+", bline,
    if count_leading(btags, " ") < len(btags):
        print "?", "\t" * common + btags[common:]

def count_leading(line, ch):
    """Return the number of consecutive occurrences of ch at the start of line."""
    count = 0
    for c in line:
        if c != ch:
            break
        count = count + 1
    return count

def fail(msg):
    """Write msg, a blank line and the module docstring to stderr; return 0."""
    import sys
    sys.stderr.write(msg + "\n\n")
    sys.stderr.write(__doc__)
    return 0

# open a file & return the file object; gripe and return 0 if it
# couldn't be opened
def fopen(fname):
    try:
        return open(fname, 'r')
    except IOError, detail:
        return fail("couldn't open " + fname + ": " + str(detail))

# open two files & spray the diff to stdout; return false iff a problem
def fcompare(f1name, f2name):
    f1 = fopen(f1name)
    f2 = fopen(f2name)
    if not f1 or not f2:
        return 0

    a = f1.readlines(); f1.close()
    b = f2.readlines(); f2.close()

    cruncher = SequenceMatcher(IS_LINE_JUNK, a, b)
    for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
        if tag == 'replace':
            fancy_replace(a, alo, ahi, b, blo, bhi)
        elif tag == 'delete':
            dump('-', a, alo, ahi)
        elif tag == 'insert':
            dump('+', b, blo, bhi)
        elif tag == 'equal':
            dump(' ', a, alo, ahi)
        else:
            raise ValueError, 'unknown tag ' + `tag`

    return 1

# crack args (sys.argv[1:] is normal) & compare;
# return false iff a problem

def main(args):
    """Crack the command-line arguments in args and run the requested action.

    args is what sys.argv[1:] would hold: either "[-q] file1 file2" to
    print a difference report, or "-r1" / "-r2" to rebuild one of the
    files from an ndiff report on stdin.  Returns true on success and
    false (after reporting through fail()) on any usage problem.
    """
    import getopt
    try:
        opts, args = getopt.getopt(args, "qr:")
    except getopt.error, detail:
        return fail(str(detail))
    noisy = 1
    qseen = rseen = 0
    for opt, val in opts:
        if opt == "-q":
            qseen = 1
            noisy = 0
        elif opt == "-r":
            rseen = 1
            whichfile = val
    if qseen and rseen:
        return fail("can't specify both -q and -r")
    if rseen:
        if args:
            return fail("no args allowed with -r option")
        # Compare against the two legal values explicitly: a substring
        # test against "12" would also let "12" through, which restore()
        # cannot handle.
        if whichfile in ("1", "2"):
            restore(whichfile)
            return 1
        return fail("-r value must be 1 or 2")
    if len(args) != 2:
        return fail("need 2 filename args")
    f1name, f2name = args
    if noisy:
        print '-:', f1name
        print '+:', f2name
    return fcompare(f1name, f2name)

def restore(which):
    import sys
    tag = {"1": "- ", "2": "+ "}[which]
    prefixes = ("  ", tag)
    for line in sys.stdin.readlines():
        if line[:2] in prefixes:
            print line[2:],

if __name__ == '__main__':
    import sys
    args = sys.argv[1:]
    # "-profile" anywhere among the arguments runs main() under the
    # profiler, saves the statistics in ndiff.pro and prints them sorted
    # by time.
    if "-profile" in args:
        import profile, pstats
        args.remove("-profile")
        statf = "ndiff.pro"
        profile.run("main(args)", statf)
        stats = pstats.Stats(statf)
        stats.strip_dirs().sort_stats('time').print_stats()
    else:
        main(args)

--- NEW FILE: nm2def.py ---
#! /usr/bin/env python
"""nm2def.py

Helpers to extract symbols from Unix libs and auto-generate
Windows definition files from them. Depends on nm(1). Tested
on Linux and Solaris only (-p option to nm is for Solaris only).

By Marc-Andre Lemburg, Aug 1998.

Additional notes: the output of nm is supposed to look like this:

acceler.o:
000001fd T PyGrammar_AddAccelerators
         U PyGrammar_FindDFA
00000237 T PyGrammar_RemoveAccelerators
         U _IO_stderr_
         U exit
         U fprintf
         U free
         U malloc
         U printf

grammar1.o:
00000000 T PyGrammar_FindDFA
00000034 T PyGrammar_LabelRepr
         U _PyParser_TokenNames
         U abort
         U printf
         U sprintf

...

Even if this isn't the default output of your nm, there is generally an
option to produce this format (since it is the original v7 Unix format).

"""
import os,re,string,sys

# Default library scanned by symbols(); the name embeds the first three
# characters of sys.version.
PYTHONLIB = 'libpython'+sys.version[:3]+'.a'
# DLL name built from the first and third characters of sys.version
PC_PYTHONLIB = 'Python'+sys.version[0]+sys.version[2]+'.dll'
# Shell command template run by symbols(); %s is replaced by the library name
NM = 'nm -p -g %s'                      # For Linux, use "nm -g %s"

def symbols(lib=PYTHONLIB,types=('T','C','D')):
    """Run nm on lib and return a dictionary name -> (address, type).

    Only output lines made of exactly three fields (address, type, name)
    whose type is listed in types are used; empty lines and lines
    containing a colon are ignored.
    """
    found = {}
    for raw in os.popen(NM % lib).readlines():
        line = string.strip(raw)
        if not line or ':' in line:
            continue
        fields = string.split(line)
        if len(fields) != 3:
            continue
        address, kind, name = fields
        if kind in types:
            found[name] = address, kind
    return found

def export_list(symbols):
    """Format a symbols() dictionary as the body of an EXPORTS section.

    Symbols of type 'C' or 'D' come first, sorted, one per line and each
    followed by ' DATA'; after an empty line the remaining symbols
    follow, sorted, one per line.  Every name is preceded by a tab.
    """
    data = []
    code = []
    for name, (addr, kind) in symbols.items():
        entry = '\t' + name
        if kind in ('C', 'D'):
            data.append(entry)
        else:
            code.append(entry)
    data.sort()
    code.sort()
    data_part = ''.join([entry + ' DATA\n' for entry in data])
    return data_part + '\n' + '\n'.join(code)

# Definition file template
DEF_TEMPLATE = """\
EXPORTS
%s
"""

# Special symbols that have to be included even though they don't
# pass the filter
SPECIALS = (
    )

def filter_Python(symbols,specials=SPECIALS):
    """Remove from symbols, in place, every name that is not a Python symbol.

    A name is kept if it starts with 'Py' or '_Py', or if it is listed in
    specials.  Returns None.
    """
    # Iterate over a snapshot of the keys: entries are deleted from the
    # mapping inside the loop.
    for name in list(symbols.keys()):
        if name[:2] == 'Py' or name[:3] == '_Py':
            continue
        if name not in specials:
            del symbols[name]

def main():
    """Write a definition file for the Python symbols of PYTHONLIB to stdout."""
    s = symbols(PYTHONLIB)
    filter_Python(s)
    exports = export_list(s)
    f = sys.stdout # open('PC/python_nt.def','w')
    f.write(DEF_TEMPLATE % (exports))
    # Never close sys.stdout itself; only close a file opened above.
    if f is sys.stdout:
        f.flush()
    else:
        f.close()

# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()

--- NEW FILE: objgraph.py ---
#! /usr/bin/env python

# objgraph
#
# Read "nm -o" input (on IRIX: "nm -Bo") of a set of libraries or modules
# and print various interesting listings, such as:
#
# - which names are used but not defined in the set (and used where),
# - which names are defined in the set (and where),
# - which modules use which other modules,
# - which modules are used by which other modules.
#
# Usage: objgraph [-cdu] [file] ...
# -c: print callers per objectfile
# -d: print callees per objectfile
# -u: print usage of undefined symbols
# If none of -cdu is specified, all are assumed.
# Use "nm -o" to generate the input (on IRIX: "nm -Bo"),
# e.g.: nm -o /lib/libc.a | objgraph

import sys
import string
import os
import getopt
import regex

# Types of symbols, as single type letters from the nm output.
# readinput() records a symbol as a definition if its letter is in
# 'definitions', as an undefined reference if it is in 'externals', and
# silently skips it if it is in 'ignore'; any other letter is reported
# as an unknown type.
#
definitions = 'TRGDSBAEC'
externals = 'UV'
ignore = 'Nntrgdsbavuc'

# Regular expression to parse "nm -o" output.
# Group 1 is the file name in front of the colon, group 2 the type
# letter and group 3 the symbol name.
#
matcher = regex.compile('\(.*\):\t?........ \(.\) \(.*\)$')

# Store "item" in "dict" under "key".
# The dictionary maps keys to lists of items.
# If there is no list for the key yet, it is created.
#
def store(dict, key, item):
    """Append item to the list kept in dict under key, creating it if needed."""
    try:
        dict[key].append(item)
    except KeyError:
        dict[key] = [item]

# Return a flattened version of a list of strings: the concatenation
# of its elements with intervening spaces.
#
def flat(list):
    """Return the strings in list joined by single spaces ('' if it is empty)."""
    # One join instead of repeated concatenation, which copies the
    # growing result for every element.
    return ' '.join(list)

# Global variables mapping defined/undefined names to files and back.
# All four map a key to a list and are filled in by readinput().
#
file2undef = {}         # file name -> names it references but does not define
def2file = {}           # defined name -> files defining it
file2def = {}           # file name -> names it defines
undef2file = {}         # undefined name -> files referencing it

# Read one input file and merge the data into the tables.
# Argument is an open file.
#
def readinput(file):
    """Read "nm -o" output from the open file and merge it into the global tables.

    Each line matching 'matcher' yields a file name, a type letter and a
    symbol name.  Definitions go into def2file/file2def, undefined
    references into file2undef/undef2file; letters listed in 'ignore'
    are skipped and any other letter is reported on stdout.
    """
    while 1:
        s = file.readline()
        if not s:
            break
        # If you get any output from this line,
        # it is probably caused by an unexpected input line:
        # NOTE(review): the bare "s" below is an expression statement; in a
        # script it produces no output, so such lines are skipped silently.
        if matcher.search(s) < 0: s; continue # Shouldn't happen
        # regs[0] is the span of the whole match, regs[1..3] the groups
        (ra, rb), (r1a, r1b), (r2a, r2b), (r3a, r3b) = matcher.regs[:4]
        fn, name, type = s[r1a:r1b], s[r3a:r3b], s[r2a:r2b]
        if type in definitions:
            store(def2file, name, fn)
            store(file2def, fn, name)
        elif type in externals:
            store(file2undef, fn, name)
            store(undef2file, name, fn)
        elif not type in ignore:
            print fn + ':' + name + ': unknown type ' + type

# Print all names that were undefined in some module and where they are
# defined.
#
def printcallee():
    flist = file2undef.keys()
    flist.sort()
    for file in flist:
        print file + ':'
        elist = file2undef[file]
        elist.sort()
        for ext in elist:
            if len(ext) >= 8:
                tabs = '\t'
            else:
                tabs = '\t\t'
            if not def2file.has_key(ext):
                print '\t' + ext + tabs + ' *undefined'
            else:
                print '\t' + ext + tabs + flat(def2file[ext])

# Print for each module the names of the other modules that use it.
#
def printcaller():
    files = file2def.keys()
    files.sort()
    for file in files:
        callers = []
        for label in file2def[file]:
            if undef2file.has_key(label):
                callers = callers + undef2file[label]
        if callers:
            callers.sort()
            print file + ':'
            lastfn = ''
            for fn in callers:
                if fn <> lastfn:
                    print '\t' + fn
                lastfn = fn
        else:
            print file + ': unused'

# Print undefined names and where they are used.
#
def printundef():
    undefs = {}
    for file in file2undef.keys():
        for ext in file2undef[file]:
            if not def2file.has_key(ext):
                store(undefs, ext, file)
    elist = undefs.keys()
    elist.sort()
    for ext in elist:
        print ext + ':'
        flist = undefs[ext]
        flist.sort()
        for file in flist:
            print '\t' + file

# Print warning messages about names defined in more than one file.
#
def warndups():
    savestdout = sys.stdout
    sys.stdout = sys.stderr
    names = def2file.keys()
    names.sort()
    for name in names:
        if len(def2file[name]) > 1:
            print 'warning:', name, 'multiply defined:',
            print flat(def2file[name])
    sys.stdout = savestdout

# Main program
#
def main():
    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'cdu')
    except getopt.error:
        sys.stdout = sys.stderr
        print 'Usage:', os.path.basename(sys.argv[0]),
        print           '[-cdu] [file] ...'
        print '-c: print callers per objectfile'
        print '-d: print callees per objectfile'
        print '-u: print usage of undefined symbols'
        print 'If none of -cdu is specified, all are assumed.'
        print 'Use "nm -o" to generate the input (on IRIX: "nm -Bo"),'
        print 'e.g.: nm -o /lib/libc.a | objgraph'
        return 1
    optu = optc = optd = 0
    for opt, void in optlist:
        if opt == '-u':
            optu = 1
        elif opt == '-c':
            optc = 1
        elif opt == '-d':
            optd = 1
    if optu == optc == optd == 0:
        optu = optc = optd = 1
    if not args:
        args = ['-']
    for file in args:
        if file == '-':
            readinput(sys.stdin)
        else:
            readinput(open(file, 'r'))
    #
    warndups()
    #
    more = (optu + optc + optd > 1)
    if optd:
        if more:
            print '---------------All callees------------------'
        printcallee()
    if optu:
        if more:
            print '---------------Undefined callees------------'
        printundef()
    if optc:
        if more:
            print '---------------All Callers------------------'
        printcaller()
    return 0

# Call the main program.
# Use its return value as exit status.
# Catch interrupts to avoid stack trace (exit status 1 in that case).
# Note: this runs unconditionally, also when the file is imported.
#
try:
    sys.exit(main())
except KeyboardInterrupt:
    sys.exit(1)

--- NEW FILE: parseentities.py ---
#!/usr/local/bin/python
""" Utility for parsing HTML entity definitions available from:

      http://www.w3.org/ as e.g.
      http://www.w3.org/TR/REC-html40/HTMLlat1.ent

    Input is read from stdin, output is written to stdout in form of a
    Python snippet defining a dictionary "entitydefs" mapping literal
    entity name to character or numeric entity.

    Marc-Andre Lemburg, mal@lemburg.com, 1999.
    Use as you like. NO WARRANTIES.

"""
import re,sys
import TextTools

# Matches one <!ENTITY name CDATA "charcode" -- comment --> declaration;
# the groups are the entity name, the quoted character code and the
# comment text.
entityRE = re.compile('<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')

def parse(text,pos=0,endpos=None):
    """Collect the entity declarations found in text[pos:endpos].

    Returns a dictionary mapping each entity name to a tuple
    (charcode, comment).  pos defaults to the start of text and endpos
    to its end.
    """
    # pos is honoured as passed in (it used to be reset to 0 here).
    if endpos is None:
        endpos = len(text)
    d = {}
    while 1:
        m = entityRE.search(text,pos,endpos)
        if not m:
            break
        name,charcode,comment = m.groups()
        d[name] = charcode,comment
        # Continue the search right after this declaration
        pos = m.end()
    return d

def writefile(f,defs):

    """ Write defs to f as Python source defining "entitydefs".

        defs maps entity names to (charcode, comment) tuples. Entries
        are written sorted by name. A numeric reference below 256 is
        written as an octal string escape, anything else as the repr()
        of the charcode; the comment is collapsed and appended as a
        trailing Python comment.

    """
    emit = f.write
    emit("entitydefs = {\n")
    entries = defs.items()
    entries.sort()
    for entry in entries:
        name = entry[0]
        value, remark = entry[1]
        literal = None
        if value[:2] == '&#':
            # Numeric character reference: "&#" digits ";"
            number = int(value[2:-1])
            if number < 256:
                literal = "'\%o'" % number
        if literal is None:
            literal = repr(value)
        remark = TextTools.collapse(remark)
        emit("    '%s':\t%s,  \t# %s\n" % (name,literal,remark))
    emit('\n}\n')

# Script entry point: read the entity declarations from the file named
# by the first command line argument (stdin if absent) and write the
# generated Python snippet to the file named by the second argument
# (stdout if absent).
if __name__ == '__main__':
    if len(sys.argv) > 1:
        infile = open(sys.argv[1])
    else:
        infile = sys.stdin
    if len(sys.argv) > 2:
        outfile = open(sys.argv[2],'w')
    else:
        outfile = sys.stdout
    # The whole input is read at once; parse() works on a single string
    text = infile.read()
    defs = parse(text)
    writefile(outfile,defs)

--- NEW FILE: pathfix.py ---
#! /usr/bin/env python

# Change the #! line occurring in Python scripts.  The new interpreter
# pathname must be given with a -i option.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a python module.
# Symbolic links are always ignored (except as explicit directory
# arguments).  Of course, the original file is kept as a back-up
# (with a "~" attached to its name).
#
# Undoubtedly you can do this using find and sed or perl, but this is
# a nice example of Python code that recurses down a directory tree
# and uses regular expressions.  Also note several subtleties, such as
# preserving the file's mode and not even writing a temp file when no
# changes are needed for a file.
#
# NB: by changing only the function fixfile() you can turn this
# into a program for a different change to Python programs...

import sys
import regex
import os
from stat import *
import string
import getopt

err = sys.stderr.write
dbg = err
rep = sys.stdout.write

new_interpreter = None

def main():
    """Parse the command line and process every file or directory given.

    The -i option supplies the new interpreter path, which must be
    absolute.  Directories are searched recursively; symbolic links
    given as plain-file arguments are refused.  Exits with status 2 on
    a usage error, otherwise with 1 if anything went wrong and 0 if
    everything succeeded.
    """
    global new_interpreter
    usage = ('usage: %s -i /interpreter file-or-directory ...\n' %
             sys.argv[0])
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'i:')
    except getopt.error, msg:
        # msg is an exception instance, not a string, so it has to be
        # converted before it can be concatenated with '\n'.
        err(str(msg) + '\n')
        err(usage)
        sys.exit(2)
    for o, a in opts:
        if o == '-i':
            new_interpreter = a
    if not new_interpreter or new_interpreter[0] != '/' or not args:
        err('-i option or file-or-directory missing\n')
        err(usage)
        sys.exit(2)
    bad = 0
    for arg in args:
        # isdir() is tested first, so a symbolic link to a directory
        # given explicitly is still descended into.
        if os.path.isdir(arg):
            if recursedown(arg): bad = 1
        elif os.path.islink(arg):
            err(arg + ': will not process symbolic links\n')
            bad = 1
        else:
            if fix(arg): bad = 1
    sys.exit(bad)

# Names made up only of letters, digits and underscores, ending in ".py"
ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
    """Tell whether name looks like the file name of a Python module."""
    # A negative match() result is treated as "no match"
    matched = ispythonprog.match(name)
    return matched >= 0

def recursedown(dirname):
    dbg('recursedown(' + `dirname` + ')\n')
    bad = 0
    try:
        names = os.listdir(dirname)
    except os.error, msg:
        err(dirname + ': cannot list directory: ' + `msg` + '\n')
        return 1
    names.sort()
    subdirs = []
    for name in names:
        if name in (os.curdir, os.pardir): continue
        fullname = os.path.join(dirname, name)
        if os.path.islink(fullname): pass
        elif os.path.isdir(fullname):
            subdirs.append(fullname)
        elif ispython(name):
            if fix(fullname): bad = 1
    for fullname in subdirs:
        if recursedown(fullname): bad = 1
    return bad

def fix(filename):
##  dbg('fix(' + `filename` + ')\n')
    try:
        f = open(filename, 'r')
    except IOError, msg:
        err(filename + ': cannot open: ' + `msg` + '\n')
        return 1
    line = f.readline()
    fixed = fixline(line)
    if line == fixed:
        rep(filename+': no change\n')
        f.close()
        return
    head, tail = os.path.split(filename)
    tempname = os.path.join(head, '@' + tail)
    try:
        g = open(tempname, 'w')
    except IOError, msg:
        f.close()
        err(tempname+': cannot create: '+`msg`+'\n')
        return 1
    rep(filename + ': updating\n')
    g.write(fixed)
    BUFSIZE = 8*1024
    while 1:
        buf = f.read(BUFSIZE)
        if not buf: break
        g.write(buf)
    g.close()
    f.close()

    # Finishing touch -- move files

    # First copy the file's mode to the temp file
    try:
        statbuf = os.stat(filename)
        os.chmod(tempname, statbuf[ST_MODE] & 07777)
    except os.error, msg:
        err(tempname + ': warning: chmod failed (' + `msg` + ')\n')
    # Then make a backup of the original file as filename~
    try:
        os.rename(filename, filename + '~')
    except os.error, msg:
        err(filename + ': warning: backup failed (' + `msg` + ')\n')
    # Now move the temp file to the original file
    try:
        os.rename(tempname, filename)
    except os.error, msg:
        err(filename + ': rename failed (' + `msg` + ')\n')
        return 1
    # Return succes
    return 0

def fixline(line):
    """Return the replacement for a file's first line.

    Only a "#!" line that mentions "python" is replaced (by a "#!" line
    naming new_interpreter); any other line is returned unchanged.
    """
    is_shebang = line[:2] == '#!'
    if is_shebang and string.find(line, "python") >= 0:
        return '#! %s\n' % new_interpreter
    return line

main()

--- NEW FILE: pdeps.py ---
#! /usr/bin/env python

# pdeps
#
# Find dependencies between a bunch of Python modules.
#
# Usage:
#       pdeps file1.py file2.py ...
#
# Output:
# Four tables separated by lines like '--- Closure ---':
# 1) Direct dependencies, listing which module imports which other modules
# 2) The inverse of (1)
# 3) Indirect dependencies, or the closure of the above
# 4) The inverse of (3)
#
# To do:
# - command line options to select output type
# - option to automatically scan the Python library for referenced modules
# - option to limit output to particular modules

import sys
import regex
import os
import string

# Main program
#
def main():
    args = sys.argv[1:]
    if not args:
        print 'usage: pdeps file.py file.py ...'
        return 2
    #
    table = {}
    for arg in args:
        process(arg, table)
    #
    print '--- Uses ---'
    printresults(table)
    #
    print '--- Used By ---'
    inv = inverse(table)
    printresults(inv)
    #
    print '--- Closure of Uses ---'
    reach = closure(table)
    printresults(reach)
    #
    print '--- Closure of Used By ---'
    invreach = inverse(reach)
    printresults(invreach)
    #
    return 0

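# Compiled regular expressions to search for import statements.
# Beware: the names are the reverse of what one would expect --
# m_import matches "from MODULE ..." lines (capturing MODULE) and
# m_from matches "import NAMES" lines (capturing NAMES up to a '#').
#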
m_import = regex.compile('^[ \t]*from[ \t]+\([^ \t]+\)[ \t]+')
m_from = regex.compile('^[ \t]*import[ \t]+\([^#]+\)')

# Collect data from one file
#
def process(filename, table):
    """Record in table the modules imported by the file filename.

    The key is the file's base name with a trailing ".py" removed; the
    value is the list of distinct imported names in order of first
    appearance.  Lines joined with a trailing backslash are treated as
    one logical line.  An error from open() propagates to the caller.
    """
    fp = open(filename, 'r')
    mod = os.path.basename(filename)
    if mod[-3:] == '.py':
        mod = mod[:-3]
    table[mod] = deps = []
    try:
        while 1:
            line = fp.readline()
            if not line: break
            # readline() keeps the newline, so a continued line ends in
            # backslash-newline; drop both before appending the next line.
            while line[-2:] == '\\\n':
                nextline = fp.readline()
                if not nextline: break
                line = line[:-2] + nextline
            # regs[1] holds the (start, end) offsets of the first group
            if m_import.match(line) >= 0:
                (a, b), (a1, b1) = m_import.regs[:2]
            elif m_from.match(line) >= 0:
                (a, b), (a1, b1) = m_from.regs[:2]
            else: continue
            words = string.splitfields(line[a1:b1], ',')
            # print '#', line, words
            for word in words:
                word = string.strip(word)
                if word not in deps:
                    deps.append(word)
    finally:
        fp.close()

# Compute closure (this is in fact totally general)
#
def closure(table):
    """Return the transitive closure of table as a new dictionary.

    table maps each key to a list of items.  In the result every key
    also lists whatever is reachable through items that are themselves
    keys of table.  The lists of table itself are left untouched.
    """
    modules = table.keys()
    # Work on copies so the caller's lists are never modified
    reach = {}
    for name in modules:
        reach[name] = table[name][:]
    # Sweep over all keys until a complete pass adds nothing new
    while 1:
        grew = 0
        for name in modules:
            targets = reach[name]
            # targets may grow while being scanned; the additions are
            # then visited in this same pass.
            for via in targets:
                if via not in modules:
                    continue
                for extra in reach[via]:
                    if extra not in targets:
                        targets.append(extra)
                        grew = 1
        if not grew:
            break
    return reach

# Invert a table (this is again totally general).
# All keys of the original table are made keys of the inverse,
# so there may be empty lists in the inverse.
#
def inverse(table):
    """Return the inverse mapping of table.

    For every item listed under a key of table, the result lists that
    key under the item.  Each key of table is also a key of the result,
    with an empty list if nothing refers to it.
    """
    result = {}
    for source, targets in table.items():
        if not result.has_key(source):
            result[source] = []
        for target in targets:
            store(result, target, source)
    return result

# Store "item" in "dict" under "key".
# The dictionary maps keys to lists of items.
# If there is no list for the key yet, it is created.
#
def store(dict, key, item):
    """Append item to the list kept under key, creating the list if needed."""
    try:
        bucket = dict[key]
    except KeyError:
        dict[key] = [item]
    else:
        bucket.append(item)

# Tabulate results neatly
#
def printresults(table):
    modules = table.keys()
    maxlen = 0
    for mod in modules: maxlen = max(maxlen, len(mod))
    modules.sort()
    for mod in modules:
        list = table[mod]
        list.sort()
        print string.ljust(mod, maxlen), ':',
        if mod in list:
            print '(*)',
        for ref in list:
            print ref,
        print

# Call main and honor exit status
try:
    sys.exit(main())
except KeyboardInterrupt:
    sys.exit(1)

--- NEW FILE: pindent.py ---
#! /usr/bin/env python

# This file contains a class and a main program that perform three
# related (though complementary) formatting operations on Python
# programs.  When called as "pindent -c", it takes a valid Python
# program as input and outputs a version augmented with block-closing
# comments.  When called as "pindent -d", it assumes its input is a
# Python program with block-closing comments and outputs a commentless
# version.   When called as "pindent -r" it assumes its input is a
# Python program with block-closing comments but with its indentation
# messed up, and outputs a properly indented version.

# A "block-closing comment" is a comment of the form '# end <keyword>'
# where <keyword> is the keyword that opened the block.  If the
# opening keyword is 'def' or 'class', the function or class name may
# be repeated in the block-closing comment as well.  Here is an
# example of a program fully augmented with block-closing comments:

# def foobar(a, b):
#    if a == b:
#        a = a+1
#    elif a < b:
#        b = b-1
#        if b > a: a = a-1
#        # end if
#    else:
#        print 'oops!'
#    # end if
# # end def foobar

# Note that only the last part of an if...elif...else... block needs a
# block-closing comment; the same is true for other compound
# statements (e.g. try...except).  Also note that "short-form" blocks
# like the second 'if' in the example must be closed as well;
# otherwise the 'else' in the example would be ambiguous (remember
# that indentation is not significant when interpreting block-closing
# comments).

# The operations are idempotent (i.e. applied to their own output
# they yield an identical result).  Running first "pindent -c" and
# then "pindent -r" on a valid Python program produces a program that
# is semantically identical to the input (though its indentation may
# be different). Running "pindent -d" on that output produces a
# program that only differs from the original in indentation.

# Other options:
# -s stepsize: set the indentation step size (default 8)
# -t tabsize : set the number of spaces a tab character is worth (default 8)
# -e         : expand TABs into spaces
# file ...   : input file(s) (default standard input)
# The results always go to standard output

# Caveats:
# - comments ending in a backslash will be mistaken for continued lines
# - continuations using backslash are always left unchanged
# - continuations inside parentheses are not extra indented by -r
#   but must be indented for -c to work correctly (this breaks
#   idempotency!)
# - continued lines inside triple-quoted strings are totally garbled

# Secret feature:
# - On input, a block may also be closed with an "end statement" --
#   this is a block-closing comment without the '#' sign.

# Possible improvements:
# - check syntax based on transitions in 'next' table
# - better error reporting
# - better error recovery
# - check identifier after class/def

# The following wishes need a more complete tokenization of the source:
# - Don't get fooled by comments ending in backslash
# - reindent continuation lines indicated by backslash
# - handle continuation lines inside parentheses/braces/brackets
# - handle triple quoted strings spanning lines
# - realign comments
# - optionally do much more thorough reformatting, a la C indent

# Defaults
STEPSIZE = 8
TABSIZE = 8
EXPANDTABS = 0

import os
import re
import string
import sys

# Transition table: for each block keyword, the keywords that may
# follow it within the same compound statement, where 'end' stands for
# the block-closing comment.  The code below only uses the table to
# recognize block keywords (via has_key); the transitions themselves
# are not checked yet (see "Possible improvements" above).
next = {}
next['if'] = next['elif'] = 'elif', 'else', 'end'
next['while'] = next['for'] = 'else', 'end'
next['try'] = 'except', 'finally'
next['except'] = 'except', 'else', 'end'
next['else'] = next['finally'] = next['def'] = next['class'] = 'end'
next['end'] = ()
# Keywords that open a new compound statement
start = 'if', 'while', 'for', 'try', 'def', 'class'

class PythonIndenter:
    """Filter that adds, removes or obeys block-closing comments.

    Lines are read from the file object fpi and the result is written
    to the file object fpo.  Each of complete(), delete() and
    reformat() processes the whole input in one call.
    """

    def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
                 indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
        """Store the streams and layout settings and compile the patterns.

        fpi, fpo   -- input and output file objects
        indentsize -- number of columns per indentation level
        tabsize    -- number of columns a tab character is worth
        expandtabs -- if true, tabs in the output are expanded to spaces
        """
        self.fpi = fpi
        self.fpo = fpo
        self.indentsize = indentsize
        self.tabsize = tabsize
        self.lineno = 0
        self.expandtabs = expandtabs
        self._write = fpo.write
        # A line starting with a lowercase word, optionally followed by
        # an identifier (group 'id'), then a non-word character.
        self.kwprog = re.compile(
                r'^\s*(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # A block-closing line: optional '#', the word 'end', the
        # keyword being closed and optionally an identifier.
        self.endprog = re.compile(
                r'^\s*#?\s*end\s+(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # Leading blanks and tabs (always matches, possibly empty)
        self.wsprog = re.compile(r'^[ \t]*')
    # end def __init__

    def write(self, line):
        """Write line to the output, expanding tabs if so configured."""
        if self.expandtabs:
            self._write(string.expandtabs(line, self.tabsize))
        else:
            self._write(line)
        # end if
    # end def write

    def readline(self):
        """Read one physical line, counting it in self.lineno."""
        line = self.fpi.readline()
        if line: self.lineno = self.lineno + 1
        # end if
        return line
    # end def readline

    def error(self, fmt, *args):
        """Report a problem on stderr and mark it in the output.

        fmt is %-formatted with args if any are given.  The message is
        written to stderr with the current line number and also
        inserted into the output as a '### ... ###' line.
        """
        if args: fmt = fmt % args
        # end if
        sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
        self.write('### %s ###\n' % fmt)
    # end def error

    def getline(self):
        """Return the next logical line, or '' at end of input.

        Physical lines ending in a backslash are joined with their
        successors; the backslashes and newlines are kept.
        """
        line = self.readline()
        while line[-2:] == '\\\n':
            line2 = self.readline()
            if not line2: break
            # end if
            line = line + line2
        # end while
        return line
    # end def getline

    def putline(self, line, indent = None):
        """Write line, re-indented to indent levels if indent is given.

        With indent None the line is written unchanged.  Otherwise its
        leading blanks and tabs are replaced by indent*indentsize
        columns, expressed as tabs (of tabsize columns) plus spaces.
        """
        if indent is None:
            self.write(line)
            return
        # end if
        tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
        i = 0
        m = self.wsprog.match(line)
        if m: i = m.end()
        # end if
        self.write('\t'*tabs + ' '*spaces + line[i:])
    # end def putline

    def reformat(self):
        """Re-indent the input according to its block-closing comments.

        The nesting depth is the length of a stack of (opening keyword,
        most recent keyword) pairs: a keyword from 'start' pushes an
        entry, an end line pops one, and any other keyword known to the
        'next' table (elif, else, except, ...) is written one level
        less deep than the body of its block.
        """
        stack = []
        while 1:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                kw = 'end'
                kw2 = m.group('kw')
                if not stack:
                    self.error('unexpected end')
                elif stack[-1][0] != kw2:
                    self.error('unmatched end')
                # end if
                # The slice deletion is harmless on an empty stack
                del stack[-1:]
                self.putline(line, len(stack))
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    self.putline(line, len(stack))
                    stack.append((kw, kw))
                    continue
                # end if
                if next.has_key(kw) and stack:
                    # Continuation keyword: align it with the opener
                    self.putline(line, len(stack)-1)
                    kwa, kwb = stack[-1]
                    stack[-1] = kwa, kw
                    continue
                # end if
            # end if
            # Ordinary statement: indent to the current depth
            self.putline(line, len(stack))
        # end while
        if stack:
            self.error('unterminated keywords')
            for kwa, kwb in stack:
                self.write('\t%s\n' % kwa)
            # end for
        # end if
    # end def reformat

    def delete(self):
        """Copy the input to the output, dropping block-closing lines.

        Opening keywords and end lines are counted; a warning goes to
        stderr if the two counts differ.
        """
        begin_counter = 0
        end_counter = 0
        while 1:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                end_counter = end_counter + 1
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    begin_counter = begin_counter + 1
                # end if
            # end if
            self.putline(line)
        # end while
        if begin_counter - end_counter < 0:
            sys.stderr.write('Warning: input contained more end tags than expected\n')
        elif begin_counter - end_counter > 0:
            sys.stderr.write('Warning: input contained less end tags than expected\n')
        # end if
    # end def delete

    def complete(self):
        """Copy the input to the output, adding missing block-closing comments.

        Block structure is derived from the indentation of the input.
        State: 'current' is the indentation (in columns) of the level
        being processed, 'firstkw' the keyword that opened the block
        still awaiting its end comment at that level, 'lastkw' the most
        recent keyword seen there and 'topid' the def/class name to
        repeat in the end comment.  'stack' holds these four values for
        the enclosing levels.  Blank and comment-only lines are held in
        'todo' and written after any end comments generated by the next
        code line.
        """
        # With a step of 1, putline() takes its indent argument as a
        # column count, which is how 'current' is measured here.
        self.indentsize = 1
        stack = []
        todo = []
        current, firstkw, lastkw, topid = 0, '', '', ''
        while 1:
            line = self.getline()
            # i becomes the offset of the first non-blank character
            i = 0
            m = self.wsprog.match(line)
            if m: i = m.end()
            # end if
            # Classify the line: an end line, a keyword line, a
            # blank/comment line (deferred) or anything else.
            m = self.endprog.match(line)
            if m:
                thiskw = 'end'
                endkw = m.group('kw')
                thisid = m.group('id')
            else:
                m = self.kwprog.match(line)
                if m:
                    thiskw = m.group('kw')
                    if not next.has_key(thiskw):
                        thiskw = ''
                    # end if
                    if thiskw in ('def', 'class'):
                        thisid = m.group('id')
                    else:
                        thisid = ''
                    # end if
                elif line[i:i+1] in ('\n', '#'):
                    todo.append(line)
                    continue
                else:
                    thiskw = ''
                # end if
            # end if
            indent = len(string.expandtabs(line[:i], self.tabsize))
            # Dedent: close the block open at each level being left
            # and restore the state of the enclosing level.
            while indent < current:
                if firstkw:
                    if topid:
                        s = '# end %s %s\n' % (
                                firstkw, topid)
                    else:
                        s = '# end %s\n' % firstkw
                    # end if
                    self.putline(s, current)
                    firstkw = lastkw = ''
                # end if
                current, firstkw, lastkw, topid = stack[-1]
                del stack[-1]
            # end while
            # Same level as a block that is still open: an explicit end
            # line closes it; a plain statement or a new opening
            # keyword means its end comment is missing and is inserted.
            # Continuation keywords (elif, else, ...) leave it open.
            if indent == current and firstkw:
                if thiskw == 'end':
                    if endkw != firstkw:
                        self.error('mismatched end')
                    # end if
                    firstkw = lastkw = ''
                elif not thiskw or thiskw in start:
                    if topid:
                        s = '# end %s %s\n' % (
                                firstkw, topid)
                    else:
                        s = '# end %s\n' % firstkw
                    # end if
                    self.putline(s, current)
                    firstkw = lastkw = topid = ''
                # end if
            # end if
            # Deeper indentation: save the current state and start a
            # new level.
            if indent > current:
                stack.append((current, firstkw, lastkw, topid))
                if thiskw and thiskw not in start:
                    # error: a continuation keyword cannot open a new
                    # level; it is ignored without a message
                    thiskw = ''
                # end if
                current, firstkw, lastkw, topid = \
                         indent, thiskw, thiskw, thisid
            # end if
            if thiskw:
                if thiskw in start:
                    firstkw = lastkw = thiskw
                    topid = thisid
                else:
                    lastkw = thiskw
                # end if
            # end if
            for l in todo: self.write(l)
            # end for
            todo = []
            # The EOF test comes last: the empty line returned at EOF
            # has indent 0 and so closes every block still open above.
            if not line: break
            # end if
            self.write(line)
        # end while
    # end def complete

# end class PythonIndenter

# Simplified user interface
# - xxx_filter(input, output): read and write file objects
# - xxx_string(s): take and return string object
# - xxx_file(filename): process file in place, return true iff changed

def complete_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output with
    block-closing comments added."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).complete()
# end def complete_filter

def delete_filter(input = sys.stdin, output = sys.stdout,
                  stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output with
    block-closing comments removed."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).delete()
# end def delete_filter

def reformat_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output re-indented
    according to its block-closing comments."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).reformat()
# end def reformat_filter

class StringReader:
    """Minimal read-only file object on top of a string."""
    def __init__(self, buf):
        self.buf = buf
        self.pos = 0
        self.len = len(self.buf)
    # end def __init__
    def read(self, n = 0):
        """Return up to n characters; everything left if n <= 0."""
        remaining = self.len - self.pos
        if n <= 0 or n > remaining:
            n = remaining
        # end if
        begin = self.pos
        self.pos = begin + n
        return self.buf[begin : self.pos]
    # end def read
    def readline(self):
        """Return the next line including its newline, '' at the end."""
        # If no newline is left, find() gives -1 and the count passed
        # to read() is <= 0, which returns the whole remainder.
        i = self.buf.find('\n', self.pos)
        return self.read(i + 1 - self.pos)
    # end def readline
    def readlines(self):
        """Return the remaining lines as a list."""
        lines = []
        while 1:
            line = self.readline()
            if not line: break
            # end if
            lines.append(line)
        # end while
        return lines
    # end def readlines
    # seek/tell etc. are left as an exercise for the reader
# end class StringReader

class StringWriter:
    """Minimal write-only file object that collects its output in a string."""
    def __init__(self):
        self.buf = ''
    # end def __init__
    def write(self, s):
        """Append s to the collected output."""
        self.buf += s
    # end def write
    def getvalue(self):
        """Return everything written so far."""
        return self.buf
    # end def getvalue
# end class StringWriter

def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return source with block-closing comments added."""
    reader = StringReader(source)
    writer = StringWriter()
    complete_filter(reader, writer, stepsize, tabsize, expandtabs)
    return writer.getvalue()
# end def complete_string

def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return source with block-closing comments removed."""
    reader = StringReader(source)
    writer = StringWriter()
    delete_filter(reader, writer, stepsize, tabsize, expandtabs)
    return writer.getvalue()
# end def delete_string

def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return source re-indented according to its block-closing comments."""
    reader = StringReader(source)
    writer = StringWriter()
    reformat_filter(reader, writer, stepsize, tabsize, expandtabs)
    return writer.getvalue()
# end def reformat_string

def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Add block-closing comments to the file filename, in place.

    Returns 0 if the file needed no change (it is then left alone),
    otherwise 1 after renaming the original to filename~ and writing
    the new contents.  Both file objects are closed explicitly.
    """
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = complete_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    import os
    # The backup is best effort: if the rename fails the file is
    # overwritten without one.
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def complete_file

def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Remove block-closing comments from the file filename, in place.

    Returns 0 if the file needed no change (it is then left alone),
    otherwise 1 after renaming the original to filename~ and writing
    the new contents.  Both file objects are closed explicitly.
    """
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = delete_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    import os
    # The backup is best effort: if the rename fails the file is
    # overwritten without one.
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def delete_file

def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Re-indent the file filename according to its block-closing
    comments, in place.

    Returns 0 if the file needed no change (it is then left alone),
    otherwise 1 after renaming the original to filename~ and writing
    the new contents.  Both file objects are closed explicitly.
    """
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = reformat_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    import os
    # The backup is best effort: if the rename fails the file is
    # overwritten without one.
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def reformat_file

# Test program when called as a script

# Help text shown on command line errors; the default step and tab
# sizes are filled in from the module-level constants via vars().
usage = """
usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
-c         : complete a correctly indented program (add #end directives)
-d         : delete #end directives
-r         : reformat a completed program (use #end directives)
-s stepsize: indentation step (default %(STEPSIZE)d)
-t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
-e         : expand TABs into spaces (default OFF)
[file] ... : files are changed in place, with backups in file~
If no files are specified or a single - is given,
the program acts as a filter (reads stdin, writes stdout).
""" % vars()

def error_both(op1, op2):
    """Complain about two conflicting action options and exit with status 2.

    op1 is the option just seen (e.g. '-d'); op2 is the name of the
    action chosen earlier, whose first letter is its option letter.
    """
    complaint = 'Error: You can not specify both '+op1+' and -'+op2[0]+' at the same time\n'
    out = sys.stderr
    out.write(complaint)
    out.write(usage)
    sys.exit(2)
# end def error_both

def test():
    """Command line driver: parse the options and run the chosen action.

    Exactly one of -c, -d and -r must be given.  Without file
    arguments (or with a single '-') the program filters stdin to
    stdout; otherwise each named file is processed in place.  Usage
    errors are reported on stderr and end the program with status 2.
    """
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
    except getopt.error, msg:
        sys.stderr.write('Error: %s\n' % msg)
        sys.stderr.write(usage)
        sys.exit(2)
    # end try
    # Explicit dispatch tables instead of eval() on a constructed name
    actions = {'-c': 'complete', '-d': 'delete', '-r': 'reformat'}
    filters = {'complete': complete_filter,
               'delete': delete_filter,
               'reformat': reformat_filter}
    filefuncs = {'complete': complete_file,
                 'delete': delete_file,
                 'reformat': reformat_file}
    action = None
    stepsize = STEPSIZE
    tabsize = TABSIZE
    expandtabs = EXPANDTABS
    for o, a in opts:
        if actions.has_key(o):
            if action: error_both(o, action)
            # end if
            action = actions[o]
        elif o in ('-s', '-t'):
            # A zero size would make putline() divide by zero and a
            # non-number would end in a traceback; reject both here.
            try:
                value = string.atoi(a)
            except ValueError:
                value = 0
            # end try
            if value <= 0:
                sys.stderr.write(
                        'Error: option %s requires a positive integer\n' % o)
                sys.stderr.write(usage)
                sys.exit(2)
            # end if
            if o == '-s':
                stepsize = value
            else:
                tabsize = value
            # end if
        elif o == '-e':
            expandtabs = 1
        # end if
    # end for
    if not action:
        sys.stderr.write(
                'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
        sys.stderr.write(usage)
        sys.exit(2)
    # end if
    if not args or args == ['-']:
        filters[action](sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
    else:
        process_file = filefuncs[action]
        for file in args:
            process_file(file, stepsize, tabsize, expandtabs)
        # end for
    # end if
# end def test

if __name__ == '__main__':
    test()
# end if

--- NEW FILE: ptags.py ---
#! /usr/bin/env python

# ptags
#
# Create a tags file for Python programs, usable with vi.
# Tagged are:
# - functions (even inside other defs or classes)
# - classes
# - filenames
# Warns about files it cannot open.
# No warnings about duplicate tags.

import sys, re, os

tags = []    # Modified global variable!

def main():
    """Collect tags from all files named on the command line and, if
    any were found, write them sorted to the file 'tags' in the current
    directory."""
    args = sys.argv[1:]
    for file in args: treat_file(file)
    if tags:
        tags.sort()
        fp = open('tags', 'w')
        # Close explicitly so the data is flushed even where file
        # objects are not reclaimed immediately
        try:
            for s in tags: fp.write(s)
        finally:
            fp.close()

expr = '^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*[:\(]'
matcher = re.compile(expr)

def treat_file(file):
    """Append the tags found in the Python source file to the global tags.

    One tag is added for the file itself (its base name without ".py",
    pointing at line 1) and one for every def or class line, using the
    matched text as the search pattern.  A file that cannot be opened
    is reported on stderr and skipped.
    """
    try:
        fp = open(file, 'r')
    except IOError:
        # Only open failures are expected here; anything else (such as
        # a KeyboardInterrupt) should not be swallowed.
        sys.stderr.write('Cannot open %s\n' % file)
        return
    try:
        base = os.path.basename(file)
        if base[-3:] == '.py':
            base = base[:-3]
        s = base + '\t' + file + '\t' + '1\n'
        tags.append(s)
        while 1:
            line = fp.readline()
            if not line:
                break
            m = matcher.match(line)
            if m:
                # group(0) is the line up to and including the ':' or
                # '(' after the name; group(2) is the name itself
                content = m.group(0)
                name = m.group(2)
                s = name + '\t' + file + '\t/^' + content + '/\n'
                tags.append(s)
    finally:
        fp.close()

main()

--- NEW FILE: pydoc.pyw ---
import pydoc
pydoc.gui()

--- NEW FILE: redemo.py ---
"""Basic regular expression demonstration facility (Perl style syntax)."""

from Tkinter import *
import re

class ReDemo:
    """Interactive regular expression tester built from Tkinter widgets.

    The user types a regular expression and a string to search; the
    matches are highlighted in the string and the groups of the first
    match are listed.
    """

    def __init__(self, master):
        """Create all widgets inside master and wire up the key bindings."""
        self.master = master

        self.promptdisplay = Label(self.master, anchor=W,
                text="Enter a Perl-style regular expression:")
        self.promptdisplay.pack(side=TOP, fill=X)

        self.regexdisplay = Entry(self.master)
        self.regexdisplay.pack(fill=X)
        self.regexdisplay.focus_set()

        self.addoptions()

        self.statusdisplay = Label(self.master, text="", anchor=W)
        self.statusdisplay.pack(side=TOP, fill=X)

        self.labeldisplay = Label(self.master, anchor=W,
                text="Enter a string to search:")
        self.labeldisplay.pack(fill=X)

        self.showframe = Frame(master)
        self.showframe.pack(fill=X, anchor=W)

        # Selects between highlighting the first match only or all of them
        self.showvar = StringVar(master)
        self.showvar.set("first")

        self.showfirstradio = Radiobutton(self.showframe,
                                          text="Highlight first match",
                                          variable=self.showvar,
                                          value="first",
                                          command=self.recompile)
        self.showfirstradio.pack(side=LEFT)

        self.showallradio = Radiobutton(self.showframe,
                                        text="Highlight all matches",
                                        variable=self.showvar,
                                        value="all",
                                        command=self.recompile)
        self.showallradio.pack(side=LEFT)

        self.stringdisplay = Text(self.master, width=60, height=4)
        self.stringdisplay.pack(fill=BOTH, expand=1)
        self.stringdisplay.tag_configure("hit", background="yellow")

        self.grouplabel = Label(self.master, text="Groups:", anchor=W)
        self.grouplabel.pack(fill=X)

        self.grouplist = Listbox(self.master)
        self.grouplist.pack(expand=1, fill=BOTH)

        self.regexdisplay.bind('<Key>', self.recompile)
        self.stringdisplay.bind('<Key>', self.reevaluate)

        self.compiled = None
        self.recompile()

        # Move each widget's own binding tag to the end of its tag
        # list, so the handlers bound above run after the other
        # bindings have processed the key.
        btags = self.regexdisplay.bindtags()
        self.regexdisplay.bindtags(btags[1:] + btags[:1])

        btags = self.stringdisplay.bindtags()
        self.stringdisplay.bindtags(btags[1:] + btags[:1])

    def addoptions(self):
        """Create one check button per re flag, three to a row."""
        self.frames = []
        self.boxes = []
        self.vars = []
        for name in ('IGNORECASE',
                     'LOCALE',
                     'MULTILINE',
                     'DOTALL',
                     'VERBOSE'):
            if len(self.boxes) % 3 == 0:
                # Start a new row of check buttons
                frame = Frame(self.master)
                frame.pack(fill=X)
                self.frames.append(frame)
            val = getattr(re, name)
            var = IntVar()
            # When checked, the variable holds the flag's numeric value
            box = Checkbutton(frame,
                    variable=var, text=name,
                    offvalue=0, onvalue=val,
                    command=self.recompile)
            box.pack(side=LEFT)
            self.boxes.append(box)
            self.vars.append(var)

    def getflags(self):
        """Return the bitwise OR of the flags currently checked."""
        flags = 0
        for var in self.vars:
            flags = flags | var.get()
        return flags

    def recompile(self, event=None):
        """Compile the expression entered and re-run the search.

        On a compile error the message is shown on a red background
        and no pattern is kept.
        """
        try:
            self.compiled = re.compile(self.regexdisplay.get(),
                                       self.getflags())
            bg = self.promptdisplay['background']
            self.statusdisplay.config(text="", background=bg)
        except re.error, msg:
            self.compiled = None
            self.statusdisplay.config(
                    text="re.error: %s" % str(msg),
                    background="red")
        self.reevaluate()

    def reevaluate(self, event=None):
        """Search the text with the compiled pattern and show the results.

        Non-empty matches are tagged "hit" (yellow), empty matches
        "hit0" (orange).  The groups of the first match are listed in
        the list box.
        """
        try:
            self.stringdisplay.tag_remove("hit", "1.0", END)
        except TclError:
            pass
        try:
            self.stringdisplay.tag_remove("hit0", "1.0", END)
        except TclError:
            pass
        self.grouplist.delete(0, END)
        if not self.compiled:
            return
        self.stringdisplay.tag_configure("hit", background="yellow")
        self.stringdisplay.tag_configure("hit0", background="orange")
        text = self.stringdisplay.get("1.0", END)
        last = 0
        nmatches = 0
        while last <= len(text):
            m = self.compiled.search(text, last)
            if m is None:
                break
            first, last = m.span()
            if last == first:
                # Empty match: step one character forward so the loop
                # makes progress, and mark it with the other colour
                last = first+1
                tag = "hit0"
            else:
                tag = "hit"
            pfirst = "1.0 + %d chars" % first
            plast = "1.0 + %d chars" % last
            self.stringdisplay.tag_add(tag, pfirst, plast)
            if nmatches == 0:
                # Scroll to the first match and list its groups, with
                # the whole match as group 0
                self.stringdisplay.yview_pickplace(pfirst)
                groups = list(m.groups())
                groups.insert(0, m.group())
                for i in range(len(groups)):
                    g = "%2d: %s" % (i, `groups[i]`)
                    self.grouplist.insert(END, g)
            nmatches = nmatches + 1
            if self.showvar.get() == "first":
                break

        if nmatches == 0:
            self.statusdisplay.config(text="(no match)",
                                      background="yellow")
        else:
            self.statusdisplay.config(text="")

# Main function, run when invoked as a stand-alone Python program.

def main():
    """Create the Tk root window, instantiate ReDemo on it and run the
    Tk main loop."""
    top = Tk()
    app = ReDemo(top)   # bound to a name so the object lives as long as the loop
    # A window-manager close request ends the main loop.
    top.protocol('WM_DELETE_WINDOW', top.quit)
    top.mainloop()

# Only start the demo when this file is executed, not when it is imported.
if __name__ == '__main__':
    main()

--- NEW FILE: reindent.py ---
#! /usr/bin/env python

# Released to the public domain, by Tim Peters, 03 October 2000.

"""reindent [-d][-r][-v] path ...

-d  Dry run.  Analyze, but don't make any changes to, files.
-r  Recurse.  Search for all .py files in subdirectories too.
-v  Verbose.  Print informative msgs; else no output.

Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess whitespace from ends of lines, and empty lines at the ends
of files.  Ensure the last line ends with a newline.

Pass one or more file and/or directory paths.  When a directory path, all
.py files within the directory will be examined, and, if the -r option is
given, likewise recursively for subdirectories.

Overwrites files in place, renaming the originals with a .bak extension.
If reindent finds nothing to change, the file is left alone.  If reindent
does change a file, the changed file is a fixed-point for reindent (i.e.,
running reindent on the resulting .py file won't change it again).

The hard part of reindenting is figuring out what to do with comment
lines.  So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.
"""

__version__ = "1"

import tokenize
import os
import sys

# Option counters.  main() increments them once for every -v, -r and -d
# flag found on the command line; check() only tests them for truth.
verbose = 0
recurse = 0
dryrun  = 0

def errprint(*args):
    """Write the arguments to stderr, separated by single spaces and
    terminated by a newline.  Each argument is converted with str()."""
    pieces = [str(arg) for arg in args]
    sys.stderr.write(" ".join(pieces) + "\n")

def main():
    """Parse the command line and run check() on every path argument.

    Recognized flags are -d, -r and -v; each occurrence bumps the
    matching module-level counter.  An option error, or a command line
    without path arguments, is reported through errprint() and nothing
    is checked.
    """
    import getopt
    global verbose, recurse, dryrun
    try:
        opts, args = getopt.getopt(sys.argv[1:], "drv")
    except getopt.error, msg:
        errprint(msg)
        return
    for flag, unused in opts:
        if flag == '-d':
            dryrun = dryrun + 1
        elif flag == '-r':
            recurse = recurse + 1
        elif flag == '-v':
            verbose = verbose + 1
    if not args:
        errprint("Usage:", __doc__)
        return
    for path in args:
        check(path)

def check(file):
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print "listing directory", file
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname))
                or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print "checking", file, "...",
    try:
        f = open(file)
    except IOError, msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    r = Reindenter(f)
    f.close()
    if r.run():
        if verbose:
            print "changed."
            if dryrun:
                print "But this is a dry run, so leaving it alone."
        if not dryrun:
            bak = file + ".bak"
            if os.path.exists(bak):
                os.remove(bak)
            os.rename(file, bak)
            if verbose:
                print "renamed", file, "to", bak
            f = open(file, "w")
            r.write(f)
            f.close()
            if verbose:
                print "wrote new", file
    else:
        if verbose:
            print "unchanged."

class Reindenter:
    """Compute a reindented version of the lines read from a file object.

    The constructor reads every line of `f`.  run() tokenizes them and
    builds the reindented program in self.after; write() writes that
    result to a file object.
    """

    def __init__(self, f):
        """Read all lines from the open file object `f` and set up the
        state used while tokenizing."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [line.rstrip().expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Tokenize the lines, build the reindented program in self.after
        and return true if it differs from the raw input lines."""
        tokenize.tokenize(self.getline, self.tokeneater)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Each stats entry owns the lines from its own line number up to
        # (not including) the line number of the next entry.
        for i in range(len(stats)-1):
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in xrange(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:           # Maybe it's a hanging
                                           # comment like this one,
                        # in which case we should shift it like its base
                        # line got shifted.
                        for j in xrange(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                # Nothing to shift: copy the lines of this entry verbatim.
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        # Indent further, but keep all-blank lines empty.
                        if line == "\n":
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Dedent, never removing more than the leading
                        # blanks the line actually has.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the reindented lines built by run() to file object `f`."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once all lines have been
        handed out."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, (sline, scol), end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Token callback passed to tokenize.tokenize().

        Keeps self.level in step with INDENT/DEDENT tokens and appends to
        self.stats a (line number, level) pair for the first token of each
        statement, or (line number, -1) for a comment seen while a new
        statement is being looked for.  The keyword parameters only bind
        the tokenize constants as defaults.
        """

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((sline, self.level))

# Count number of leading blanks.
def getlspace(line):
    """Return the number of space characters at the start of `line`.

    Only " " is counted; a tab or any other character ends the run.
    """
    count = 0
    for ch in line:
        if ch != " ":
            break
        count += 1
    return count

# Process the command line only when run as a script, not when imported.
if __name__ == '__main__':
    main()

--- NEW FILE: rgrep.py ---
#! /usr/bin/env python

"""Reverse grep.

Usage: rgrep [-i] pattern file
"""

import sys
import re
import string
import getopt

def main():
    """Print the lines of a file that match a pattern, last line first.

    The command line is "[-i] pattern file"; -i makes the match case
    insensitive.  The file is read backwards in chunks of at most 64 KB,
    so the whole file is never held in memory.  Argument, pattern and
    open errors are reported through usage(), which exits.
    """
    chunk_size = 64*1024
    flags = 0
    opts, args = getopt.getopt(sys.argv[1:], "i")
    for opt, unused in opts:
        if opt == '-i':
            flags |= re.IGNORECASE
    nargs = len(args)
    if nargs < 2:
        usage("not enough arguments")
    if nargs > 2:
        usage("exactly one file argument required")
    pattern, filename = args
    try:
        matcher = re.compile(pattern, flags)
    except re.error, msg:
        usage("error in regular expression: %s" % str(msg))
    try:
        f = open(filename)
    except IOError, msg:
        usage("can't open %s: %s" % (repr(filename), str(msg)), 1)
    # Seek to the end and walk towards the start one chunk at a time.
    f.seek(0, 2)
    offset = f.tell()
    # Text of a line whose beginning lies in a chunk not yet read; None
    # while there is no such partial line.
    carry = None
    while offset > 0:
        nbytes = min(offset, chunk_size)
        offset -= nbytes
        f.seek(offset)
        pieces = f.read(nbytes).split("\n")
        if carry is not None:
            # Complete the line that was cut at the chunk boundary.
            pieces[-1] = pieces[-1] + carry
        elif not pieces[-1]:
            # The empty piece after a final newline is not a line.
            del pieces[-1]
        if offset > 0:
            # The first piece may continue in the preceding chunk; hold
            # it back until that chunk has been read.
            carry = pieces[0]
            del pieces[0]
        else:
            carry = None
        pieces.reverse()
        for text in pieces:
            if matcher.search(text):
                print text

def usage(msg, code=2):
    """Print `msg` and the module docstring to stderr, then exit with
    status `code`.  sys.stdout is rebound to sys.stderr on the way."""
    sys.stdout = err = sys.stderr
    for text in (msg, __doc__):
        err.write("%s\n" % (text,))
    sys.exit(code)

# Run the reverse grep only when executed as a script.
if __name__ == '__main__':
    main()

--- NEW FILE: suff.py ---
#! /usr/bin/env python

# suff
#
# show different suffixes amongst arguments

import sys

def main():
    files = sys.argv[1:]
    suffixes = {}
    for file in files:
        suff = getsuffix(file)
        if not suffixes.has_key(suff):
            suffixes[suff] = []
        suffixes[suff].append(file)
    keys = suffixes.keys()
    keys.sort()
    for suff in keys:
        print `suff`, len(suffixes[suff])

def getsuffix(file):
    """Return the part of `file` starting at its last '.', or '' when the
    name contains no dot."""
    dot = file.rfind('.')
    if dot < 0:
        return ''
    return file[dot:]

# NOTE(review): main() is called unconditionally, so it also runs when this
# module is imported; there is no `if __name__ == '__main__'` guard here.
main()

--- NEW FILE: sum5.py ---
#! /usr/bin/env python

# print md5 checksum for files

# Module-wide settings; main() rebinds them from the command-line options.
#   bufsize  -- size passed to every read() call in printsumfp()
#   fnfilter -- when set, called on a file name to get the name that is
#               printed next to the checksum
#   rmode    -- mode string printsum() passes to open()
bufsize = 8096
fnfilter = None
rmode = 'r'

# Help text; main() writes it to stderr when option parsing fails.
usage = """
usage: sum5 [-b] [-t] [-l] [-s bufsize] [file ...]
-b        : read files in binary mode
-t        : read files in text mode (default)
-l        : print last pathname component only
-s bufsize: read buffer size (default %d)
file ...  : files to sum; '-' or no files means stdin
""" % bufsize

import sys
import string
import os
import md5
import regsub

# Type objects that sum() compares against to tell file names (strings),
# an output file object and nested sequences of names apart.
StringType = type('')
FileType = type(sys.stdin)

def sum(*files):
    """Print a checksum line for every named file.

    The arguments are file names; '-' stands for standard input.  If the
    last argument is a file object it receives the output instead of
    sys.stdout.  A single non-string argument is taken as a sequence of
    names, and a non-string item inside the sequence is summed
    recursively.  Returns the last non-zero status reported for an item,
    or 0 if every item succeeded.
    """
    out = sys.stdout
    if files and type(files[-1]) == FileType:
        out = files[-1]
        files = files[:-1]
    if len(files) == 1 and type(files[0]) != StringType:
        files = files[0]
    status = 0
    for item in files:
        if type(item) != StringType:
            rc = sum(item, out)
        elif item == '-':
            rc = printsumfp(sys.stdin, '<stdin>', out)
        else:
            rc = printsum(item, out)
        # Keep the previous status unless this item failed.
        status = rc or status
    return status

def printsum(file, out = sys.stdout):
    try:
        fp = open(file, rmode)
    except IOError, msg:
        sys.stderr.write('%s: Can\'t open: %s\n' % (file, msg))
        return 1
    if fnfilter:
        file = fnfilter(file)
    sts = printsumfp(fp, file, out)
    fp.close()
    return sts

def printsumfp(fp, file, out = sys.stdout):
    m = md5.md5()
    try:
        while 1:
            data = fp.read(bufsize)
            if not data: break
            m.update(data)
    except IOError, msg:
        sys.stderr.write('%s: I/O error: %s\n' % (file, msg))
        return 1
    out.write('%s %s\n' % (hexify(m.digest()), file))
    return 0

def hexify(s):
    """Return the characters of string `s` as two lowercase hex digits
    each, concatenated."""
    return ''.join(['%02x' % ord(ch) for ch in s])

def main(args = sys.argv[1:], out = sys.stdout):
    """Parse the options in `args`, update the module settings and print
    the checksums of the remaining arguments to `out`.

    Without file arguments standard input ('-') is summed.  Returns 2
    after an option error (the usage text is written to stderr),
    otherwise the status returned by sum().
    """
    global fnfilter, rmode, bufsize
    import getopt
    try:
        opts, args = getopt.getopt(args, 'blts:')
    except getopt.error, msg:
        sys.stderr.write('%s: %s\n%s' % (sys.argv[0], msg, usage))
        return 2
    for flag, value in opts:
        if flag == '-l':
            fnfilter = os.path.basename
        elif flag == '-b':
            rmode = 'rb'
        elif flag == '-t':
            rmode = 'r'
        elif flag == '-s':
            bufsize = string.atoi(value)
    if not args:
        args = ['-']
    return sum(args, out)

# Run main() and exit with its status when executed as a script; the second
# test also fires when the module name equals sys.argv[0].
if __name__ == '__main__' or __name__ == sys.argv[0]:
    sys.exit(main(sys.argv[1:], sys.stdout))

--- NEW FILE: texi2html.py ---
#! /usr/bin/env python

# Convert GNU texinfo files into HTML, one file per node.
# Based on Texinfo 2.14.
# Usage: texi2html [-d] [-d] [-c] inputfile outputdirectory
# The input file must be a complete texinfo file, e.g. emacs.texi.
# This creates many files (one per info node) in the output directory,
# overwriting existing files of the same name.  All files created have
# ".html" as their extension.

# XXX To do:
# - handle @comment*** correctly
# - handle @xref {some words} correctly
# - handle @ftable correctly (items aren't indexed?)
# - handle @itemx properly
# - handle @exdent properly
# - add links directly to the proper line from indices
# - check against the definitive list of @-cmds; we still miss (among others):
[...1574 lines suppressed...]
    parser.print_headers = print_headers

    file = sys.argv[1]
    parser.setdirname(sys.argv[2])
    if file == '-':
        fp = sys.stdin
    else:
        parser.setincludedir(os.path.dirname(file))
        try:
            fp = open(file, 'r')
        except IOError, msg:
            print file, ':', msg
            sys.exit(1)
    parser.parse(fp)
    fp.close()
    parser.report()

# Run test() only when this file is executed as a script.
if __name__ == "__main__":
    test()

--- NEW FILE: trace.py ---
#!/usr/bin/env python

# Copyright 2000, Mojam Media, Inc., all rights reserved.
# Author: Skip Montanaro
#
# Copyright 1999, Bioreason, Inc., all rights reserved.
# Author: Andrew Dalke
#
# Copyright 1995-1997, Automatrix, Inc., all rights reserved.
# Author: Skip Montanaro
#
# Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved.
#
#
# Permission to use, copy, modify, and distribute this Python software and
# its associated documentation for any purpose without fee is hereby
# granted, provided that the above copyright notice appears in all copies,
# and that both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of neither Automatrix,
# Bioreason or Mojam Media be used in advertising or publicity pertaining to
# distribution of the software without specific, written prior permission.
#
#
# Summary of recent changes:
#   Support for files with the same basename (submodules in packages)
#   Expanded the idea of how to ignore files or modules
#   Split tracing and counting into different classes
#   Extracted count information and reporting from the count class
#   Added some ability to detect which missing lines could be executed
#   Added pseudo-pragma to prohibit complaining about unexecuted lines
#   Rewrote the main program

# Summary of older changes:
#   Added run-time display of statements being executed
#   Incorporated portability and performance fixes from Greg Stein
#   Incorporated main program from Michael Scharf

"""
program/module to trace Python program or function execution

Sample use, command line:
  trace.py -c -f counts --ignore-dir '$prefix' spam.py eggs
  trace.py -t --ignore-dir '$prefix' spam.py eggs

Sample use, programmatically (still more complicated than it should be)
   # create an Ignore option, telling it what you want to ignore
   ignore = trace.Ignore(dirs = [sys.prefix, sys.exec_prefix])
   # create a Coverage object, telling it what to ignore
   coverage = trace.Coverage(ignore)
   # run the new command using the given trace
   trace.run(coverage.trace, 'main()')

   # make a report, telling it where you want output
   t = trace.create_results_log(coverage.results(),
                                '/usr/local/Automatrix/concerts/coverage',
                                show_missing = 1)

   The Trace class can be instantiated instead of the Coverage class if
   runtime display of executable lines is desired instead of statement
   coverage measurement.
"""

import sys, os, string, marshal, tempfile, copy, operator

def usage(outfile):
    """Write the command-line help text to the file-like object `outfile`,
    using sys.argv[0] as the program name."""
    help_text = """Usage: %s [OPTIONS] <file> [ARGS]

Execution:
      --help           Display this help then exit.
      --version        Output version information then exit.
   -t,--trace          Print the line to be executed to sys.stdout.
   -c,--count          Count the number of times a line is executed.
                         Results are written in the results file, if given.
   -r,--report         Generate a report from a results file; do not
                         execute any code.
        (One of `-t', `-c' or `-r' must be specified)
   -s,--summary        Generate a brief summary for each file.  (Can only
                         be used with -c or -r.)

I/O:
   -f,--file=          File name for accumulating results over several runs.
                         (No file name means do not archive results)
   -d,--logdir=        Directory to use when writing annotated log files.
                         Log files are the module __name__ with `.` replaced
                         by os.sep and with '.pyl' added.
   -m,--missing        Annotate all executable lines which were not executed
                         with a '>>>>>> '.
   -R,--no-report      Do not generate the annotated reports.  Useful if
                         you want to accumulate several over tests.
   -C,--coverdir=      Generate .cover files in this directory

Selection:                 Do not trace or log lines from ...
  --ignore-module=[string]   modules with the given __name__, and submodules
                              of that module
  --ignore-dir=[string]      files in the stated directory (multiple
                              directories can be joined by os.pathsep)

  The selection options can be listed multiple times to ignore different
modules.
"""
    outfile.write(help_text % sys.argv[0])

class Ignore:
    """Decide which modules and files the tracer should skip.

    `modules` is a list of module names to ignore (their submodules are
    ignored too); `dirs` is a list of directories whose files are ignored.
    Answers are cached per module name in self._ignore, which the tracer
    classes read directly.
    """
    def __init__(self, modules = None, dirs = None):
        self._mods = modules or []
        self._dirs = dirs or []

        # Cache of earlier answers keyed by module name; '<string>' is
        # pre-seeded as ignored.
        self._ignore = { '<string>': 1 }

    def names(self, filename, modulename):
        """Return 1 if code from this module/file is to be ignored, else 0.

        `filename` may be None, which is taken to mean a built-in module
        (always ignored).  `modulename` may be None when the frame has no
        __name__; the module-name tests are then skipped and only the
        file name decides.  The answer is stored in the cache before it
        is returned.
        """
        try:
            return self._ignore[modulename]
        except KeyError:
            pass

        # haven't seen this one before, so see if the module name is
        # on the ignore list.  Need to take some care since ignoring
        # "cmp" mustn't mean ignoring "cmpcache" but ignoring
        # "Spam" must also mean ignoring "Spam.Eggs".
        if modulename is not None:
            for mod in self._mods:
                if mod == modulename:  # Identical names, so ignore
                    self._ignore[modulename] = 1
                    return 1
                # check if the module is a proper submodule of something
                # on the ignore list; the one-character slice cannot run
                # off the end of the name
                n = len(mod)
                if mod == modulename[:n] and modulename[n:n+1] == '.':
                    self._ignore[modulename] = 1
                    return 1

        # Now check that __file__ isn't in one of the directories
        if filename is None:
            # must be a built-in, so we must ignore
            self._ignore[modulename] = 1
            return 1

        # Ignore a file when it lives below one of the ignorable paths
        for d in self._dirs:
            # The trailing os.sep is to ensure that d is a parent
            # directory, as compared to cases like:
            #  d = "/usr/local"
            #  filename = "/usr/local.py"
            # or
            #  d = "/usr/local.py"
            #  filename = "/usr/local.py"
            # It is only added when d does not already end with one, so
            # that "/usr/local/" works as well as "/usr/local".
            prefix = d
            if not prefix.endswith(os.sep):
                prefix = prefix + os.sep
            if filename.startswith(prefix):
                self._ignore[modulename] = 1
                return 1

        # Tried the different ways, so we don't ignore this module
        self._ignore[modulename] = 0
        return 0

def run(trace, cmd):
    """Execute `cmd` in the namespace of the __main__ module with `trace`
    installed through sys.settrace(); tracing is switched off again even
    if the command raises."""
    import __main__
    namespace = __main__.__dict__
    sys.settrace(trace)
    try:
        # Parenthesized form of "exec cmd in namespace, namespace".
        exec(cmd, namespace, namespace)
    finally:
        sys.settrace(None)

def runctx(trace, cmd, globals=None, locals=None):
    """Execute `cmd` in the given namespaces with `trace` installed
    through sys.settrace().

    `globals` and `locals` each default to a fresh empty dictionary.
    Tracing is switched off again even if the command raises.
    """
    if globals is None: globals = {}
    if locals is None: locals = {}
    sys.settrace(trace)
    try:
        # Run in the caller-supplied namespaces (the code used to name an
        # undefined `dict` here).  This is the parenthesized form of
        # "exec cmd in globals, locals".
        exec(cmd, globals, locals)
    finally:
        sys.settrace(None)

def runfunc(trace, func, *args, **kw):
    """Call func(*args, **kw) with `trace` installed through
    sys.settrace() and return the result; tracing is switched off again
    even if the call raises."""
    sys.settrace(trace)
    try:
        return func(*args, **kw)
    finally:
        sys.settrace(None)

class CoverageResults:
    """Execution counts per (filename, lineno) together with the map from
    file names to module names."""
    def __init__(self, counts = {}, modules = {}):
        # Copies are stored, so neither the caller's dictionaries nor the
        # shared default arguments are ever modified through this object.
        self.counts = counts.copy()    # map (filename, lineno) to count
        self.modules = modules.copy()  # map filenames to modules

    def update(self, other):
        """Merge in the data from another CoverageResults"""
        totals = self.counts
        for key, extra in other.counts.items():
            totals[key] = totals.get(key, 0) + extra

        known = self.modules
        for fname, modname in other.modules.items():
            try:
                current = known[fname]
            except KeyError:
                known[fname] = modname
                continue
            # make sure they point to the same file
            assert current == modname, \
                  "Strange! filename %s has two different module " \
                  "names: %s and %s" % \
                  (fname, current, modname)

# Given a code string, return the SET_LINENO information
def _find_LINENO_from_string(co_code):
    """Return a dict whose keys are the line numbers named by the
    SET_LINENO instructions in the byte-code string `co_code`.

    When two SET_LINENO instructions follow each other directly, the line
    number of the first one is dropped again.
    """
    import dis
    found = {}

    # This code was filched from the `dis' module then modified
    size = len(co_code)
    offset = 0
    last_op = None
    last_line = 0
    while offset < size:
        opcode = ord(co_code[offset])
        if opcode == dis.SET_LINENO:
            if last_op == opcode:
                # two SET_LINENO in a row, so the previous didn't
                # indicate anything.  This occurs with triple
                # quoted strings (?).  Remove the old one.
                del found[last_line]
            # the argument is a 16-bit number, low byte first
            low = ord(co_code[offset+1])
            high = ord(co_code[offset+2])
            last_line = low + high*256
            found[last_line] = 1
        # opcodes from HAVE_ARGUMENT upwards occupy three bytes
        if opcode < dis.HAVE_ARGUMENT:
            offset = offset + 1
        else:
            offset = offset + 3
        last_op = opcode
    return found

def _find_LINENO(code):
    """Return the SET_LINENO line numbers of a code object and of every
    code object nested in its constants, as a dict keyed by line number."""
    import types

    # line numbers of this scope level
    linenos = _find_LINENO_from_string(code.co_code)

    # nested scopes are stored as code objects among the constants
    nested = [const for const in code.co_consts
              if type(const) == types.CodeType]
    for inner in nested:
        linenos.update(_find_LINENO(inner))
    return linenos

def find_executable_linenos(filename):
    """Return a dict keyed by the line numbers of the executable
    statements in the Python source file `filename`.

    The file is compiled and the resulting code objects are searched for
    SET_LINENO instructions, so this depends on the byte code containing
    them.  The file name must end in '.py'.
    """
    import parser

    assert filename.endswith('.py')

    source = open(filename).read()

    # The only way I know to find line numbers is to look for the
    # SET_LINENO instructions.  Isn't there some way to get it from
    # the AST?
    code = parser.compileast(parser.suite(source), filename)
    return _find_LINENO(code)

### XXX because os.path.commonprefix seems broken by my way of thinking...
def commonprefix(dirs):
    """Given a list of pathnames, return the longest leading run of whole
    path components shared by all of them, joined with os.sep.

    Returns '' for an empty list or when not even the first component is
    common.  The argument is left unmodified.
    """
    if not dirs:
        return ''
    parts = [path.split(os.sep) for path in dirs]
    common = parts[0]
    for candidate in parts:
        # Count the leading components on which both agree.
        matched = 0
        limit = min(len(common), len(candidate))
        while matched < limit and common[matched] == candidate[matched]:
            matched = matched + 1
        if matched < len(common):
            if matched == 0:
                return ''
            common = common[:matched]
    return os.sep.join(common)

def create_results_log(results, dirname = ".", show_missing = 1,
                       save_counts = 0, summary = 0, coverdir = None):
    import re
    # turn the counts data ("(filename, lineno) = count") into something
    # accessible on a per-file basis
    per_file = {}
    for filename, lineno in results.counts.keys():
        lines_hit = per_file[filename] = per_file.get(filename, {})
        lines_hit[lineno] = results.counts[(filename, lineno)]

    # try and merge existing counts and modules file from dirname
    try:
        counts = marshal.load(open(os.path.join(dirname, "counts")))
        modules = marshal.load(open(os.path.join(dirname, "modules")))
        results.update(results.__class__(counts, modules))
    except IOError:
        pass

    # there are many places where this is insufficient, like a blank
    # line embedded in a multiline string.
    blank = re.compile(r'^\s*(#.*)?$')

    # accumulate summary info, if needed
    sums = {}

    # generate file paths for the coverage files we are going to write...
    fnlist = []
    tfdir = tempfile.gettempdir()
    for key in per_file.keys():
        filename = key

        # skip some "files" we don't care about...
        if filename == "<string>":
            continue
        # are these caused by code compiled using exec or something?
        if filename.startswith(tfdir):
            continue

        modulename = os.path.split(results.modules[key])[1]

        if filename.endswith(".pyc") or filename.endswith(".pyo"):
            filename = filename[:-1]

        if coverdir:
            listfilename = os.path.join(coverdir, modulename + ".cover")
        else:
            # XXX this is almost certainly not portable!!!
            fndir = os.path.dirname(filename)
            if os.path.isabs(filename):
                coverpath = fndir
            else:
                coverpath = os.path.join(dirname, fndir)

            # build list file name by appending a ".cover" to the module name
            # and sticking it into the specified directory
            if "." in modulename:
                # A module in a package
                finalname = modulename.split(".")[-1]
                listfilename = os.path.join(coverpath, finalname + ".cover")
            else:
                listfilename = os.path.join(coverpath, modulename + ".cover")

        # Get the original lines from the .py file
        try:
            lines = open(filename, 'r').readlines()
        except IOError, err:
            print >> sys.stderr, "trace: Could not open %s for reading " \
                  "because: %s - skipping" % (`filename`, err.strerror)
            continue

        try:
            outfile = open(listfilename, 'w')
        except IOError, err:
            sys.stderr.write(
                '%s: Could not open %s for writing because: %s" \
                "- skipping\n' % ("trace", `listfilename`, err.strerror))
            continue

        # If desired, get a list of the line numbers which represent
        # executable content (returned as a dict for better lookup speed)
        if show_missing:
            executable_linenos = find_executable_linenos(filename)
        else:
            executable_linenos = {}

        n_lines = 0
        n_hits = 0
        lines_hit = per_file[key]
        for i in range(len(lines)):
            line = lines[i]

            # do the blank/comment match to try to mark more lines
            # (help the reader find stuff that hasn't been covered)
            if lines_hit.has_key(i+1):
                # count precedes the lines that we captured
                outfile.write('%5d: ' % lines_hit[i+1])
                n_hits = n_hits + 1
                n_lines = n_lines + 1
            elif blank.match(line):
                # blank lines and comments are preceded by dots
                outfile.write('    . ')
            else:
                # lines preceded by no marks weren't hit
                # Highlight them if so indicated, unless the line contains
                # '#pragma: NO COVER' (it is possible to embed this into
                # the text as a non-comment; no easy fix)
                if executable_linenos.has_key(i+1) and \
                   string.find(lines[i],
                               string.join(['#pragma', 'NO COVER'])) == -1:
                    outfile.write('>>>>>> ')
                else:
                    outfile.write(' '*7)
                n_lines = n_lines + 1
            outfile.write(string.expandtabs(lines[i], 8))

        outfile.close()

        if summary and n_lines:
            percent = int(100 * n_hits / n_lines)
            sums[modulename] = n_lines, percent, modulename, filename

        if save_counts:
            # try and store counts and module info into dirname
            try:
                marshal.dump(results.counts,
                             open(os.path.join(dirname, "counts"), "w"))
                marshal.dump(results.modules,
                             open(os.path.join(dirname, "modules"), "w"))
            except IOError, err:
                sys.stderr.write("cannot save counts/modules " \
                                 "files because %s" % err.strerror)

    if summary and sums:
        mods = sums.keys()
        mods.sort()
        print "lines   cov%   module   (path)"
        for m in mods:
            n_lines, percent, modulename, filename = sums[m]
            print "%5d   %3d%%   %s   (%s)" % sums[m]

# There is a lot of code shared between these two classes even though
# it is straightforward to make a super class to share code.  However,
# for performance reasons (remember, this is called at every step) I
# wanted to keep everything to a single function call.  Also, by
# staying within a single scope, I don't have to temporarily nullify
# sys.settrace, which would slow things down even more.

class Coverage:
    """Trace function provider that counts how often each line executes.

    `ignore` is the Ignore object that decides which modules are skipped;
    when omitted, every Coverage object gets a fresh Ignore() of its own
    instead of sharing one default instance (and its cache).
    """
    def __init__(self, ignore = None):
        if ignore is None:
            ignore = Ignore()
        self.ignore = ignore
        self.ignore_names = ignore._ignore # access ignore's cache (speed hack)

        self.counts = {}   # keys are (filename, linenumber)
        self.modules = {}  # maps filename -> module name

    def trace(self, frame, why, arg):
        """Trace function for sys.settrace(): count 'line' events.

        Always returns itself so that it keeps being called.
        """
        if why == 'line':
            # something is fishy about getting the file name
            filename = frame.f_globals.get("__file__", None)
            if filename is None:
                filename = frame.f_code.co_filename
            try:
                modulename = frame.f_globals["__name__"]
            except KeyError:
                # PyRun_String() for example
                # XXX what to do?
                modulename = None

            # We do this next block to keep from having to make methods
            # calls, which also requires resetting the trace
            ignore_it = self.ignore_names.get(modulename, -1)
            if ignore_it == -1:  # unknown filename
                # tracing is off while the Ignore object is consulted
                sys.settrace(None)
                ignore_it = self.ignore.names(filename, modulename)
                sys.settrace(self.trace)

                # record the module name for every file
                self.modules[filename] = modulename

            if not ignore_it:
                lineno = frame.f_lineno

                # record the file name and line number of every trace
                key = (filename, lineno)
                self.counts[key] = self.counts.get(key, 0) + 1

        return self.trace

    def results(self):
        """Return a CoverageResults built from the collected counts and
        module names."""
        return CoverageResults(self.counts, self.modules)

class Trace:
    """Trace hook that prints every executed line of modules not ignored.

    Install the bound method trace() as the trace function.
    """
    def __init__(self, ignore = None):
        # Build the default here rather than in the signature: a default
        # argument is evaluated only once, so every Trace created without
        # an explicit ignore would share one Ignore object and its cache.
        if ignore is None:
            ignore = Ignore()
        self.ignore = ignore
        self.ignore_names = ignore._ignore # access ignore's cache (speed hack)

        self.files = {'<string>': None}  # stores lines from the .py file,
                                         # or None

    def trace(self, frame, why, arg):
        """Trace function: print "file(lineno): source" for 'line' events.

        When the source text is not available the line is printed as
        "module(lineno): ??".  Always returns itself so that it stays
        installed for the frame.
        """
        if why == 'line':
            filename = frame.f_code.co_filename
            try:
                modulename = frame.f_globals["__name__"]
            except KeyError:
                # PyRun_String() for example
                # XXX what to do?
                modulename = None

            # We do this next block to keep from having to make methods
            # calls, which also requires resetting the trace
            ignore_it = self.ignore_names.get(modulename, -1)
            if ignore_it == -1:  # unknown filename
                sys.settrace(None)
                ignore_it = self.ignore.names(filename, modulename)
                sys.settrace(self.trace)

            if not ignore_it:
                lineno = frame.f_lineno
                files = self.files

                try:
                    lines = files[filename]
                except KeyError:
                    # First line seen from this file: cache its source.
                    # A file that cannot be read is remembered as None so
                    # that the hook neither fails nor retries on each line.
                    try:
                        f = open(filename)
                        try:
                            lines = [line.rstrip() for line in f.readlines()]
                        finally:
                            f.close()
                    except IOError:
                        lines = None
                    files[filename] = lines

                # If you want to see filenames (the original behaviour), try:
                #   modulename = filename
                # or, prettier but confusing when several files have the
                # same name
                #   modulename = os.path.basename(filename)

                # The range check covers source files that are shorter
                # than the code being executed.
                if lines is not None and 0 < lineno <= len(lines):
                    sys.stdout.write('%s(%d): %s\n' %
                                     (os.path.basename(filename), lineno,
                                      lines[lineno-1]))
                else:
                    sys.stdout.write('%s(%d): ??\n' % (modulename, lineno))

        return self.trace

def _err_exit(msg):
    """Report msg on stderr, prefixed with sys.argv[0], and exit with 1."""
    complaint = "%s: %s" % (sys.argv[0], msg)
    print >> sys.stderr, complaint
    sys.exit(1)

def main(argv = None):
    """Command-line entry point: parse options, then trace, count or report.

    argv defaults to sys.argv.  Exactly one of --trace, --count or
    --report must be given.  --trace and --count run the program named
    by the first non-option argument under the corresponding trace hook;
    --report writes the results log from a saved counts file and needs
    no program.  Usage errors are reported through _err_exit(), which
    ends the process with status 1.
    """
    import getopt

    if argv is None:
        argv = sys.argv
    try:
        # "summary" has to be listed here as well, otherwise the
        # --summary spelling handled below is rejected by getopt.
        opts, prog_argv = getopt.getopt(argv[1:], "tcrRf:d:msC:",
                                        ["help", "version", "trace", "count",
                                         "report", "no-report",
                                         "file=", "logdir=", "missing",
                                         "ignore-module=", "ignore-dir=",
                                         "coverdir=", "summary"])

    except getopt.error, msg:
        print >> sys.stderr, "%s: %s" % (sys.argv[0], msg)
        print >> sys.stderr, "Try `%s --help' for more information" \
              % sys.argv[0]
        sys.exit(1)

    trace = 0
    count = 0
    report = 0
    no_report = 0
    counts_file = None
    logdir = "."
    missing = 0
    ignore_modules = []
    ignore_dirs = []
    coverdir = None
    summary = 0

    for opt, val in opts:
        if opt == "--help":
            usage(sys.stdout)
            sys.exit(0)

        if opt == "--version":
            sys.stdout.write("trace 2.0\n")
            sys.exit(0)

        if opt == "-t" or opt == "--trace":
            trace = 1
            continue

        if opt == "-c" or opt == "--count":
            count = 1
            continue

        if opt == "-r" or opt == "--report":
            report = 1
            continue

        if opt == "-R" or opt == "--no-report":
            no_report = 1
            continue

        if opt == "-f" or opt == "--file":
            counts_file = val
            continue

        if opt == "-d" or opt == "--logdir":
            logdir = val
            continue

        if opt == "-m" or opt == "--missing":
            missing = 1
            continue

        if opt == "-C" or opt == "--coverdir":
            coverdir = val
            continue

        if opt == "-s" or opt == "--summary":
            summary = 1
            continue

        if opt == "--ignore-module":
            ignore_modules.append(val)
            continue

        if opt == "--ignore-dir":
            for s in string.split(val, os.pathsep):
                s = os.path.expandvars(s)
                # should I also call expanduser? (after all, could use $HOME)

                s = string.replace(s, "$prefix",
                                   os.path.join(sys.prefix, "lib",
                                                "python" + sys.version[:3]))
                s = string.replace(s, "$exec_prefix",
                                   os.path.join(sys.exec_prefix, "lib",
                                                "python" + sys.version[:3]))
                s = os.path.normpath(s)
                ignore_dirs.append(s)
            continue

        assert 0, "Should never get here"

    # --report only reads the counts file, so it does not need a program.
    if not report and len(prog_argv) == 0:
        _err_exit("missing name of file to run")

    if count + trace + report > 1:
        _err_exit("can only specify one of --trace, --count or --report")

    if count + trace + report == 0:
        _err_exit("must specify one of --trace, --count or --report")

    if report and counts_file is None:
        _err_exit("--report requires a --file")

    if report and no_report:
        _err_exit("cannot specify both --report and --no-report")

    if logdir is not None:
        # warn if the directory doesn't exist, but keep on going
        # (is this the correct behaviour?)
        if not os.path.isdir(logdir):
            sys.stderr.write(
                "trace: WARNING, --logdir directory %s is not available\n" %
                       repr(logdir))

    if prog_argv:
        # Make the traced program see its own command line.
        sys.argv = prog_argv
        progname = prog_argv[0]
        # This used to be guarded by eval(sys.version[:3]) > 1.3.  The
        # "print >>" statements in this function already need a newer
        # interpreter than that, so the guard was always true.
        sys.path[0] = os.path.split(progname)[0] # ???

    # everything is ready
    ignore = Ignore(ignore_modules, ignore_dirs)
    if trace:
        t = Trace(ignore)
        try:
            run(t.trace, 'execfile(' + repr(progname) + ')')
        except IOError, err:
            _err_exit("Cannot run file %s because: %s" % \
                      (repr(sys.argv[0]), err.strerror))

    elif count:
        t = Coverage(ignore)
        try:
            run(t.trace, 'execfile(' + repr(progname) + ')')
        except IOError, err:
            _err_exit("Cannot run file %s because: %s" % \
                      (repr(sys.argv[0]), err.strerror))
        except SystemExit:
            # The traced program called sys.exit(); still write results.
            pass

        results = t.results()
        # Add another lookup from the program's file name to its import name
        # This give the right results, but I'm not sure why ...
        results.modules[progname] = os.path.splitext(progname)[0]

        if counts_file:
            # add in archived data, if available
            try:
                f = open(counts_file, 'rb')
                try:
                    old_counts, old_modules = marshal.load(f)
                finally:
                    f.close()
            except IOError:
                pass
            else:
                results.update(CoverageResults(old_counts, old_modules))

        if not no_report:
            create_results_log(results, logdir, missing,
                               summary=summary, coverdir=coverdir)

        if counts_file:
            try:
                f = open(counts_file, 'wb')
                try:
                    marshal.dump( (results.counts, results.modules), f)
                finally:
                    f.close()
            except IOError, err:
                _err_exit("Cannot save counts file %s because: %s" % \
                          (repr(counts_file), err.strerror))

    elif report:
        try:
            f = open(counts_file, 'rb')
            try:
                old_counts, old_modules = marshal.load(f)
            finally:
                f.close()
        except IOError, err:
            # Report an unreadable counts file like the other errors
            # instead of dying with a traceback.
            _err_exit("Cannot read counts file %s because: %s" % \
                      (repr(counts_file), err.strerror))
        results = CoverageResults(old_counts, old_modules)
        create_results_log(results, logdir, missing,
                           summary=summary, coverdir=coverdir)

    else:
        assert 0, "Should never get here"

# Run the command-line interface only when executed as a script.
if __name__=='__main__':
    main()

--- NEW FILE: treesync.py ---
#! /usr/bin/env python

"""Script to synchronize two source trees.

Invoke with two arguments:

python treesync.py slave master

The assumption is that "master" contains CVS administration while
slave doesn't.  All files in the slave tree that have a CVS/Entries
entry in the master tree are synchronized.  This means:

    If the files differ:
        if the slave file is newer:
            normalize the slave file
            if the files still differ:
                copy the slave to the master
        else (the master is newer):
            copy the master to the slave

    normalizing the slave means replacing CRLF with LF when the master
    doesn't use CRLF

"""

import os, sys, stat, string, getopt

# Interactivity options
# The last four are passed to okay() as its answer: a value starting with
# "y" or "n" settles the question without prompting, anything else (such
# as "ask") makes okay() prompt the user.
default_answer = "ask"      # what okay() assumes when the user enters nothing
create_files = "yes"        # compare(): copy the master to a missing slave file
create_directories = "no"   # process(): create a missing slave directory
write_slave = "ask"         # compare(): overwrite the slave with a newer master
write_master = "ask"        # compare(): overwrite the master with a newer slave

def main():
    """Parse the command line, set the option globals and sync the trees.

    Options:
        -y, -n  set default_answer to "yes" / "no"
        -s ARG  set write_slave
        -m ARG  set write_master
        -d ARG  set create_directories
        -f ARG  set create_files
        -a ARG  set all four of the above at once

    Exactly two arguments (slave directory, master directory) are
    required; otherwise, or on an unknown option, a usage message is
    printed and nothing is done.
    """
    # Every option assigned below has to be declared global; without the
    # declaration the assignment only creates a local and is silently lost.
    global default_answer, create_files
    global create_directories, write_master, write_slave
    usage = ("usage: python %s [-n] [-y] [-m y|n|a] [-s y|n|a] "
             "[-d y|n|a] [-f n|y|a] slavedir masterdir\n"
             % (sys.argv[0] or "treesync.py"))
    try:
        opts, args = getopt.getopt(sys.argv[1:], "nym:s:d:f:a:")
    except getopt.error:
        # Unknown option or missing option argument.
        sys.stdout.write("%s\n" % sys.exc_info()[1])
        sys.stdout.write(usage)
        return
    for o, a in opts:
        if o == '-y':
            default_answer = "yes"
        elif o == '-n':
            default_answer = "no"
        elif o == '-s':
            write_slave = a
        elif o == '-m':
            write_master = a
        elif o == '-d':
            create_directories = a
        elif o == '-f':
            create_files = a
        elif o == '-a':
            create_files = create_directories = write_slave = write_master = a
    if len(args) != 2:
        sys.stdout.write(usage)
        return
    slave, master = args
    process(slave, master)

def process(slave, master):
    cvsdir = os.path.join(master, "CVS")
    if not os.path.isdir(cvsdir):
        print "skipping master subdirectory", master
        print "-- not under CVS"
        return
    print "-"*40
    print "slave ", slave
    print "master", master
    if not os.path.isdir(slave):
        if not okay("create slave directory %s?" % slave,
                    answer=create_directories):
            print "skipping master subdirectory", master
            print "-- no corresponding slave", slave
            return
        print "creating slave directory", slave
        try:
            os.mkdir(slave)
        except os.error, msg:
            print "can't make slave directory", slave, ":", msg
            return
        else:
            print "made slave directory", slave
    cvsdir = None
    subdirs = []
    names = os.listdir(master)
    for name in names:
        mastername = os.path.join(master, name)
        slavename = os.path.join(slave, name)
        if name == "CVS":
            cvsdir = mastername
        else:
            if os.path.isdir(mastername) and not os.path.islink(mastername):
                subdirs.append((slavename, mastername))
    if cvsdir:
        entries = os.path.join(cvsdir, "Entries")
        for e in open(entries).readlines():
            words = string.split(e, '/')
            if words[0] == '' and words[1:]:
                name = words[1]
                s = os.path.join(slave, name)
                m = os.path.join(master, name)
                compare(s, m)
    for (s, m) in subdirs:
        process(s, m)

def compare(slave, master):
    try:
        sf = open(slave, 'r')
    except IOError:
        sf = None
    try:
        mf = open(master, 'rb')
    except IOError:
        mf = None
    if not sf:
        if not mf:
            print "Neither master nor slave exists", master
            return
        print "Creating missing slave", slave
        copy(master, slave, answer=create_files)
        return
    if not mf:
        print "Not updating missing master", master
        return
    if sf and mf:
        if identical(sf, mf):
            return
    sft = mtime(sf)
    mft = mtime(mf)
    if mft > sft:
        # Master is newer -- copy master to slave
        sf.close()
        mf.close()
        print "Master             ", master
        print "is newer than slave", slave
        copy(master, slave, answer=write_slave)
        return
    # Slave is newer -- copy slave to master
    print "Slave is", sft-mft, "seconds newer than master"
    # But first check what to do about CRLF
    mf.seek(0)
    fun = funnychars(mf)
    mf.close()
    sf.close()
    if fun:
        print "***UPDATING MASTER (BINARY COPY)***"
        copy(slave, master, "rb", answer=write_master)
    else:
        print "***UPDATING MASTER***"
        copy(slave, master, "r", answer=write_master)

BUFSIZE = 16*1024

def identical(sf, mf):
    """Return 1 if both open files yield the same data from here on, else 0.

    The files are read in lockstep, BUFSIZE at a time, and reading stops
    at the first pair of chunks that differ.
    """
    slave_chunk = sf.read(BUFSIZE)
    master_chunk = mf.read(BUFSIZE)
    while slave_chunk == master_chunk:
        if not slave_chunk:
            # Both files ended together without a difference.
            return 1
        slave_chunk = sf.read(BUFSIZE)
        master_chunk = mf.read(BUFSIZE)
    return 0

def mtime(f):
    """Return the modification time of the open file f (via os.fstat)."""
    return os.fstat(f.fileno())[stat.ST_MTIME]

def funnychars(f):
    """Return 1 if the rest of open file f has a CR or NUL character, else 0.

    The file is read BUFSIZE at a time and reading stops at the first hit.
    """
    chunk = f.read(BUFSIZE)
    while chunk:
        for funny in ('\r', '\0'):
            if funny in chunk:
                return 1
        chunk = f.read(BUFSIZE)
    return 0

def copy(src, dst, rmode="rb", wmode="wb", answer='ask'):
    print "copying", src
    print "     to", dst
    if not okay("okay to copy? ", answer):
        return
    f = open(src, rmode)
    g = open(dst, wmode)
    while 1:
        buf = f.read(BUFSIZE)
        if not buf: break
        g.write(buf)
    f.close()
    g.close()

def okay(prompt, answer='ask'):
    answer = string.lower(string.strip(answer))
    if not answer or answer[0] not in 'ny':
        answer = raw_input(prompt)
        answer = string.lower(string.strip(answer))
        if not answer:
            answer = default_answer
    if answer[:1] == 'y':
        return 1
    if answer[:1] == 'n':
        return 0
    print "Yes or No please -- try again:"
    return okay(prompt)

# Only start a sync when executed as a script, so that importing this
# module does not parse sys.argv and start touching files.
if __name__ == '__main__':
    main()

--- NEW FILE: untabify.py ---
#! /usr/bin/env python

"Replace tabs with spaces in argument files.  Print names of changed files."

import os
import sys
import string
import getopt

def main():
    """Parse "[-t tabwidth] file ..." from sys.argv and process each file.

    The tab width defaults to 8.  On a usage error (unknown option, no
    file argument, non-integer tab width) the error and a usage line are
    printed and no file is touched.
    """
    tabsize = 8
    try:
        opts, args = getopt.getopt(sys.argv[1:], "t:")
        if not args:
            raise getopt.error("At least one file argument required")
        for optname, optvalue in opts:
            if optname == '-t':
                try:
                    tabsize = int(optvalue)
                except ValueError:
                    # Report a bad width like any other usage error
                    # rather than with a traceback.
                    raise getopt.error("Tab width must be an integer, not %s"
                                       % repr(optvalue))
    except getopt.error:
        sys.stdout.write("%s\n" % sys.exc_info()[1])
        sys.stdout.write("usage: %s [-t tabwidth] file ...\n" % sys.argv[0])
        return

    for file in args:
        process(file, tabsize)

def process(file, tabsize):
    try:
        f = open(file)
        text = f.read()
        f.close()
    except IOError, msg:
        print "%s: I/O error: %s" % (`file`, str(msg))
        return
    newtext = string.expandtabs(text, tabsize)
    if newtext == text:
        return
    backup = file + "~"
    try:
        os.unlink(backup)
    except os.error:
        pass
    try:
        os.rename(file, backup)
    except os.error:
        pass
    f = open(file, "w")
    f.write(newtext)
    f.close()
    print file

if __name__ == '__main__':
    main()

--- NEW FILE: which.py ---
#! /usr/bin/env python

# Variant of "which".
# On stderr, near and total misses are reported.
# '-l<flags>' argument adds ls -l<flags> of each file found.

import sys
if sys.path[0] in (".", ""): del sys.path[0]

import sys, os, string
from stat import *

def msg(str):
    # Write one diagnostic line to stderr.
    line = str + '\n'
    sys.stderr.write(line)

pathlist = string.splitfields(os.environ['PATH'], ':')

sts = 0
longlist = ''

if sys.argv[1:] and sys.argv[1][:2] == '-l':
    longlist = sys.argv[1]
    del sys.argv[1]

for prog in sys.argv[1:]:
    ident = ()
    for dir in pathlist:
        file = os.path.join(dir, prog)
        try:
            st = os.stat(file)
        except os.error:
            continue
        if not S_ISREG(st[ST_MODE]):
            msg(file + ': not a disk file')
        else:
            mode = S_IMODE(st[ST_MODE])
            if mode & 0111:
                if not ident:
                    print file
                    ident = st[:3]
                else:
                    if st[:3] == ident:
                        s = 'same as: '
                    else:
                        s = 'also: '
                    msg(s + file)
            else:
                msg(file + ': not executable')
        if longlist:
            sts = os.system('ls ' + longlist + ' ' + file)
            if sts: msg('"ls -l" exit status: ' + `sts`)
    if not ident:
        msg(prog + ': not found')
        sts = 1

sys.exit(sts)

--- NEW FILE: xxci.py ---
#! /usr/bin/env python

# xxci
#
# check in files for which rcsdiff returns nonzero exit status

import sys
import os
from stat import *
import commands
import fnmatch
import string

# skipfile() skips every file whose first bytes equal this string.
# NOTE(review): presumably the executable magic number of the platform
# this script was written on -- confirm before relying on it.
EXECMAGIC = '\001\140\000\010'

MAXSIZE = 200*1024 # Files this big must be binaries and are skipped.

def getargs():
    args = sys.argv[1:]
    if args:
        return args
    print 'No arguments, checking almost *, in "ls -t" order'
    list = []
    for file in os.listdir(os.curdir):
        if not skipfile(file):
            list.append((getmtime(file), file))
    list.sort()
    if not list:
        print 'Nothing to do -- exit 1'
        sys.exit(1)
    list.sort()
    list.reverse()
    for mtime, file in list: args.append(file)
    return args

def getmtime(file):
    """Return the modification time of file, or -1 if it cannot be stat'ed."""
    try:
        return os.stat(file)[ST_MTIME]
    except os.error:
        return -1

# File names that skipfile() rejects outright.
badnames = ['tags', 'TAGS', 'xyzzy', 'nohup.out', 'core']
# Name prefixes and suffixes to reject; setup() turns them into the
# fnmatch patterns "<prefix>*" and "*<suffix>".
badprefixes = ['.', ',', '@', '#', 'o.']
badsuffixes = \
        ['~', '.a', '.o', '.old', '.bak', '.orig', '.new', '.prev', '.not', \
         '.pyc', '.fdc', '.rgb', '.elc', ',v']
# Patterns checked by skipfile(); filled in by setup().
ignore = []

def setup():
    ignore[:] = badnames
    for p in badprefixes:
        ignore.append(p + '*')
    for p in badsuffixes:
        ignore.append('*' + p)
    try:
        f = open('.xxcign', 'r')
    except IOError:
        return
    ignore[:] = ignore + string.split(f.read())

def skipfile(file):
    """Return 1 if file should not be considered for check-in, else 0.

    A file is skipped when its name matches a pattern in ignore, when it
    does not exist or is not a regular file, when it is MAXSIZE bytes or
    larger, or when it starts with EXECMAGIC.  A file whose first bytes
    cannot be read is not skipped.
    """
    for p in ignore:
        if fnmatch.fnmatch(file, p): return 1
    try:
        st = os.lstat(file)
    except os.error:
        return 1 # Doesn't exist -- skip it
    # Skip non-plain files.
    if not S_ISREG(st[ST_MODE]): return 1
    # Skip huge files -- probably binaries.
    if st[ST_SIZE] >= MAXSIZE: return 1
    # Skip executables
    # Only I/O errors are tolerated here; a bare "except:" would also
    # swallow KeyboardInterrupt and hide real bugs.
    try:
        f = open(file, 'r')
    except IOError:
        return 0
    try:
        try:
            data = f.read(len(EXECMAGIC))
        except IOError:
            return 0
    finally:
        f.close()
    if data == EXECMAGIC: return 1
    return 0

def badprefix(file):
    """Return 1 if file starts with one of badprefixes, else 0."""
    for prefix in badprefixes:
        if file.startswith(prefix):
            return 1
    return 0

def badsuffix(file):
    """Return 1 if file ends with one of badsuffixes, else 0."""
    for suffix in badsuffixes:
        tail = file[-len(suffix):]
        if tail == suffix:
            return 1
    return 0

def go(args):
    for file in args:
        print file + ':'
        if differing(file):
            showdiffs(file)
            if askyesno('Check in ' + file + ' ? '):
                sts = os.system('rcs -l ' + file) # ignored
                sts = os.system('ci -l ' + file)

def differing(file):
    """Return true if the "co -p file | cmp -s - file" pipeline fails."""
    pipeline = 'co -p %s 2>/dev/null | cmp -s - %s' % (file, file)
    return os.system(pipeline) != 0

def showdiffs(file):
    """Page the output of rcsdiff for file through $PAGER (default: more)."""
    os.system('rcsdiff %s 2>&1 | ${PAGER-more}' % (file,))

def askyesno(prompt):
    """Prompt the user; only the exact replies "y" and "yes" count as yes."""
    return raw_input(prompt) in ('y', 'yes')

# Script body: build the ignore patterns, pick the files and offer each
# differing one for check-in.  An interrupt from the keyboard ends the
# run with a short note instead of a traceback.
try:
    setup()
    go(getargs())
except KeyboardInterrupt:
    print '[Intr]'