[Python-checkins] CVS: python/dist/src/Tools/scripts dutree.doc,NONE,1.1.12.1 dutree.py,NONE,1.10.6.1 eptags.py,NONE,1.8.8.1 findlinksto.py,NONE,1.8.6.1 fixcid.py,NONE,1.9.6.1 fixheader.py,NONE,1.3.6.1 fixnotice.py,NONE,1.5.6.1 fixps.py,NONE,1.5.6.1 ftpmirror.py,NONE,1.14.2.1 gencodec.py,NONE,1.6.2.1 h2py.py,NONE,1.12.6.1 ifdef.py,NONE,1.4.6.1 lfcr.py,NONE,1.3.10.1 linktree.py,NONE,1.6.6.1 lll.py,NONE,1.5.6.1 logmerge.py,NONE,1.7.4.1 mailerdaemon.py,NONE,1.9.6.1 md5sum.py,NONE,1.1.2.1 methfix.py,NONE,1.6.6.1 mkreal.py,NONE,1.6.6.1 ndiff.py,NONE,1.8.6.1 nm2def.py,NONE,1.4.6.1 objgraph.py,NONE,1.5.6.1 parseentities.py,NONE,1.3.6.1 pathfix.py,NONE,1.4.6.1 pdeps.py,NONE,1.5.6.1 pindent.py,NONE,1.10.6.1 ptags.py,NONE,1.7.6.1 pydoc.pyw,NONE,1.1.4.1 redemo.py,NONE,1.2.6.1 reindent.py,NONE,1.1.8.1 rgrep.py,NONE,1.1.12.1 suff.py,NONE,1.5.6.1 sum5.py,NONE,1.3.6.1 texi2html.py,NONE,1.12.6.1 trace.py,NONE,1.4.2.1 treesync.py,NONE,1.5.6.1 untabify.py,NONE,1.2.12.1 which.py,NONE,1.9.6.1 xxci.py,NONE,1.14.6.1
Tim Peters
tim_one@users.sourceforge.net
Fri, 06 Jul 2001 10:08:51 -0700
Update of /cvsroot/python/python/dist/src/Tools/scripts
In directory usw-pr-cvs1:/tmp/cvs-serv3084
Added Files:
Tag: descr-branch
dutree.doc dutree.py eptags.py findlinksto.py fixcid.py
fixheader.py fixnotice.py fixps.py ftpmirror.py gencodec.py
h2py.py ifdef.py lfcr.py linktree.py lll.py logmerge.py
mailerdaemon.py md5sum.py methfix.py mkreal.py ndiff.py
nm2def.py objgraph.py parseentities.py pathfix.py pdeps.py
pindent.py ptags.py pydoc.pyw redemo.py reindent.py rgrep.py
suff.py sum5.py texi2html.py trace.py treesync.py untabify.py
which.py xxci.py
Log Message:
Adding "the rest" of Tools/scripts to descr-branch.
--- NEW FILE: dutree.doc ---
Path: cwi.nl!sun4nl!mcsun!uunet!cs.utexas.edu!convex!usenet
From: tchrist@convex.COM (Tom Christiansen)
Newsgroups: comp.lang.perl
Subject: Re: The problems of Perl (Re: Question (silly?))
Message-ID: <1992Jan17.053115.4220@convex.com>
Date: 17 Jan 92 05:31:15 GMT
References: <17458@ector.cs.purdue.edu> <1992Jan16.165347.25583@cherokee.uswest.com> <=#Hues+4@cs.psu.edu>
Sender: usenet@convex.com (news access account)
Reply-To: tchrist@convex.COM (Tom Christiansen)
Organization: CONVEX Realtime Development, Colorado Springs, CO
Lines: 83
Nntp-Posting-Host: pixel.convex.com
>From the keyboard of flee@cs.psu.edu (Felix Lee):
:And Perl is definitely awkward with data types. I haven't yet found a
:pleasant way of shoving non-trivial data types into Perl's grammar.
Yes, it's pretty awful at that, alright. Sometimes I write perl programs
that need them, and sometimes it just takes a little creativity. But
sometimes it's not worth it. I actually wrote a C program the other day
(gasp) because I didn't want to deal with a game matrix with six links per node.
:Here's a very simple problem that's tricky to express in Perl: process
:the output of "du" to produce output that's indented to reflect the
:tree structure, and with each subtree sorted by size. Something like:
: 434 /etc
: | 344 .
: | 50 install
: | 35 uucp
: | 3 nserve
: | | 2 .
: | | 1 auth.info
: | 1 sm
: | 1 sm.bak
At first I thought I could just keep one local list around
at once, but this seems inherently recursive. Which means
I need a real recursive data structure. Maybe you could
do it with one of the %assoc arrays Larry uses in the begat
programs, but I broke down and got dirty. I think the hardest
part was matching Felix's desired output exactly. It's not
blazingly fast: I should probably inline the &childof routine,
but it *was* faster to write than I could have written the
equivalent C program.
--tom
--
"GUIs normally make it simple to accomplish simple actions and impossible
to accomplish complex actions." --Doug Gwyn (22/Jun/91 in comp.unix.wizards)
Tom Christiansen tchrist@convex.com convex!tchrist
--- NEW FILE: dutree.py ---
#! /usr/bin/env python
# Format du output in a tree shape
import os, string, sys, errno
def main():
p = os.popen('du ' + string.join(sys.argv[1:]), 'r')
total, d = None, {}
for line in p.readlines():
i = 0
while line[i] in '0123456789': i = i+1
size = eval(line[:i])
while line[i] in ' \t': i = i+1
file = line[i:-1]
comps = string.splitfields(file, '/')
if comps[0] == '': comps[0] = '/'
if comps[len(comps)-1] == '': del comps[len(comps)-1]
total, d = store(size, comps, total, d)
try:
display(total, d)
except IOError, e:
if e.errno != errno.EPIPE:
raise
def store(size, comps, total, d):
    """Record `size` for the path `comps` in the tree and return the node.

    A node is a pair ``(total, children)`` where `children` maps a path
    component to the node below it.  `total` and `d` are the two halves of
    the node the path is relative to.  With an empty `comps` the size
    belongs to this very node, so ``(size, d)`` is returned; otherwise the
    child named by the first component is created if necessary, updated
    recursively, and ``(total, d)`` is returned.  `d` is modified in place.
    """
    if not comps:
        return size, d
    head = comps[0]
    # A child not seen before starts out without a size and without children.
    subtotal, subtree = d.get(head, (None, {}))
    d[head] = store(size, comps[1:], subtotal, subtree)
    return total, d
def display(total, d):
    """Print the whole tree; `total` and `d` are the two halves of the root."""
    # The top level is printed without any indentation prefix.
    no_prefix = ''
    show(total, d, no_prefix)
def show(total, d, prefix):
if not d: return
list = []
sum = 0
for key in d.keys():
tsub, dsub = d[key]
list.append((tsub, key))
if tsub is not None: sum = sum + tsub
## if sum < total:
## list.append((total - sum, os.curdir))
list.sort()
list.reverse()
width = len(`list[0][0]`)
for tsub, key in list:
if tsub is None:
psub = prefix
else:
print prefix + string.rjust(`tsub`, width) + ' ' + key
psub = prefix + ' '*(width-1) + '|' + ' '*(len(key)+1)
if d.has_key(key):
show(tsub, d[key][1], psub)
main()
--- NEW FILE: eptags.py ---
#! /usr/bin/env python
"""Create a TAGS file for Python programs, usable with GNU Emacs.
usage: eptags pyfiles...
The output TAGS file is usable with Emacs version 18, 19, 20.
Tagged are:
- functions (even inside other defs or classes)
- classes
eptags warns about files it cannot open.
eptags will not give warnings about duplicate tags.
BUGS:
Because of tag duplication (methods with the same name in different
classes), TAGS files are not very useful for most object-oriented
python projects.
"""
import sys,re
# Matches the start of a "def" or "class" statement, up to and including
# the '(' or ':' that follows the name.
expr = r'^[ \t]*(def|class)[ \t]+([a-zA-Z_][a-zA-Z0-9_]*)[ \t]*[:\(]'
matcher = re.compile(expr)

def treat_file(file, outfp):
    """Append tags found in file named 'file' to the open file 'outfp'.

    One section is written per file: a form feed line, then
    ``<file>,<size of the tag lines>``, then one line per def/class made
    of the matched text, a DEL character, the line number and the
    character offset of the start of the line.  If the file cannot be
    opened a message is written to stderr and nothing is written to
    'outfp'.
    """
    try:
        fp = open(file, 'r')
    except IOError:
        sys.stderr.write('Cannot open %s\n'%file)
        return
    charno = 0
    lineno = 0
    tags = []
    size = 0
    try:
        while 1:
            line = fp.readline()
            if not line:
                break
            lineno = lineno + 1
            m = matcher.search(line)
            if m:
                tag = m.group(0) + '\177%d,%d\n'%(lineno,charno)
                tags.append(tag)
                size = size + len(tag)
            charno = charno + len(line)
    finally:
        # Release the input file even if reading fails half-way.
        fp.close()
    outfp.write('\f\n%s,%d\n'%(file,size))
    for tag in tags:
        outfp.write(tag)
def main():
    """Write tags for every file named on the command line to ./TAGS."""
    outfp = open('TAGS', 'w')
    try:
        for file in sys.argv[1:]:
            treat_file(file, outfp)
    finally:
        # Make sure the tags reach the disk even if a file fails.
        outfp.close()

if __name__=="__main__":
    main()
--- NEW FILE: findlinksto.py ---
#! /usr/bin/env python
# findlinksto
#
# find symbolic links to a path matching a regular expression
import os
import sys
import regex
import getopt
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], '')
if len(args) < 2:
raise getopt.error, 'not enough arguments'
except getopt.error, msg:
sys.stdout = sys.stderr
print msg
print 'usage: findlinksto pattern directory ...'
sys.exit(2)
pat, dirs = args[0], args[1:]
prog = regex.compile(pat)
for dirname in dirs:
os.path.walk(dirname, visit, prog)
def visit(prog, dirname, names):
if os.path.islink(dirname):
names[:] = []
return
if os.path.ismount(dirname):
print 'descend into', dirname
for name in names:
name = os.path.join(dirname, name)
try:
linkto = os.readlink(name)
if prog.search(linkto) >= 0:
print name, '->', linkto
except os.error:
pass
main()
--- NEW FILE: fixcid.py ---
#! /usr/bin/env python
# Perform massive identifier substitution on C source files.
# This actually tokenizes the files (to some extent) so it can
# avoid making substitutions inside strings or comments.
# Inside strings, substitutions are never made; inside comments,
# it is a user option (off by default).
#
# The substitutions are read from one or more files whose lines,
# when not empty, after stripping comments starting with #,
# must contain exactly two words separated by whitespace: the
# old identifier and its replacement.
#
# The option -r reverses the sense of the substitutions (this may be
# useful to undo a particular substitution).
#
# If the old identifier is prefixed with a '*' (with no intervening
# whitespace), then it will not be substituted inside comments.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a C file (ends in .h or .c). The special filename '-' means
# operate in filter mode: read stdin, write stdout.
#
# Symbolic links are always ignored (except as explicit directory
# arguments).
#
# The original files are kept as back-up with a "~" suffix.
#
# Changes made are reported to stdout in a diff-like format.
#
# NB: by changing only the function fixline() you can turn this
# into a program for different changes to C source files; by
# changing the function wanted() you can make a different selection of
# files.
import sys
import regex
import string
import os
from stat import *
import getopt
err = sys.stderr.write
dbg = err
rep = sys.stdout.write
def usage():
    """Write the command synopsis and option summary to stderr."""
    progname = sys.argv[0]
    err('Usage: ' + progname +
        ' [-c] [-r] [-s file] ... file-or-directory ...\n')
    for text in (
        '\n',
        '-c : substitute inside comments\n',
        '-r : reverse direction for following -s options\n',
        '-s substfile : add a file of substitutions\n',
        '\n',
        'Each non-empty non-comment line in a substitution file must\n',
        'contain exactly two words: an identifier and its replacement.\n',
        'Comments start with a # character and end at end of line.\n',
        'If an identifier is preceded with a *, it is not substituted\n',
        'inside a comment even when -c is specified.\n',
        ):
        err(text)
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], 'crs:')
except getopt.error, msg:
err('Options error: ' + str(msg) + '\n')
usage()
sys.exit(2)
bad = 0
if not args: # No arguments
usage()
sys.exit(2)
for opt, arg in opts:
if opt == '-c':
setdocomments()
if opt == '-r':
setreverse()
if opt == '-s':
addsubst(arg)
for arg in args:
if os.path.isdir(arg):
if recursedown(arg): bad = 1
elif os.path.islink(arg):
err(arg + ': will not process symbolic links\n')
bad = 1
else:
if fix(arg): bad = 1
sys.exit(bad)
# Change this regular expression to select a different set of files
Wanted = '^[a-zA-Z0-9_]+\.[ch]$'
def wanted(name):
return regex.match(Wanted, name) >= 0
def recursedown(dirname):
dbg('recursedown(' + `dirname` + ')\n')
bad = 0
try:
names = os.listdir(dirname)
except os.error, msg:
err(dirname + ': cannot list directory: ' + str(msg) + '\n')
return 1
names.sort()
subdirs = []
for name in names:
if name in (os.curdir, os.pardir): continue
fullname = os.path.join(dirname, name)
if os.path.islink(fullname): pass
elif os.path.isdir(fullname):
subdirs.append(fullname)
elif wanted(name):
if fix(fullname): bad = 1
for fullname in subdirs:
if recursedown(fullname): bad = 1
return bad
def fix(filename):
## dbg('fix(' + `filename` + ')\n')
if filename == '-':
# Filter mode
f = sys.stdin
g = sys.stdout
else:
# File replacement mode
try:
f = open(filename, 'r')
except IOError, msg:
err(filename + ': cannot open: ' + str(msg) + '\n')
return 1
head, tail = os.path.split(filename)
tempname = os.path.join(head, '@' + tail)
g = None
# If we find a match, we rewind the file and start over but
# now copy everything to a temp file.
lineno = 0
initfixline()
while 1:
line = f.readline()
if not line: break
lineno = lineno + 1
while line[-2:] == '\\\n':
nextline = f.readline()
if not nextline: break
line = line + nextline
lineno = lineno + 1
newline = fixline(line)
if newline != line:
if g is None:
try:
g = open(tempname, 'w')
except IOError, msg:
f.close()
err(tempname+': cannot create: '+
str(msg)+'\n')
return 1
f.seek(0)
lineno = 0
initfixline()
rep(filename + ':\n')
continue # restart from the beginning
rep(`lineno` + '\n')
rep('< ' + line)
rep('> ' + newline)
if g is not None:
g.write(newline)
# End of file
if filename == '-': return 0 # Done in filter mode
f.close()
if not g: return 0 # No changes
# Finishing touch -- move files
# First copy the file's mode to the temp file
try:
statbuf = os.stat(filename)
os.chmod(tempname, statbuf[ST_MODE] & 07777)
except os.error, msg:
err(tempname + ': warning: chmod failed (' + str(msg) + ')\n')
# Then make a backup of the original file as filename~
try:
os.rename(filename, filename + '~')
except os.error, msg:
err(filename + ': warning: backup failed (' + str(msg) + ')\n')
# Now move the temp file to the original file
try:
os.rename(tempname, filename)
except os.error, msg:
err(filename + ': rename failed (' + str(msg) + ')\n')
return 1
# Return succes
return 0
# Tokenizing ANSI C (partly)
#
# The patterns below are compiled with regex.compile(); groups are
# written \( \) and alternatives \| in them.

# NOTE(review): the trailing '+' means an identifier must be at least two
# characters long to match -- confirm that single-letter names are meant
# to be left alone.
Identifier = '\(struct \)?[a-zA-Z_][a-zA-Z0-9_]+'
# String and character literals, with backslash escapes.
String = '"\([^\n\\"]\|\\\\.\)*"'
Char = '\'\([^\n\\\']\|\\\\.\)*\''
CommentStart = '/\*'
CommentEnd = '\*/'

# Integer literals, with optional u/U/l/L suffixes.
Hexnumber = '0[xX][0-9a-fA-F]*[uUlL]*'
Octnumber = '0[0-7]*[uUlL]*'
Decnumber = '[1-9][0-9]*[uUlL]*'
Intnumber = Hexnumber + '\|' + Octnumber + '\|' + Decnumber
# Floating point literals.
Exponent = '[eE][-+]?[0-9]+'
Pointfloat = '\([0-9]+\.[0-9]*\|\.[0-9]+\)\(' + Exponent + '\)?'
Expfloat = '[0-9]+' + Exponent
Floatnumber = Pointfloat + '\|' + Expfloat
Number = Floatnumber + '\|' + Intnumber

# Anything else is an operator -- don't list this explicitly because of '/*'

# Tokens recognized while outside a comment; finding CommentStart makes
# fixline() switch to the inside-comment program.
OutsideComment = (Identifier, Number, String, Char, CommentStart)
OutsideCommentPattern = '\(' + string.joinfields(OutsideComment, '\|') + '\)'
OutsideCommentProgram = regex.compile(OutsideCommentPattern)

# Tokens recognized while inside a comment; strings are not tokens here,
# and finding CommentEnd switches back.
InsideComment = (Identifier, Number, CommentEnd)
InsideCommentPattern = '\(' + string.joinfields(InsideComment, '\|') + '\)'
InsideCommentProgram = regex.compile(InsideCommentPattern)
def initfixline():
    """Reset the tokenizer to the "outside a comment" state.

    Called by fix() before each pass over a file.
    """
    global Program
    Program = OutsideCommentProgram
def fixline(line):
    """Return `line` with the identifiers found in `Dict` substituted.

    The global `Program` holds the tokenizer for the current state
    (inside or outside a comment); it is switched when a comment start or
    end is found, and the state carries over to the next call.  Inside a
    comment, substitutions are only made when `Docomments` is set, and
    never for identifiers listed in `NotInComment`.
    """
    global Program
##  print '-->', `line`
    i = 0
    while i < len(line):
        # Position of the next token at or after i; negative if none.
        i = Program.search(line, i)
        if i < 0: break
        found = Program.group(0)
##      if Program is InsideCommentProgram: print '...',
##      else: print '   ',
##      print found
        # Only two-character tokens can be comment delimiters.
        if len(found) == 2:
            if found == '/*':
                Program = InsideCommentProgram
            elif found == '*/':
                Program = OutsideCommentProgram
        n = len(found)
        if Dict.has_key(found):
            subst = Dict[found]
            if Program is InsideCommentProgram:
                if not Docomments:
                    print 'Found in comment:', found
                    # Skip the token without substituting.
                    i = i + n
                    continue
                if NotInComment.has_key(found):
##                  print 'Ignored in comment:',
##                  print found, '-->', subst
##                  print 'Line:', line,
                    # Marked with '*' in the substitution file: keep as is.
                    subst = found
##              else:
##                  print 'Substituting in comment:',
##                  print found, '-->', subst
##                  print 'Line:', line,
            line = line[:i] + subst + line[i+n:]
            # Continue the scan after the replacement text.
            n = len(subst)
        i = i + n
    return line
# True when substitutions are also to be made inside comments (-c).
Docomments = 0

def setdocomments():
    """Turn on substitution inside comments (the -c option)."""
    global Docomments
    Docomments = 1
# True when the two words on a substitution line are to be swapped (-r).
Reverse = 0

def setreverse():
    """Toggle the direction used by the -s options that follow (-r)."""
    global Reverse
    Reverse = not Reverse
Dict = {}
NotInComment = {}
def addsubst(substfile):
try:
fp = open(substfile, 'r')
except IOError, msg:
err(substfile + ': cannot read substfile: ' + str(msg) + '\n')
sys.exit(1)
lineno = 0
while 1:
line = fp.readline()
if not line: break
lineno = lineno + 1
try:
i = string.index(line, '#')
except string.index_error:
i = -1 # Happens to delete trailing \n
words = string.split(line[:i])
if not words: continue
if len(words) == 3 and words[0] == 'struct':
words[:2] = [words[0] + ' ' + words[1]]
elif len(words) <> 2:
err(substfile + ':' + `lineno` +
': warning: bad line: ' + line)
continue
if Reverse:
[value, key] = words
else:
[key, value] = words
if value[0] == '*':
value = value[1:]
if key[0] == '*':
key = key[1:]
NotInComment[key] = value
if Dict.has_key(key):
err(substfile + ':' + `lineno` +
': warning: overriding: ' +
key + ' ' + value + '\n')
err(substfile + ':' + `lineno` +
': warning: previous: ' + Dict[key] + '\n')
Dict[key] = value
fp.close()
main()
--- NEW FILE: fixheader.py ---
#! /usr/bin/env python
# Add some standard cpp magic to a header file
import sys
import string
def main():
    """Process every header file named on the command line."""
    for file in sys.argv[1:]:
        process(file)
def process(file):
try:
f = open(file, 'r')
except IOError, msg:
sys.stderr.write('%s: can\'t open: %s\n' % (file, str(msg)))
return
data = f.read()
f.close()
if data[:2] <> '/*':
sys.stderr.write('%s does not begin with C comment\n' % file)
return
try:
f = open(file, 'w')
except IOError, msg:
sys.stderr.write('%s: can\'t write: %s\n' % (file, str(msg)))
return
sys.stderr.write('Processing %s ...\n' % file)
magic = 'Py_'
for c in file:
if c in string.letters + string.digits:
magic = magic + string.upper(c)
else: magic = magic + '_'
sys.stdout = f
print '#ifndef', magic
print '#define', magic
print '#ifdef __cplusplus'
print 'extern "C" {'
print '#endif'
print
f.write(data)
print
print '#ifdef __cplusplus'
print '}'
print '#endif'
print '#endif /*', '!'+magic, '*/'
main()
--- NEW FILE: fixnotice.py ---
#! /usr/bin/env python
# Text to look for.  process() locates it with a plain substring search,
# so it has to match the target files character for character.
OLD_NOTICE = """/***********************************************************
Copyright (c) 2000, BeOpen.com.
Copyright (c) 1995-2000, Corporation for National Research Initiatives.
Copyright (c) 1990-1995, Stichting Mathematisch Centrum.
All rights reserved.
See the file "Misc/COPYRIGHT" for information on usage and
redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
******************************************************************/
"""
# Text put in its place; empty, so the old notice is simply removed.
NEW_NOTICE = ""
# " <-- Help Emacs
import os, sys, string
def main():
args = sys.argv[1:]
if not args:
print "No arguments."
for arg in args:
process(arg)
def process(arg):
f = open(arg)
data = f.read()
f.close()
i = string.find(data, OLD_NOTICE)
if i < 0:
## print "No old notice in", arg
return
data = data[:i] + NEW_NOTICE + data[i+len(OLD_NOTICE):]
new = arg + ".new"
backup = arg + ".bak"
print "Replacing notice in", arg, "...",
sys.stdout.flush()
f = open(new, "w")
f.write(data)
f.close()
os.rename(arg, backup)
os.rename(new, arg)
print "done"
if __name__ == '__main__':
main()
--- NEW FILE: fixps.py ---
#!/usr/bin/env python
# Fix Python script(s) to reference the interpreter via /usr/bin/env python.
# Warning: this overwrites the file without making a backup.
import sys
import re
def main():
for file in sys.argv[1:]:
try:
f = open(file, 'r')
except IOError, msg:
print file, ': can\'t open :', msg
continue
line = f.readline()
if not re.match('^#! */usr/local/bin/python', line):
print file, ': not a /usr/local/bin/python script'
f.close()
continue
rest = f.read()
f.close()
line = re.sub('/usr/local/bin/python',
'/usr/bin/env python', line)
print file, ':', `line`
f = open(file, "w")
f.write(line)
f.write(rest)
f.close()
main()
--- NEW FILE: ftpmirror.py ---
#! /usr/bin/env python
"""Mirror a remote ftp subtree into a local directory tree.
usage: ftpmirror [-v] [-q] [-i] [-m] [-n] [-r] [-s pat]
[-l username [-p passwd [-a account]]]
hostname [remotedir [localdir]]
-v: verbose
-q: quiet
-i: interactive mode
-m: macintosh server (NCSA telnet 2.4) (implies -n -s '*.o')
-n: don't log in
-r: remove local files/directories no longer pertinent
-l username [-p passwd [-a account]]: login info (default .netrc or anonymous)
-s pat: skip files matching pattern
hostname: remote host
remotedir: remote directory (default initial)
localdir: local directory (default current)
"""
import os
import sys
import time
import getopt
import string
import ftplib
import netrc
from fnmatch import fnmatch
# Print usage message and exit
def usage(*args):
sys.stdout = sys.stderr
for msg in args: print msg
print __doc__
sys.exit(2)
# Global settings, changed by main() according to the command line options
verbose = 1 # 0 for -q, 2 for -v
interactive = 0 # set by -i
mac = 0 # set by -m
rmok = 0 # set by -r
nologin = 0 # set by -n (and by -m)
# fnmatch patterns for names that are never retrieved or removed;
# -s (and -m) append to this list
skippats = ['.', '..', '.mirrorinfo']
# Main program: parse command line and start processing
def main():
global verbose, interactive, mac, rmok, nologin
try:
opts, args = getopt.getopt(sys.argv[1:], 'a:bil:mnp:qrs:v')
except getopt.error, msg:
usage(msg)
login = ''
passwd = ''
account = ''
if not args: usage('hostname missing')
host = args[0]
try:
auth = netrc.netrc().authenticators(host)
if auth is not None:
login, account, passwd = auth
except (netrc.NetrcParseError, IOError):
pass
for o, a in opts:
if o == '-l': login = a
if o == '-p': passwd = a
if o == '-a': account = a
if o == '-v': verbose = verbose + 1
if o == '-q': verbose = 0
if o == '-i': interactive = 1
if o == '-m': mac = 1; nologin = 1; skippats.append('*.o')
if o == '-n': nologin = 1
if o == '-r': rmok = 1
if o == '-s': skippats.append(a)
remotedir = ''
localdir = ''
if args[1:]:
remotedir = args[1]
if args[2:]:
localdir = args[2]
if args[3:]: usage('too many arguments')
#
f = ftplib.FTP()
if verbose: print 'Connecting to %s...' % `host`
f.connect(host)
if not nologin:
if verbose:
print 'Logging in as %s...' % `login or 'anonymous'`
f.login(login, passwd, account)
if verbose: print 'OK.'
pwd = f.pwd()
if verbose > 1: print 'PWD =', `pwd`
if remotedir:
if verbose > 1: print 'cwd(%s)' % `remotedir`
f.cwd(remotedir)
if verbose > 1: print 'OK.'
pwd = f.pwd()
if verbose > 1: print 'PWD =', `pwd`
#
mirrorsubdir(f, localdir)
# Core logic: mirror one subdirectory (recursively)
def mirrorsubdir(f, localdir):
pwd = f.pwd()
if localdir and not os.path.isdir(localdir):
if verbose: print 'Creating local directory', `localdir`
try:
makedir(localdir)
except os.error, msg:
print "Failed to establish local directory", `localdir`
return
infofilename = os.path.join(localdir, '.mirrorinfo')
try:
text = open(infofilename, 'r').read()
except IOError, msg:
text = '{}'
try:
info = eval(text)
except (SyntaxError, NameError):
print 'Bad mirror info in %s' % `infofilename`
info = {}
subdirs = []
listing = []
if verbose: print 'Listing remote directory %s...' % `pwd`
f.retrlines('LIST', listing.append)
filesfound = []
for line in listing:
if verbose > 1: print '-->', `line`
if mac:
# Mac listing has just filenames;
# trailing / means subdirectory
filename = string.strip(line)
mode = '-'
if filename[-1:] == '/':
filename = filename[:-1]
mode = 'd'
infostuff = ''
else:
# Parse, assuming a UNIX listing
words = string.split(line, None, 8)
if len(words) < 6:
if verbose > 1: print 'Skipping short line'
continue
filename = string.lstrip(words[-1])
i = string.find(filename, " -> ")
if i >= 0:
# words[0] had better start with 'l'...
if verbose > 1:
print 'Found symbolic link %s' % `filename`
linkto = filename[i+4:]
filename = filename[:i]
infostuff = words[-5:-1]
mode = words[0]
skip = 0
for pat in skippats:
if fnmatch(filename, pat):
if verbose > 1:
print 'Skip pattern', `pat`,
print 'matches', `filename`
skip = 1
break
if skip:
continue
if mode[0] == 'd':
if verbose > 1:
print 'Remembering subdirectory', `filename`
subdirs.append(filename)
continue
filesfound.append(filename)
if info.has_key(filename) and info[filename] == infostuff:
if verbose > 1:
print 'Already have this version of',`filename`
continue
fullname = os.path.join(localdir, filename)
tempname = os.path.join(localdir, '@'+filename)
if interactive:
doit = askabout('file', filename, pwd)
if not doit:
if not info.has_key(filename):
info[filename] = 'Not retrieved'
continue
try:
os.unlink(tempname)
except os.error:
pass
if mode[0] == 'l':
if verbose:
print "Creating symlink %s -> %s" % (
`filename`, `linkto`)
try:
os.symlink(linkto, tempname)
except IOError, msg:
print "Can't create %s: %s" % (
`tempname`, str(msg))
continue
else:
try:
fp = open(tempname, 'wb')
except IOError, msg:
print "Can't create %s: %s" % (
`tempname`, str(msg))
continue
if verbose:
print 'Retrieving %s from %s as %s...' % \
(`filename`, `pwd`, `fullname`)
if verbose:
fp1 = LoggingFile(fp, 1024, sys.stdout)
else:
fp1 = fp
t0 = time.time()
try:
f.retrbinary('RETR ' + filename,
fp1.write, 8*1024)
except ftplib.error_perm, msg:
print msg
t1 = time.time()
bytes = fp.tell()
fp.close()
if fp1 != fp:
fp1.close()
try:
os.unlink(fullname)
except os.error:
pass # Ignore the error
try:
os.rename(tempname, fullname)
except os.error, msg:
print "Can't rename %s to %s: %s" % (`tempname`,
`fullname`,
str(msg))
continue
info[filename] = infostuff
writedict(info, infofilename)
if verbose and mode[0] != 'l':
dt = t1 - t0
kbytes = bytes / 1024.0
print int(round(kbytes)),
print 'Kbytes in',
print int(round(dt)),
print 'seconds',
if t1 > t0:
print '(~%d Kbytes/sec)' % \
int(round(kbytes/dt),)
print
#
# Remove files from info that are no longer remote
deletions = 0
for filename in info.keys():
if filename not in filesfound:
if verbose:
print "Removing obsolete info entry for",
print `filename`, "in", `localdir or "."`
del info[filename]
deletions = deletions + 1
if deletions:
writedict(info, infofilename)
#
# Remove local files that are no longer in the remote directory
try:
if not localdir: names = os.listdir(os.curdir)
else: names = os.listdir(localdir)
except os.error:
names = []
for name in names:
if name[0] == '.' or info.has_key(name) or name in subdirs:
continue
skip = 0
for pat in skippats:
if fnmatch(name, pat):
if verbose > 1:
print 'Skip pattern', `pat`,
print 'matches', `name`
skip = 1
break
if skip:
continue
fullname = os.path.join(localdir, name)
if not rmok:
if verbose:
print 'Local file', `fullname`,
print 'is no longer pertinent'
continue
if verbose: print 'Removing local file/dir', `fullname`
remove(fullname)
#
# Recursively mirror subdirectories
for subdir in subdirs:
if interactive:
doit = askabout('subdirectory', subdir, pwd)
if not doit: continue
if verbose: print 'Processing subdirectory', `subdir`
localsubdir = os.path.join(localdir, subdir)
pwd = f.pwd()
if verbose > 1:
print 'Remote directory now:', `pwd`
print 'Remote cwd', `subdir`
try:
f.cwd(subdir)
except ftplib.error_perm, msg:
print "Can't chdir to", `subdir`, ":", `msg`
else:
if verbose: print 'Mirroring as', `localsubdir`
mirrorsubdir(f, localsubdir)
if verbose > 1: print 'Remote cwd ..'
f.cwd('..')
newpwd = f.pwd()
if newpwd != pwd:
print 'Ended up in wrong directory after cd + cd ..'
print 'Giving up now.'
break
else:
if verbose > 1: print 'OK.'
# Helper to remove a file or directory tree
def remove(fullname):
    """Remove `fullname`; return 1 on success and 0 on failure.

    A real directory (not a symbolic link to one) is emptied recursively
    first and is only removed if everything inside it could be removed.
    Failures are reported on stdout.
    """
    if os.path.islink(fullname) or not os.path.isdir(fullname):
        # Plain file, symbolic link, or anything else that is not a
        # real directory.
        try:
            os.unlink(fullname)
        except os.error:
            msg = sys.exc_info()[1]
            print("Can't remove local file %s: %s" %
                  (repr(fullname), str(msg)))
            return 0
        return 1
    try:
        names = os.listdir(fullname)
    except os.error:
        names = []
    failures = 0
    for name in names:
        # Keep going after a failure so as much as possible is removed.
        if not remove(os.path.join(fullname, name)):
            failures = failures + 1
    if failures:
        return 0
    try:
        os.rmdir(fullname)
    except os.error:
        msg = sys.exc_info()[1]
        print("Can't remove local directory %s: %s" %
              (repr(fullname), str(msg)))
        return 0
    return 1
# Wrapper around a file for writing to write a hash sign every block.
class LoggingFile:
    """Pass writes on to `fp` and print one '#' on `outfp` per block.

    close() only ends the line of hash signs; it does not close `fp`.
    """

    def __init__(self, fp, blocksize, outfp):
        self.fp, self.outfp = fp, outfp
        self.blocksize = blocksize
        # Bytes written and hash signs printed so far.
        self.bytes = self.hashes = 0

    def write(self, data):
        self.bytes += len(data)
        due = int(self.bytes) / self.blocksize
        # Catch up with all blocks completed by this write.
        while self.hashes < due:
            self.outfp.write('#')
            self.outfp.flush()
            self.hashes += 1
        self.fp.write(data)

    def close(self):
        self.outfp.write('\n')
# Ask permission to download a file.
def askabout(filetype, filename, pwd):
prompt = 'Retrieve %s %s from %s ? [ny] ' % (filetype, filename, pwd)
while 1:
reply = string.lower(string.strip(raw_input(prompt)))
if reply in ['y', 'ye', 'yes']:
return 1
if reply in ['', 'n', 'no', 'nop', 'nope']:
return 0
print 'Please answer yes or no.'
# Create a directory if it doesn't exist. Recursively create the
# parent directory as well if needed.
def makedir(pathname):
if os.path.isdir(pathname):
return
dirname = os.path.dirname(pathname)
if dirname: makedir(dirname)
os.mkdir(pathname, 0777)
# Write a dictionary to a file in a way that can be read back using
# eval() but is still somewhat readable (i.e. not a single long line).
# Also creates a backup file.
def writedict(dict, filename):
    """Write `dict` to `filename`, one 'key: value,' line per item.

    The text goes to '@<name>' in the same directory first.  A previous
    `filename` is kept as '<name>~' (replacing any older backup) before
    the new file is renamed into place.
    """
    folder, base = os.path.split(filename)
    tempname = os.path.join(folder, '@' + base)
    backup = os.path.join(folder, base + '~')
    try:
        os.unlink(backup)
    except os.error:
        pass # There was no old backup
    fp = open(tempname, 'w')
    fp.write('{\n')
    for key, value in dict.items():
        fp.write('%s: %s,\n' % (repr(key), repr(value)))
    fp.write('}\n')
    fp.close()
    try:
        os.rename(filename, backup)
    except os.error:
        pass # There was no previous file to back up
    os.rename(tempname, filename)

if __name__ == '__main__':
    main()
--- NEW FILE: gencodec.py ---
""" Unicode Mapping Parser and Codec Generator.
This script parses Unicode mapping files as available from the Unicode
site (ftp://ftp.unicode.org/Public/MAPPINGS/) and creates Python codec
modules from them. The codecs use the standard character mapping codec
to actually apply the mapping.
Synopsis: gencodec.py dir codec_prefix
All files in dir are scanned and those producing non-empty mappings
will be written to <codec_prefix><mapname>.py with <mapname> being the
first part of the map's filename ('a' in a.b.c.txt) converted to
lowercase with hyphens replaced by underscores.
The tool also writes marshalled versions of the mapping tables to the
same location (with .mapping extension).
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright Guido van Rossum, 2000.
"""#"
import string,re,os,time,marshal
# Create numeric tables or character based ones ?
numeric = 1

# One line of a mapping file:
#   group 1: the encoded value, one or more 0x... codes joined by '+'
#   group 2: the Unicode value(s), 0x... codes and/or <meta> codes joined
#            by '+' (may be empty)
#   group 3: an optional trailing '#' comment
# NOTE(review): the character class in group 2 reads 'A-Z' where group 1
# has 'A-F'; this looks like a typo -- confirm before changing, since it
# affects which malformed lines are rejected.
mapRE = re.compile('((?:0x[0-9a-fA-F]+\+?)+)'
'\s+'
'((?:(?:0x[0-9a-fA-Z]+|<[A-Za-z]+>)\+?)*)'
'\s*'
'(#.+)?')
def parsecodes(codes,
split=string.split,atoi=string.atoi,len=len,
filter=filter,range=range):
""" Converts code combinations to either a single code integer
or a tuple of integers.
meta-codes (in angular brackets, e.g. <LR> and <RL>) are
ignored.
Empty codes or illegal ones are returned as None.
"""
if not codes:
return None
l = split(codes,'+')
if len(l) == 1:
return atoi(l[0],16)
for i in range(len(l)):
try:
l[i] = atoi(l[i],16)
except ValueError:
l[i] = None
l = filter(lambda x: x is not None, l)
if len(l) == 1:
return l[0]
else:
return tuple(l)
def readmap(filename,
            strip=string.strip):

    """ Reads the mapping file `filename` and returns it as dictionary.

        The dictionary maps each encoded value to a tuple
        (unicode value, comment).  If at least as many of the codes
        0-255 map to themselves as are left unmapped, the identity
        mappings are left out, the unmapped codes are mapped to
        (None, "") and the extra key 'IDENTITY' is set to 256.

    """
    f = open(filename,'r')
    try:
        lines = f.readlines()
    finally:
        f.close()
    enc2uni = {}
    identity = []
    unmapped = range(256)
    for line in lines:
        line = strip(line)
        if not line or line[0] == '#':
            continue
        m = mapRE.match(line)
        if not m:
            #print '* not matched: %s' % repr(line)
            continue
        enc,uni,comment = m.groups()
        enc = parsecodes(enc)
        uni = parsecodes(uni)
        if not comment:
            comment = ''
        else:
            comment = comment[1:]
        if enc < 256:
            # A code may be listed more than once; only the first
            # occurrence is still in the list.
            if enc in unmapped:
                unmapped.remove(enc)
            if enc == uni:
                if enc not in identity:
                    identity.append(enc)
            else:
                enc2uni[enc] = (uni,comment)
        else:
            enc2uni[enc] = (uni,comment)
    # If there are more identity-mapped entries than unmapped entries,
    # it pays to generate an identity dictionary first, and add explicit
    # mappings to None for the rest
    if len(identity)>=len(unmapped):
        for enc in unmapped:
            enc2uni[enc] = (None, "")
        enc2uni['IDENTITY'] = 256

    return enc2uni
def hexrepr(t,
join=string.join):
if t is None:
return 'None'
try:
len(t)
except:
return '0x%04x' % t
return '(' + join(map(lambda t: '0x%04x' % t, t),', ') + ')'
def unicoderepr(t,
                join=string.join):

    """ Returns the source text for the Unicode side of a mapping.

        With the global `numeric` set this is the same as hexrepr();
        otherwise it is the repr() of the Unicode string made of the
        code(s) in `t`.

    """
    if t is None:
        return 'None'
    if numeric:
        return hexrepr(t)
    else:
        try:
            len(t)
        except TypeError:
            # No length: a single code.
            return repr(unichr(t))
        return repr(join(map(unichr, t),''))
def keyrepr(t,
            join=string.join):

    """ Returns the source text for the encoded side of a mapping.

        With the global `numeric` set this is the same as hexrepr();
        otherwise it is the repr() of the character (a Unicode
        character for codes of 256 and up) or of the string made of
        the codes in `t`.

    """
    if t is None:
        return 'None'
    if numeric:
        return hexrepr(t)
    else:
        try:
            len(t)
        except TypeError:
            # No length: a single code.
            if t < 256:
                return repr(chr(t))
            else:
                return repr(unichr(t))
        return repr(join(map(chr, t),''))
def codegen(name,map,comments=1):

    """ Returns Python source for the given map.

        `name` is put into the docstring of the generated module and
        `map` is a dictionary as returned by readmap().  If `map` has
        an 'IDENTITY' key, the decoding map starts out as an identity
        dictionary of that size and the key is deleted from `map`
        (i.e. from the caller's dictionary).

        Comments are included in the source, if comments is true (default).

    """
    # NOTE(review): the template text below carries no indentation in
    # this copy of the file, so the generated class bodies would not be
    # valid Python as written -- confirm against the upstream file.
    l = [
        '''\
""" Python Character Mapping Codec generated from '%s' with gencodec.py.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
(c) Copyright 2000 Guido van Rossum.
"""#"
import codecs
### Codec APIs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_map)
class StreamWriter(Codec,codecs.StreamWriter):
pass
class StreamReader(Codec,codecs.StreamReader):
pass
### encodings module API
def getregentry():
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
### Decoding Map
''' % name,
        ]

    # `splits` counts the decoding_map.update({...}) calls opened so far;
    # 0 means the entries go into the initial dictionary literal.
    if map.has_key("IDENTITY"):
        l.append("decoding_map = codecs.make_identity_dict(range(%d))"
                 % map["IDENTITY"])
        l.append("decoding_map.update({")
        splits = 1
        del map["IDENTITY"]
    else:
        l.append("decoding_map = {")
        splits = 0

    mappings = map.items()
    mappings.sort()
    append = l.append
    i = 0
    for e,value in mappings:
        # Values are normally (unicode, comment) pairs; anything that
        # does not unpack is taken as a bare unicode value.
        try:
            (u,c) = value
        except TypeError:
            u = value
            c = ''
        key = keyrepr(e)
        if c and comments:
            append('\t%s: %s,\t# %s' % (key,unicoderepr(u),c))
        else:
            append('\t%s: %s,' % (key,unicoderepr(u)))
        i += 1
        if i == 4096:
            # Split the definition into parts so that the Python
            # parser doesn't dump core
            if splits == 0:
                append('}')
            else:
                append('})')
            append('decoding_map.update({')
            i = 0
            splits = splits + 1
    # Close the literal or update() call that is still open.
    if splits == 0:
        append('}')
    else:
        append('})')
    append('''
### Encoding Map
encoding_map = codecs.make_encoding_map(decoding_map)
''')
    return string.join(l,'\n')
def pymap(name,map,pyfile,comments=1):
    """Write the codec module source generated by codegen() to pyfile.

    name, map and comments are passed straight through to codegen().
    """
    code = codegen(name,map,comments)
    f = open(pyfile,'w')
    try:
        f.write(code)
    finally:
        # Close the file even when the write fails.
        f.close()
def marshalmap(name,map,marshalfile):
    """Dump map to marshalfile in marshal format.

    Every value of map must be a (unicode, comment) pair; the entries are
    copied into a fresh dictionary before dumping.  name is accepted for
    symmetry with pymap() and is not used.
    """
    d = {}
    for e,(u,c) in map.items():
        d[e] = (u,c)
    f = open(marshalfile,'wb')
    try:
        marshal.dump(d,f)
    finally:
        # Close the file even when the dump fails.
        f.close()
def convertdir(dir,prefix='',comments=1):
mapnames = os.listdir(dir)
for mapname in mapnames:
name = os.path.split(mapname)[1]
name = string.replace(name,'-','_')
name = string.split(name, '.')[0]
name = string.lower(name)
codefile = name + '.py'
marshalfile = name + '.mapping'
print 'converting %s to %s and %s' % (mapname,
prefix + codefile,
prefix + marshalfile)
try:
map = readmap(os.path.join(dir,mapname))
if not map:
print '* map is empty; skipping'
else:
pymap(mapname, map, prefix + codefile,comments)
marshalmap(mapname, map, prefix + marshalfile)
except ValueError:
print '* conversion failed'
def rewritepythondir(dir,prefix='',comments=1):
mapnames = os.listdir(dir)
for mapname in mapnames:
if not mapname.endswith('.mapping'):
continue
codefile = mapname[:-len('.mapping')] + '.py'
print 'converting %s to %s' % (mapname,
prefix + codefile)
try:
map = marshal.load(open(os.path.join(dir,mapname),
'rb'))
if not map:
print '* map is empty; skipping'
else:
pymap(mapname, map, prefix + codefile,comments)
except ValueError, why:
print '* conversion failed: %s' % why
if __name__ == '__main__':
    import sys
    # The command line arguments become the positional arguments of the
    # selected entry point.  The constant below is a manual switch between
    # converting mapping files and regenerating modules from .mapping files.
    if 1:
        convertdir(*sys.argv[1:])
    else:
        rewritepythondir(*sys.argv[1:])
--- NEW FILE: h2py.py ---
#! /usr/bin/env python
# Read #define's and translate to Python code.
# Handle #include statements.
# Handle #define macros with one argument.
# Anything that isn't recognized or doesn't translate into valid
# Python is ignored.
# Without filename arguments, acts as a filter.
# If one or more filenames are given, output is written to corresponding
# filenames in the local directory, translated to all uppercase, with
# the extension replaced by ".py".
# By passing one or more options of the form "-i regular_expression"
# you can specify additional strings to be ignored. This is useful
# e.g. to ignore casts to u_long: simply specify "-i '(u_long)'".
# XXX To do:
# - turn trailing C comments into Python comments
# - turn C Boolean operators "&& || !" into Python "and or not"
# - what to do about #if(def)?
# - what to do about macros with multiple parameters?
import sys, regex, regsub, string, getopt, os
# Patterns below use the syntax of the old 'regex' module, where \( \)
# delimit groups and \| is alternation.
# "#define NAME <whitespace>": group 1 is the defined name.
p_define = regex.compile('^[\t ]*#[\t ]*define[\t ]+\([a-zA-Z0-9_]+\)[\t ]+')
# "#define NAME(arg) <whitespace>": group 1 is the macro name, group 2
# its single argument.
p_macro = regex.compile(
'^[\t ]*#[\t ]*define[\t ]+'
'\([a-zA-Z0-9_]+\)(\([_a-zA-Z][_a-zA-Z0-9]*\))[\t ]+')
# "#include <file>": group 1 is the file name between the angle brackets.
p_include = regex.compile('^[\t ]*#[\t ]*include[\t ]+<\([a-zA-Z0-9_/\.]+\)')
# A C comment; the closing "*/" is optional.
p_comment = regex.compile('/\*\([^*]+\|\*+[^/]\)*\(\*+/\)?')
# A "//" comment running to the end of the line.
p_cpp_comment = regex.compile('//.*')
# Patterns whose matches are blanked out of a definition body; main()
# appends one more for every -i option.
ignores = [p_comment, p_cpp_comment]
# A single-quoted character literal, possibly backslash-escaped.
p_char = regex.compile("'\(\\\\.[^\\\\]*\|[^\\\\]\)'")
# Names of the include files already seen, used to skip repeated includes.
filedict = {}
# Directories searched for included files: the ';'-separated value of the
# environment variable 'include', else 'INCLUDE', else (only when
# sys.platform starts with "beos") 'BEINCLUDES', else /usr/include.
try:
searchdirs=string.splitfields(os.environ['include'],';')
except KeyError:
try:
searchdirs=string.splitfields(os.environ['INCLUDE'],';')
except KeyError:
try:
if string.find( sys.platform, "beos" ) == 0:
searchdirs=string.splitfields(os.environ['BEINCLUDES'],';')
else:
# Not BeOS: raise so the default below is used.
raise KeyError
except KeyError:
searchdirs=['/usr/include']
def main():
    """Translate the header files named on the command line.

    Each -i option adds a pattern to the module-level ignores list.
    Without file arguments (or for the argument '-') stdin is translated
    to stdout.  Otherwise each file's output goes to a file in the
    current directory whose name is the input's base name with the
    extension removed, uppercased, plus '.py'.
    """
    global filedict
    opts, args = getopt.getopt(sys.argv[1:], 'i:')
    for o, a in opts:
        if o == '-i':
            ignores.append(regex.compile(a))
    if not args:
        args = ['-']
    for filename in args:
        if filename == '-':
            sys.stdout.write('# Generated by h2py from stdin\n')
            process(sys.stdin, sys.stdout)
        else:
            fp = open(filename, 'r')
            try:
                outfile = os.path.basename(filename)
                i = string.rfind(outfile, '.')
                if i > 0: outfile = outfile[:i]
                outfile = string.upper(outfile)
                outfile = outfile + '.py'
                outfp = open(outfile, 'w')
                try:
                    outfp.write('# Generated by h2py from %s\n' % filename)
                    # Start each file with a fresh record of seen includes,
                    # pre-seeded with the file itself (relative to the
                    # search directory it lives in).
                    filedict = {}
                    for dir in searchdirs:
                        if filename[:len(dir)] == dir:
                            filedict[filename[len(dir)+1:]] = None  # no '/' trailing
                            break
                    process(fp, outfp)
                finally:
                    # Close both files even when translation fails.
                    outfp.close()
            finally:
                fp.close()
def process(fp, outfp, env = {}):
lineno = 0
while 1:
line = fp.readline()
if not line: break
lineno = lineno + 1
n = p_define.match(line)
if n >= 0:
# gobble up continuation lines
while line[-2:] == '\\\n':
nextline = fp.readline()
if not nextline: break
lineno = lineno + 1
line = line + nextline
name = p_define.group(1)
body = line[n:]
# replace ignored patterns by spaces
for p in ignores:
body = regsub.gsub(p, ' ', body)
# replace char literals by ord(...)
body = regsub.gsub(p_char, 'ord(\\0)', body)
stmt = '%s = %s\n' % (name, string.strip(body))
ok = 0
try:
exec stmt in env
except:
sys.stderr.write('Skipping: %s' % stmt)
else:
outfp.write(stmt)
n =p_macro.match(line)
if n >= 0:
macro, arg = p_macro.group(1, 2)
body = line[n:]
for p in ignores:
body = regsub.gsub(p, ' ', body)
body = regsub.gsub(p_char, 'ord(\\0)', body)
stmt = 'def %s(%s): return %s\n' % (macro, arg, body)
try:
exec stmt in env
except:
sys.stderr.write('Skipping: %s' % stmt)
else:
outfp.write(stmt)
if p_include.match(line) >= 0:
regs = p_include.regs
a, b = regs[1]
filename = line[a:b]
if not filedict.has_key(filename):
filedict[filename] = None
inclfp = None
for dir in searchdirs:
try:
inclfp = open(dir + '/' + filename, 'r')
break
except IOError:
pass
if inclfp:
outfp.write(
'\n# Included from %s\n' % filename)
process(inclfp, outfp, env)
else:
sys.stderr.write('Warning - could not find file %s' % filename)
main()
--- NEW FILE: ifdef.py ---
#! /usr/bin/env python
# Selectively preprocess #ifdef / #ifndef statements.
# Usage:
# ifdef [-Dname] ... [-Uname] ... [file] ...
#
# This scans the file(s), looking for #ifdef and #ifndef preprocessor
# commands that test for one of the names mentioned in the -D and -U
# options. On standard output it writes a copy of the input file(s)
# minus those code sections that are suppressed by the selected
# combination of defined/undefined symbols. The #if(n)def/#else/#endif
# lines themselves (if the #if(n)def tests for one of the mentioned
# names) are removed as well.
# Features: Arbitrary nesting of recognized and unrecognized
# preprocessor statements works correctly. Unrecognized #if* commands
# are left in place, so it will never remove too much, only too
# little. It does accept whitespace around the '#' character.
# Restrictions: There should be no comments or other symbols on the
# #if(n)def lines. The effect of #define/#undef commands in the input
# file or in included files is not taken into account. Tests using
# #if and the defined() pseudo function are not recognized. The #elif
# command is not recognized. Improper nesting is not detected.
# Lines that look like preprocessor commands but which are actually
# part of comments or string literals will be mistaken for
# preprocessor commands.
import sys
import regex
import getopt
import string
defs = []
undefs = []
def main():
    """Collect the -D/-U names and filter every input file to stdout.

    Names given with -D go to the module-level list defs, names given
    with -U to undefs.  Without file arguments, or for the argument '-',
    standard input is processed.
    """
    opts, args = getopt.getopt(sys.argv[1:], 'D:U:')
    for flag, symbol in opts:
        if flag == '-D':
            defs.append(symbol)
        elif flag == '-U':
            undefs.append(symbol)
    for file in (args or ['-']):
        if file != '-':
            f = open(file, 'r')
            process(f, sys.stdout)
            f.close()
        else:
            process(sys.stdin, sys.stdout)
def process(fpi, fpo):
    """Copy fpi to fpo, resolving #ifdef/#ifndef tests on selected names.

    Tests on names listed in the module-level lists defs and undefs are
    evaluated: the directive lines are dropped and only the selected
    branch is copied.  Every other line, including #if* tests on other
    names, is copied unchanged while the enclosing section is selected.
    A line consisting only of '#' is copied like any ordinary line.
    """
    keywords = ('if', 'ifdef', 'ifndef', 'else', 'endif')
    ok = 1        # true while the current section is being copied
    # One entry per open #if*: (ok before the test, ko, name).  ko is -1
    # for a test that is not evaluated, otherwise true when the section
    # currently being read is the selected one.
    stack = []
    while 1:
        line = fpi.readline()
        if not line: break
        # Treat backslash-continued lines as one logical line.
        while line[-2:] == '\\\n':
            nextline = fpi.readline()
            if not nextline: break
            line = line + nextline
        tmp = line.strip()
        if tmp[:1] != '#':
            if ok: fpo.write(line)
            continue
        words = tmp[1:].split()
        # A bare '#' has no keyword at all; pass it through instead of
        # failing on words[0].
        if not words or words[0] not in keywords:
            if ok: fpo.write(line)
            continue
        keyword = words[0]
        if keyword in ('ifdef', 'ifndef') and len(words) == 2:
            if keyword == 'ifdef':
                ko = 1
            else:
                ko = 0
            word = words[1]
            if word in defs:
                stack.append((ok, ko, word))
                if not ko: ok = 0
            elif word in undefs:
                stack.append((ok, not ko, word))
                if ko: ok = 0
            else:
                stack.append((ok, -1, word))
                if ok: fpo.write(line)
        elif keyword == 'if':
            stack.append((ok, -1, ''))
            if ok: fpo.write(line)
        elif keyword == 'else' and stack:
            s_ok, s_ko, s_word = stack[-1]
            if s_ko < 0:
                if ok: fpo.write(line)
            else:
                # Switch to the other branch of an evaluated test.
                s_ko = not s_ko
                ok = s_ok
                if not s_ko: ok = 0
                stack[-1] = s_ok, s_ko, s_word
        elif keyword == 'endif' and stack:
            s_ok, s_ko, s_word = stack[-1]
            if s_ko < 0:
                if ok: fpo.write(line)
            del stack[-1]
            ok = s_ok
        else:
            sys.stderr.write('Unknown keyword %s\n' % keyword)
    if stack:
        # Unterminated #if* sections remain at end of input.
        sys.stderr.write('stack: %s\n' % stack)
main()
--- NEW FILE: lfcr.py ---
#! /usr/bin/env python
"Replace LF with CRLF in argument files. Print names of changed files."
import sys, re, os
for file in sys.argv[1:]:
if os.path.isdir(file):
print file, "Directory!"
continue
data = open(file, "rb").read()
if '\0' in data:
print file, "Binary!"
continue
newdata = re.sub("\r?\n", "\r\n", data)
if newdata != data:
print file
f = open(file, "wb")
f.write(newdata)
f.close()
--- NEW FILE: linktree.py ---
#! /usr/bin/env python
# linktree
#
# Make a copy of a directory tree with symbolic links to all files in the
# original tree.
# All symbolic links go to a special symbolic link at the top, so you
# can easily fix things if the original source tree moves.
# See also "mkreal".
#
# usage: linktree oldtree newtree [linkto]
import sys, os
LINK = '.LINK' # Name of special symlink at the top.
debug = 0
def main():
if not 3 <= len(sys.argv) <= 4:
print 'usage:', sys.argv[0], 'oldtree newtree [linkto]'
return 2
oldtree, newtree = sys.argv[1], sys.argv[2]
if len(sys.argv) > 3:
link = sys.argv[3]
link_may_fail = 1
else:
link = LINK
link_may_fail = 0
if not os.path.isdir(oldtree):
print oldtree + ': not a directory'
return 1
try:
os.mkdir(newtree, 0777)
except os.error, msg:
print newtree + ': cannot mkdir:', msg
return 1
linkname = os.path.join(newtree, link)
try:
os.symlink(os.path.join(os.pardir, oldtree), linkname)
except os.error, msg:
if not link_may_fail:
print linkname + ': cannot symlink:', msg
return 1
else:
print linkname + ': warning: cannot symlink:', msg
linknames(oldtree, newtree, link)
return 0
def linknames(old, new, link):
if debug: print 'linknames', (old, new, link)
try:
names = os.listdir(old)
except os.error, msg:
print old + ': warning: cannot listdir:', msg
return
for name in names:
if name not in (os.curdir, os.pardir):
oldname = os.path.join(old, name)
linkname = os.path.join(link, name)
newname = os.path.join(new, name)
if debug > 1: print oldname, newname, linkname
if os.path.isdir(oldname) and \
not os.path.islink(oldname):
try:
os.mkdir(newname, 0777)
ok = 1
except:
print newname + \
': warning: cannot mkdir:', msg
ok = 0
if ok:
linkname = os.path.join(os.pardir,
linkname)
linknames(oldname, newname, linkname)
else:
os.symlink(linkname, newname)
sys.exit(main())
--- NEW FILE: lll.py ---
#! /usr/bin/env python
# Find symbolic links and show where they point to.
# Arguments are directories to search; default is current directory.
# No recursion.
# (This is a totally different program from "findsymlinks.py"!)
import sys, os
def lll(dirname):
for name in os.listdir(dirname):
if name not in (os.curdir, os.pardir):
full = os.path.join(dirname, name)
if os.path.islink(full):
print name, '->', os.readlink(full)
args = sys.argv[1:]
if not args: args = [os.curdir]
first = 1
for arg in args:
if len(args) > 1:
if not first: print
first = 0
print arg + ':'
lll(arg)
--- NEW FILE: logmerge.py ---
#! /usr/bin/env python
"""Consolidate a bunch of CVS or RCS logs read from stdin.
Input should be the output of a CVS or RCS logging command, e.g.
cvs log -rrelease14:
which dumps all log messages from release1.4 upwards (assuming that
release 1.4 was tagged with tag 'release14'). Note the trailing
colon!
This collects all the revision records and outputs them sorted by date
rather than by file, collapsing duplicate revision records, i.e.,
records with the same message for different files.
The -t option causes it to truncate (discard) the last revision log
entry; this is useful when using something like the above cvs log
command, which shows the revisions including the given tag, while you
probably want everything *since* that tag.
XXX This code was created by reverse engineering CVS 1.9 and RCS 5.7
from their output.
"""
import os, sys, getopt, string, re
sep1 = '='*77 + '\n' # file separator
sep2 = '-'*28 + '\n' # revision separator
def main():
    """Main program.

    Reads log chunks from stdin until none is left, collects their
    revision records, sorts them and prints them newest first (-r gives
    the opposite order).  With -t the last record of every chunk is
    discarded.
    """
    truncate_last = 0
    reverse = 0
    opts, args = getopt.getopt(sys.argv[1:], "tr")
    for o, a in opts:
        if o == '-t':
            truncate_last = 1
        elif o == '-r':
            reverse = 1
    database = []
    while 1:
        chunk = read_chunk(sys.stdin)
        if not chunk:
            break
        records = digest_chunk(chunk)
        # A chunk may carry a file header but no revisions at all; there
        # is nothing to truncate then.
        if truncate_last and records:
            del records[-1]
        database.extend(records)
    database.sort()
    if not reverse:
        database.reverse()
    format_output(database)
def read_chunk(fp):
    """Read a chunk -- data for one file, ending with sep1.

    Returns the chunk as a list of non-empty line lists, one for each
    part delimited by sep2.  If the input ends before sep1 is seen, the
    part being collected at that point is not included.
    """
    parts = []
    current = []
    line = fp.readline()
    while line and line != sep1:
        if line != sep2:
            current.append(line)
        elif current:
            parts.append(current)
            current = []
        line = fp.readline()
    # A non-empty line here means the loop stopped on sep1, not on EOF.
    if line and current:
        parts.append(current)
    return parts
def digest_chunk(chunk):
    """Digest a chunk -- extract working file name and revisions.

    chunk is a list of line lists as returned by read_chunk(): the file
    header first, then one list per revision.  Returns a list of
    (date, working_file, rev, author, text) tuples, where text is the
    list of log message lines.  A field that cannot be parsed is None,
    and the line it should have come from is kept at the front of text.
    """
    header = chunk[0]
    key = 'Working file:'
    keylen = len(key)
    for line in header:
        if line[:keylen] == key:
            working_file = line[keylen:].strip()
            break
    else:
        working_file = None
    records = []
    for lines in chunk[1:]:
        revline = lines[0]
        # A part may consist of the revision line only.
        if len(lines) > 1:
            dateline = lines[1]
        else:
            dateline = None
        text = lines[2:]
        date = None
        author = None
        if dateline is not None:
            words = dateline.split()
            if len(words) >= 3 and words[0] == 'date:':
                dateword = words[1]
                timeword = words[2]
                if timeword[-1:] == ';':
                    timeword = timeword[:-1]
                date = dateword + ' ' + timeword
                if len(words) >= 5 and words[3] == 'author:':
                    author = words[4]
                    if author[-1:] == ';':
                        author = author[:-1]
            else:
                # Not a date line after all: it belongs to the message.
                text.insert(0, dateline)
        words = revline.split()
        if len(words) >= 2 and words[0] == 'revision':
            rev = words[1]
        else:
            rev = None
            text.insert(0, revline)
        records.append((date, working_file, rev, author, text))
    return records
def format_output(database):
prevtext = None
prev = []
database.append((None, None, None, None, None)) # Sentinel
for (date, working_file, rev, author, text) in database:
if text != prevtext:
if prev:
print sep2,
for (p_date, p_working_file, p_rev, p_author) in prev:
print p_date, p_author, p_working_file, p_rev
sys.stdout.writelines(prevtext)
prev = []
prev.append((date, working_file, rev, author))
prevtext = text
main()
--- NEW FILE: mailerdaemon.py ---
"""mailerdaemon - classes to parse mailer-daemon messages"""
import string
import rfc822
import calendar
import re
import os
import sys
Unparseable = 'mailerdaemon.Unparseable'
class ErrorMessage(rfc822.Message):
    """A mail message that may be a mailer-daemon error report."""

    def __init__(self, fp):
        rfc822.Message.__init__(self, fp)
        # Lowercased subject; set by is_warning() for non-warnings.
        self.sub = ''

    def is_warning(self):
        """Return 1 if the subject marks the message as a mere warning.

        Otherwise the lowercased subject is remembered in self.sub and 0
        is returned.  A message without a subject is not a warning.
        """
        subject = self.getheader('Subject')
        if not subject:
            return 0
        subject = string.lower(subject)
        if subject[:12] == 'waiting mail' or \
           string.find(subject, 'warning') >= 0:
            return 1
        self.sub = subject
        return 0

    def get_errors(self):
        """Return the result of the first parser in EMPARSERS that succeeds.

        Raises Unparseable when every parser gives up.
        """
        for parser in EMPARSERS:
            self.rewindbody()
            try:
                return parser(self.fp, self.sub)
            except Unparseable:
                continue
        raise Unparseable
# List of re's or tuples of re's.
# If a re, it should contain at least a group (?P<email>...) which
# should refer to the email address. The re can also contain a group
# (?P<reason>...) which should refer to the reason (error message).
# If no reason is present, the emparse_list_reason list is used to
# find a reason.
# If a tuple, the tuple should contain 2 re's. The first re finds a
# location, the second re is repeated one or more times to find
# multiple email addresses. The second re is matched (not searched)
# where the previous match ended.
# The re's are compiled using the re module.
emparse_list_list = [
'error: (?P<reason>unresolvable): (?P<email>.+)',
('----- The following addresses had permanent fatal errors -----\n',
'(?P<email>[^ \n].*)\n( .*\n)?'),
'remote execution.*\n.*rmail (?P<email>.+)',
('The following recipients did not receive your message:\n\n',
' +(?P<email>.*)\n(The following recipients did not receive your message:\n\n)?'),
'------- Failure Reasons --------\n\n(?P<reason>.*)\n(?P<email>.*)',
'^<(?P<email>.*)>:\n(?P<reason>.*)',
'^(?P<reason>User mailbox exceeds allowed size): (?P<email>.+)',
'^5\\d{2} <(?P<email>[^\n>]+)>\\.\\.\\. (?P<reason>.+)',
'^Original-Recipient: rfc822;(?P<email>.*)',
'^did not reach the following recipient\\(s\\):\n\n(?P<email>.*) on .*\n +(?P<reason>.*)',
'^ <(?P<email>[^\n>]+)> \\.\\.\\. (?P<reason>.*)',
'^Report on your message to: (?P<email>.*)\nReason: (?P<reason>.*)',
'^Your message was not delivered to +(?P<email>.*)\n +for the following reason:\n +(?P<reason>.*)',
'^ was not +(?P<email>[^ \n].*?) *\n.*\n.*\n.*\n because:.*\n +(?P<reason>[^ \n].*?) *\n',
]
# Compile the patterns in the list and store them back in place; a tuple
# of patterns becomes a tuple of compiled patterns.  The loop variables
# are removed from the module namespace afterwards.
for i in range(len(emparse_list_list)):
    x = emparse_list_list[i]
    if type(x) is type(''):
        emparse_list_list[i] = re.compile(x, re.MULTILINE)
    else:
        emparse_list_list[i] = tuple(map(
            lambda pattern: re.compile(pattern, re.MULTILINE), x))
    del x
del i
# list of re's used to find reasons (error messages).
# if a string, "<>" is replaced by a copy of the email address.
# The expressions are searched for in order. After the first match,
# no more expressions are searched for. So, order is important.
emparse_list_reason = [
r'^5\d{2} <>\.\.\. (?P<reason>.*)',
'<>\.\.\. (?P<reason>.*)',
re.compile(r'^<<< 5\d{2} (?P<reason>.*)', re.MULTILINE),
re.compile('===== stderr was =====\nrmail: (?P<reason>.*)'),
re.compile('^Diagnostic-Code: (?P<reason>.*)', re.MULTILINE),
]
emparse_list_from = re.compile('^From:', re.IGNORECASE|re.MULTILINE)
def emparse_list(fp, sub):
# Parse an error report read from fp using the emparse_list_list patterns.
# sub is the fallback reason (the caller passes the lowercased subject).
# Returns a list of 'address: reason' strings with runs of whitespace
# collapsed to single blanks; raises Unparseable if no address is found.
data = fp.read()
# Address patterns are only applied to the text before the first line
# starting with "From:" (case-insensitive), or to everything if there is
# no such line.
res = emparse_list_from.search(data)
if res is None:
from_index = len(data)
else:
from_index = res.start(0)
errors = []
emails = []
reason = None
# Try the patterns in order; the first one that matches decides.
for regexp in emparse_list_list:
if type(regexp) is type(()):
# Tuple: the first pattern locates the start of the address list ...
res = regexp[0].search(data, 0, from_index)
if res is not None:
try:
reason = res.group('reason')
except IndexError:
# This pattern has no 'reason' group.
pass
# ... and the second is matched repeatedly, each time starting where
# the previous match ended, to collect the addresses.
while 1:
res = regexp[1].match(data, res.end(0), from_index)
if res is None:
break
emails.append(res.group('email'))
break
else:
res = regexp.search(data, 0, from_index)
if res is not None:
emails.append(res.group('email'))
try:
reason = res.group('reason')
except IndexError:
# This pattern has no 'reason' group.
pass
break
if not emails:
raise Unparseable
if not reason:
# No reason came with the address: start from the subject and look for
# something better with the emparse_list_reason patterns.
reason = sub
if reason[:15] == 'returned mail: ':
reason = reason[15:]
for regexp in emparse_list_reason:
if type(regexp) is type(''):
# String pattern: substitute each remaining address for "<>" and
# search for a reason specific to that address.  Iterating backwards
# keeps the indices valid while matched addresses are deleted.
for i in range(len(emails)-1,-1,-1):
email = emails[i]
exp = re.compile(string.join(string.split(regexp, '<>'), re.escape(email)), re.MULTILINE)
res = exp.search(data)
if res is not None:
errors.append(string.join(string.split(string.strip(email)+': '+res.group('reason'))))
del emails[i]
continue
# Compiled pattern: a match gives the reason for all remaining addresses.
res = regexp.search(data)
if res is not None:
reason = res.group('reason')
break
# Addresses without a reason of their own share the common reason.
for email in emails:
errors.append(string.join(string.split(string.strip(email)+': '+reason)))
return errors
EMPARSERS = [emparse_list, ]
def sort_numeric(a, b):
a = string.atoi(a)
b = string.atoi(b)
if a < b: return -1
elif a > b: return 1
else: return 0
def parsedir(dir, modify):
os.chdir(dir)
pat = re.compile('^[0-9]*$')
errordict = {}
errorfirst = {}
errorlast = {}
nok = nwarn = nbad = 0
# find all numeric file names and sort them
files = filter(lambda fn, pat=pat: pat.match(fn) is not None, os.listdir('.'))
files.sort(sort_numeric)
for fn in files:
# Lets try to parse the file.
fp = open(fn)
m = ErrorMessage(fp)
sender = m.getaddr('From')
print '%s\t%-40s\t'%(fn, sender[1]),
if m.is_warning():
fp.close()
print 'warning only'
nwarn = nwarn + 1
if modify:
os.rename(fn, ','+fn)
## os.unlink(fn)
continue
try:
errors = m.get_errors()
except Unparseable:
print '** Not parseable'
nbad = nbad + 1
fp.close()
continue
print len(errors), 'errors'
# Remember them
for e in errors:
try:
mm, dd = m.getdate('date')[1:1+2]
date = '%s %02d' % (calendar.month_abbr[mm], dd)
except:
date = '??????'
if not errordict.has_key(e):
errordict[e] = 1
errorfirst[e] = '%s (%s)' % (fn, date)
else:
errordict[e] = errordict[e] + 1
errorlast[e] = '%s (%s)' % (fn, date)
fp.close()
nok = nok + 1
if modify:
os.rename(fn, ','+fn)
## os.unlink(fn)
print '--------------'
print nok, 'files parsed,',nwarn,'files warning-only,',
print nbad,'files unparseable'
print '--------------'
list = []
for e in errordict.keys():
list.append((errordict[e], errorfirst[e], errorlast[e], e))
list.sort()
for num, first, last, e in list:
print '%d %s - %s\t%s' % (num, first, last, e)
def main():
    """Parse the mail folders named on the command line.

    A leading -d option makes parsedir() rename the files it handled.
    Without folder arguments a built-in default folder is used.
    """
    modify = 0
    if sys.argv[1:2] == ['-d']:
        modify = 1
        del sys.argv[1]
    folders = sys.argv[1:]
    if not folders:
        parsedir('/ufs/jack/Mail/errorsinbox', modify)
    else:
        for folder in folders:
            parsedir(folder, modify)
if __name__ == '__main__' or sys.argv[0] == __name__:
main()
--- NEW FILE: md5sum.py ---
#! /usr/bin/env python
"""Python utility to print MD5 checksums of argument files.
Works with Python 1.5.2 and later.
"""
import sys, md5
BLOCKSIZE = 1024*1024
def hexify(s):
    """Return the characters of s as a string of two-digit hex numbers."""
    digits = ''
    for ch in s:
        digits = digits + '%02x' % ord(ch)
    return digits
def main():
args = sys.argv[1:]
if not args:
sys.stderr.write("usage: %s file ...\n" % sys.argv[0])
sys.exit(2)
for file in sys.argv[1:]:
f = open(file, "rb")
sum = md5.new()
while 1:
block = f.read(BLOCKSIZE)
if not block:
break
sum.update(block)
f.close()
print hexify(sum.digest()), file
if __name__ == "__main__":
main()
--- NEW FILE: methfix.py ---
#! /usr/bin/env python
# Fix Python source files to avoid using
# def method(self, (arg1, ..., argn)):
# instead of the more rational
# def method(self, arg1, ..., argn):
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a python module.
# Symbolic links are always ignored (except as explicit directory
# arguments). Of course, the original file is kept as a back-up
# (with a "~" attached to its name).
# It complains about binaries (files containing null bytes)
# and about files that are ostensibly not Python files: if the first
# line starts with '#!' and does not contain the string 'python'.
#
# Changes made are reported to stdout in a diff-like format.
#
# Undoubtedly you can do this using find and sed or perl, but this is
# a nice example of Python code that recurses down a directory tree
# and uses regular expressions. Also note several subtleties like
# preserving the file's mode and avoiding to even write a temp file
# when no changes are needed for a file.
#
# NB: by changing only the function fixline() you can turn this
# into a program for a different change to Python programs...
import sys
import regex
import os
from stat import *
import string
err = sys.stderr.write
dbg = err
rep = sys.stdout.write
def main():
    """Fix every file or directory argument; exit 1 if anything failed.

    Directories are processed recursively.  A symbolic link that does not
    lead to a directory is refused.  Without arguments a usage message is
    written and the exit status is 2.
    """
    targets = sys.argv[1:]
    if not targets: # No arguments
        err('usage: ' + sys.argv[0] + ' file-or-directory ...\n')
        sys.exit(2)
    bad = 0
    for arg in targets:
        if os.path.isdir(arg):
            failed = recursedown(arg)
        elif os.path.islink(arg):
            err(arg + ': will not process symbolic links\n')
            failed = 1
        else:
            failed = fix(arg)
        if failed:
            bad = 1
    sys.exit(bad)
# File names made up of letters, digits and underscores, ending in '.py'.
ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
    """Return true if name looks like the file name of a Python module."""
    matched = ispythonprog.match(name)
    return matched >= 0
def recursedown(dirname):
dbg('recursedown(' + `dirname` + ')\n')
bad = 0
try:
names = os.listdir(dirname)
except os.error, msg:
err(dirname + ': cannot list directory: ' + `msg` + '\n')
return 1
names.sort()
subdirs = []
for name in names:
if name in (os.curdir, os.pardir): continue
fullname = os.path.join(dirname, name)
if os.path.islink(fullname): pass
elif os.path.isdir(fullname):
subdirs.append(fullname)
elif ispython(name):
if fix(fullname): bad = 1
for fullname in subdirs:
if recursedown(fullname): bad = 1
return bad
def fix(filename):
## dbg('fix(' + `filename` + ')\n')
try:
f = open(filename, 'r')
except IOError, msg:
err(filename + ': cannot open: ' + `msg` + '\n')
return 1
head, tail = os.path.split(filename)
tempname = os.path.join(head, '@' + tail)
g = None
# If we find a match, we rewind the file and start over but
# now copy everything to a temp file.
lineno = 0
while 1:
line = f.readline()
if not line: break
lineno = lineno + 1
if g is None and '\0' in line:
# Check for binary files
err(filename + ': contains null bytes; not fixed\n')
f.close()
return 1
if lineno == 1 and g is None and line[:2] == '#!':
# Check for non-Python scripts
words = string.split(line[2:])
if words and regex.search('[pP]ython', words[0]) < 0:
msg = filename + ': ' + words[0]
msg = msg + ' script; not fixed\n'
err(msg)
f.close()
return 1
while line[-2:] == '\\\n':
nextline = f.readline()
if not nextline: break
line = line + nextline
lineno = lineno + 1
newline = fixline(line)
if newline != line:
if g is None:
try:
g = open(tempname, 'w')
except IOError, msg:
f.close()
err(tempname+': cannot create: '+\
`msg`+'\n')
return 1
f.seek(0)
lineno = 0
rep(filename + ':\n')
continue # restart from the beginning
rep(`lineno` + '\n')
rep('< ' + line)
rep('> ' + newline)
if g is not None:
g.write(newline)
# End of file
f.close()
if not g: return 0 # No changes
# Finishing touch -- move files
# First copy the file's mode to the temp file
try:
statbuf = os.stat(filename)
os.chmod(tempname, statbuf[ST_MODE] & 07777)
except os.error, msg:
err(tempname + ': warning: chmod failed (' + `msg` + ')\n')
# Then make a backup of the original file as filename~
try:
os.rename(filename, filename + '~')
except os.error, msg:
err(filename + ': warning: backup failed (' + `msg` + ')\n')
# Now move the temp file to the original file
try:
os.rename(tempname, filename)
except os.error, msg:
err(filename + ': rename failed (' + `msg` + ')\n')
return 1
# Return succes
return 0
# An indented "def name(self, (args)):" line.  Group 1 spans the
# parenthesized argument tuple, group 2 what is inside its parentheses.
fixpat = '^[ \t]+def +[a-zA-Z0-9_]+ *( *self *, *\(( *\(.*\) *)\) *) *:'
fixprog = regex.compile(fixpat)
def fixline(line):
    """Return line with the tuple after 'self' replaced by its contents.

    Lines that do not match fixpat are returned unchanged.
    """
    if fixprog.match(line) < 0:
        return line
    outer, inner = fixprog.regs[1:3]
    outer_start, outer_end = outer
    inner_start, inner_end = inner
    return line[:outer_start] + line[inner_start:inner_end] + line[outer_end:]
main()
--- NEW FILE: mkreal.py ---
#! /usr/bin/env python
# mkreal
#
# turn a symlink to a directory into a real directory
import sys
import os
from stat import *
join = os.path.join
error = 'mkreal error'
BUFSIZE = 32*1024
def mkrealfile(name):
    """Replace the symbolic link name by a copy of the file it points to.

    The copy is given the permission bits that os.stat() reports for name.
    """
    st = os.stat(name) # Get the mode
    mode = S_IMODE(st[ST_MODE])
    os.readlink(name) # Make sure again it's a symlink
    f_in = open(name, 'r') # This ensures it's a file
    try:
        # The data stays readable through f_in after the link is removed.
        os.unlink(name)
        f_out = open(name, 'w')
        try:
            while 1:
                buf = f_in.read(BUFSIZE)
                if not buf: break
                f_out.write(buf)
        finally:
            # Flush data to disk before changing mode
            f_out.close()
    finally:
        f_in.close()
    os.chmod(name, mode)
def mkrealdir(name):
    """Replace the symbolic link name by a real directory of symbolic links.

    The new directory gets the permission bits that os.stat() reports for
    name, and one symbolic link for every entry of the directory the link
    pointed to.
    """
    mode = S_IMODE(os.stat(name)[ST_MODE]) # Get the mode
    linkto = os.readlink(name)
    files = os.listdir(name)
    os.unlink(name)
    os.mkdir(name, mode)
    os.chmod(name, mode)
    # The new links live one level deeper than the link they replace.
    target_dir = join(os.pardir, linkto)
    for file in files:
        if file in (os.curdir, os.pardir):
            continue
        os.symlink(join(target_dir, file), join(name, file))
def main():
sys.stdout = sys.stderr
progname = os.path.basename(sys.argv[0])
if progname == '-c': progname = 'mkreal'
args = sys.argv[1:]
if not args:
print 'usage:', progname, 'path ...'
sys.exit(2)
status = 0
for name in args:
if not os.path.islink(name):
print progname+':', name+':', 'not a symlink'
status = 1
else:
if os.path.isdir(name):
mkrealdir(name)
else:
mkrealfile(name)
sys.exit(status)
main()
--- NEW FILE: ndiff.py ---
#! /usr/bin/env python
# Module ndiff version 1.6.0
# Released to the public domain 08-Dec-2000,
# by Tim Peters (tim.one@home.com).
# Provided as-is; use at your own risk; no warranty; no promises; enjoy!
"""ndiff [-q] file1 file2
or
ndiff (-r1 | -r2) < ndiff_output > file1_or_file2
Print a human-friendly file difference report to stdout. Both inter-
and intra-line differences are noted. In the second form, recreate file1
(-r1) or file2 (-r2) on stdout, from an ndiff report on stdin.
In the first form, if -q ("quiet") is not specified, the first two lines
of output are
-: file1
+: file2
Each remaining line begins with a two-letter code:
"- " line unique to file1
"+ " line unique to file2
" " line common to both files
"? " line not present in either input file
Lines beginning with "? " attempt to guide the eye to intraline
differences, and were not present in either input file. These lines can be
confusing if the source files contain tab characters.
The first file can be recovered by retaining only lines that begin with
" " or "- ", and deleting those 2-character prefixes; use ndiff with -r1.
The second file can be recovered similarly, but by retaining only " " and
"+ " lines; use ndiff with -r2; or, on Unix, the second file can be
recovered by piping the output through
sed -n '/^[+ ] /s/^..//p'
See module comments for details and programmatic interface.
"""
__version__ = 1, 5, 0
# SequenceMatcher tries to compute a "human-friendly diff" between
# two sequences (chiefly picturing a file as a sequence of lines,
# and a line as a sequence of characters, here). Unlike e.g. UNIX(tm)
# diff, the fundamental notion is the longest *contiguous* & junk-free
# matching subsequence. That's what catches people's eyes. The
# Windows(tm) windiff has another interesting notion, pairing up elements
# that appear uniquely in each sequence. That, and the method here,
# appear to yield more intuitive difference reports than does diff. This
# method appears to be the least vulnerable to synching up on blocks
# of "junk lines", though (like blank lines in ordinary text files,
# or maybe "<P>" lines in HTML files). That may be because this is
# the only method of the 3 that has a *concept* of "junk" <wink>.
#
# Note that ndiff makes no claim to produce a *minimal* diff. To the
# contrary, minimal diffs are often counter-intuitive, because they
# synch up anywhere possible, sometimes accidental matches 100 pages
# apart. Restricting synch points to contiguous matches preserves some
# notion of locality, at the occasional cost of producing a longer diff.
#
# With respect to junk, an earlier version of ndiff simply refused to
# *start* a match with a junk element. The result was cases like this:
# before: private Thread currentThread;
# after: private volatile Thread currentThread;
# If you consider whitespace to be junk, the longest contiguous match
# not starting with junk is "e Thread currentThread". So ndiff reported
# that "e volatil" was inserted between the 't' and the 'e' in "private".
# While an accurate view, to people that's absurd. The current version
# looks for matching blocks that are entirely junk-free, then extends the
# longest one of those as far as possible but only with matching junk.
# So now "currentThread" is matched, then extended to suck up the
# preceding blank; then "private" is matched, and extended to suck up the
# following blank; then "Thread" is matched; and finally ndiff reports
# that "volatile " was inserted before "Thread". The only quibble
# remaining is that perhaps it was really the case that " volatile"
# was inserted after "private". I can live with that <wink>.
#
# NOTE on junk: the module-level names
# IS_LINE_JUNK
# IS_CHARACTER_JUNK
# can be set to any functions you like. The first one should accept
# a single string argument, and return true iff the string is junk.
# The default is whether the regexp r"\s*#?\s*$" matches (i.e., a
# line without visible characters, except for at most one splat).
# The second should accept a string of length 1 etc. The default is
# whether the character is a blank or tab (note: bad idea to include
# newline in this!).
#
# After setting those, you can call fcompare(f1name, f2name) with the
# names of the files you want to compare. The difference report
# is sent to stdout. Or you can call main(args), passing what would
# have been in sys.argv[1:] had the cmd-line form been used.
from difflib import SequenceMatcher
import string
TRACE = 0
# define what "junk" means
import re
def IS_LINE_JUNK(line, pat=re.compile(r"\s*#?\s*$").match):
    """Return true iff `line` is a junk line.

    A line is junk when the default pattern matches it: optional
    whitespace, at most one '#', optional whitespace, end of line.
    `pat` is the bound match method of the pattern, compiled once when
    the function is defined.
    """
    found = pat(line)
    return found is not None
def IS_CHARACTER_JUNK(ch, ws=" \t"):
    """Return true iff the single character `ch` occurs in `ws`.

    By default only blank and tab count as junk.
    """
    is_junk = ch in ws
    return is_junk
del re
# meant for dumping lines
def dump(tag, x, lo, hi):
for i in xrange(lo, hi):
print tag, x[i],
def plain_replace(a, alo, ahi, b, blo, bhi):
    """Dump a[alo:ahi] as deletions and b[blo:bhi] as insertions.

    Both ranges must be non-empty.  The shorter block is dumped first,
    which reduces the burden on short-term memory when the blocks are
    of very different sizes.
    """
    assert alo < ahi and blo < bhi
    removed = ('-', a, alo, ahi)
    added = ('+', b, blo, bhi)
    if bhi - blo < ahi - alo:
        order = (added, removed)
    else:
        order = (removed, added)
    for args in order:
        dump(*args)
# When replacing one block of lines with another, this guy searches
# the blocks for *similar* lines; the best-matching pair (if any) is
# used as a synch point, and intraline difference marking is done on
# the similar pair. Lots of work, but often worth it.
def fancy_replace(a, alo, ahi, b, blo, bhi):
if TRACE:
print '*** fancy_replace', alo, ahi, blo, bhi
dump('>', a, alo, ahi)
dump('<', b, blo, bhi)
# don't synch up unless the lines have a similarity score of at
# least cutoff; best_ratio tracks the best score seen so far
best_ratio, cutoff = 0.74, 0.75
cruncher = SequenceMatcher(IS_CHARACTER_JUNK)
eqi, eqj = None, None # 1st indices of equal lines (if any)
# search for the pair that matches best without being identical
# (identical lines must be junk lines, & we don't want to synch up
# on junk -- unless we have to)
for j in xrange(blo, bhi):
bj = b[j]
cruncher.set_seq2(bj)
for i in xrange(alo, ahi):
ai = a[i]
if ai == bj:
if eqi is None:
eqi, eqj = i, j
continue
cruncher.set_seq1(ai)
# computing similarity is expensive, so use the quick
# upper bounds first -- have seen this speed up messy
# compares by a factor of 3.
# note that ratio() is only expensive to compute the first
# time it's called on a sequence pair; the expensive part
# of the computation is cached by cruncher
if cruncher.real_quick_ratio() > best_ratio and \
cruncher.quick_ratio() > best_ratio and \
cruncher.ratio() > best_ratio:
best_ratio, best_i, best_j = cruncher.ratio(), i, j
if best_ratio < cutoff:
# no non-identical "pretty close" pair
if eqi is None:
# no identical pair either -- treat it as a straight replace
plain_replace(a, alo, ahi, b, blo, bhi)
return
# no close pair, but an identical pair -- synch up on that
best_i, best_j, best_ratio = eqi, eqj, 1.0
else:
# there's a close pair, so forget the identical pair (if any)
eqi = None
# a[best_i] very similar to b[best_j]; eqi is None iff they're not
# identical
if TRACE:
print '*** best_ratio', best_ratio, best_i, best_j
dump('>', a, best_i, best_i+1)
dump('<', b, best_j, best_j+1)
# pump out diffs from before the synch point
fancy_helper(a, alo, best_i, b, blo, best_j)
# do intraline marking on the synch pair
aelt, belt = a[best_i], b[best_j]
if eqi is None:
# pump out a '-', '?', '+', '?' quad for the synched lines
atags = btags = ""
cruncher.set_seqs(aelt, belt)
for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
la, lb = ai2 - ai1, bj2 - bj1
if tag == 'replace':
atags += '^' * la
btags += '^' * lb
elif tag == 'delete':
atags += '-' * la
elif tag == 'insert':
btags += '+' * lb
elif tag == 'equal':
atags += ' ' * la
btags += ' ' * lb
else:
raise ValueError, 'unknown tag ' + `tag`
printq(aelt, belt, atags, btags)
else:
# the synch pair is identical
print ' ', aelt,
# pump out diffs from after the synch point
fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi)
def fancy_helper(a, alo, ahi, b, blo, bhi):
    """Emit the differences between a[alo:ahi] and b[blo:bhi].

    Either range may be empty: both non-empty goes to fancy_replace(),
    only one non-empty is dumped as pure deletions or insertions, and
    nothing is printed when both are empty.
    """
    have_a = alo < ahi
    have_b = blo < bhi
    if have_a and have_b:
        fancy_replace(a, alo, ahi, b, blo, bhi)
    elif have_a:
        dump('-', a, alo, ahi)
    elif have_b:
        dump('+', b, blo, bhi)
# Crap to deal with leading tabs in "?" output. Can hurt, but will
# probably help most of the time.
def printq(aline, bline, atags, btags):
common = min(count_leading(aline, "\t"),
count_leading(bline, "\t"))
common = min(common, count_leading(atags[:common], " "))
print "-", aline,
if count_leading(atags, " ") < len(atags):
print "?", "\t" * common + atags[common:]
print "+", bline,
if count_leading(btags, " ") < len(btags):
print "?", "\t" * common + btags[common:]
def count_leading(line, ch):
    """Return how many times `ch` is repeated at the start of `line`."""
    count = 0
    for c in line:
        if c != ch:
            break
        count += 1
    return count
def fail(msg):
    """Write `msg`, a blank line and the module docstring to stderr.

    Always returns 0, so callers can write `return fail(...)`.
    """
    import sys
    sys.stderr.write(msg + "\n\n")
    sys.stderr.write(__doc__)
    return 0
# open a file & return the file object; gripe and return 0 if it
# couldn't be opened
def fopen(fname):
try:
return open(fname, 'r')
except IOError, detail:
return fail("couldn't open " + fname + ": " + str(detail))
# open two files & spray the diff to stdout; return false iff a problem
def fcompare(f1name, f2name):
f1 = fopen(f1name)
f2 = fopen(f2name)
if not f1 or not f2:
return 0
a = f1.readlines(); f1.close()
b = f2.readlines(); f2.close()
cruncher = SequenceMatcher(IS_LINE_JUNK, a, b)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if tag == 'replace':
fancy_replace(a, alo, ahi, b, blo, bhi)
elif tag == 'delete':
dump('-', a, alo, ahi)
elif tag == 'insert':
dump('+', b, blo, bhi)
elif tag == 'equal':
dump(' ', a, alo, ahi)
else:
raise ValueError, 'unknown tag ' + `tag`
return 1
# crack args (sys.argv[1:] is normal) & compare;
# return false iff a problem
def main(args):
import getopt
try:
opts, args = getopt.getopt(args, "qr:")
except getopt.error, detail:
return fail(str(detail))
noisy = 1
qseen = rseen = 0
for opt, val in opts:
if opt == "-q":
qseen = 1
noisy = 0
elif opt == "-r":
rseen = 1
whichfile = val
if qseen and rseen:
return fail("can't specify both -q and -r")
if rseen:
if args:
return fail("no args allowed with -r option")
if whichfile in "12":
restore(whichfile)
return 1
return fail("-r value must be 1 or 2")
if len(args) != 2:
return fail("need 2 filename args")
f1name, f2name = args
if noisy:
print '-:', f1name
print '+:', f2name
return fcompare(f1name, f2name)
def restore(which):
import sys
tag = {"1": "- ", "2": "+ "}[which]
prefixes = (" ", tag)
for line in sys.stdin.readlines():
if line[:2] in prefixes:
print line[2:],
if __name__ == '__main__':
    # Script entry point.  "-profile" anywhere on the command line runs
    # main() under the profiler and prints the statistics afterwards.
    import sys
    args = sys.argv[1:]
    if "-profile" not in args:
        main(args)
    else:
        import profile, pstats
        args.remove("-profile")
        statf = "ndiff.pro"
        profile.run("main(args)", statf)
        stats = pstats.Stats(statf)
        stats.strip_dirs().sort_stats('time').print_stats()
--- NEW FILE: nm2def.py ---
#! /usr/bin/env python
"""nm2def.py
Helpers to extract symbols from Unix libs and auto-generate
Windows definition files from them. Depends on nm(1). Tested
on Linux and Solaris only (-p option to nm is for Solaris only).
By Marc-Andre Lemburg, Aug 1998.
Additional notes: the output of nm is supposed to look like this:
acceler.o:
000001fd T PyGrammar_AddAccelerators
U PyGrammar_FindDFA
00000237 T PyGrammar_RemoveAccelerators
U _IO_stderr_
U exit
U fprintf
U free
U malloc
U printf
grammar1.o:
00000000 T PyGrammar_FindDFA
00000034 T PyGrammar_LabelRepr
U _PyParser_TokenNames
U abort
U printf
U sprintf
...
Even if this isn't the default output of your nm, there is generally an
option to produce this format (since it is the original v7 Unix format).
"""
import os,re,string,sys
PYTHONLIB = 'libpython'+sys.version[:3]+'.a'
PC_PYTHONLIB = 'Python'+sys.version[0]+sys.version[2]+'.dll'
NM = 'nm -p -g %s' # For Linux, use "nm -g %s"
def symbols(lib=PYTHONLIB,types=('T','C','D')):
    """Run nm on `lib` and return a dictionary of its symbols.

    The result maps each symbol name to an (address, type) tuple.
    Only output lines made of exactly three fields whose type field is
    in `types` are used; empty lines and lines containing ':' (the
    object file headers) are skipped.
    """
    found = {}
    for raw in os.popen(NM % lib).readlines():
        line = raw.strip()
        if not line or ':' in line:
            continue
        fields = line.split()
        if len(fields) != 3:
            continue
        address, kind, name = fields
        if kind in types:
            found[name] = address, kind
    return found
def export_list(symbols):
    """Format a symbol dictionary as the body of an EXPORTS section.

    `symbols` maps names to (address, type) tuples.  Names of type 'C'
    or 'D' are listed first, sorted, one per line as "<TAB>name DATA";
    after an empty line follow all other names, sorted, one per line
    as "<TAB>name".  The result has no trailing newline.
    """
    data_names = []
    code_names = []
    for name, info in symbols.items():
        addr, kind = info
        entry = '\t' + name
        if kind in ('C', 'D'):
            data_names.append(entry)
        else:
            code_names.append(entry)
    data_names.sort()
    code_names.sort()
    data_part = ''.join([entry + ' DATA\n' for entry in data_names])
    return data_part + '\n' + '\n'.join(code_names)
# Definition file template
DEF_TEMPLATE = """\
EXPORTS
%s
"""
# Special symbols that have to be included even though they don't
# pass the filter
SPECIALS = (
)
def filter_Python(symbols,specials=SPECIALS):
    """Remove, in place, every name that is not a Python API symbol.

    A name is kept when it starts with 'Py' or '_Py', or when it is
    listed in `specials`.
    """
    # iterate over a copy of the keys since entries are deleted
    for name in list(symbols.keys()):
        keep = name[:2] == 'Py' or name[:3] == '_Py' or name in specials
        if not keep:
            del symbols[name]
def main():
    """Write a definition file for PYTHONLIB to standard output."""
    table = symbols(PYTHONLIB)
    filter_Python(table)
    out = sys.stdout # open('PC/python_nt.def','w')
    out.write(DEF_TEMPLATE % (export_list(table)))
    out.close()
if __name__ == '__main__':
main()
--- NEW FILE: objgraph.py ---
#! /usr/bin/env python
# objgraph
#
# Read "nm -o" input (on IRIX: "nm -Bo") of a set of libraries or modules
# and print various interesting listings, such as:
#
# - which names are used but not defined in the set (and used where),
# - which names are defined in the set (and where),
# - which modules use which other modules,
# - which modules are used by which other modules.
#
# Usage: objgraph [-cdu] [file] ...
# -c: print callers per objectfile
# -d: print callees per objectfile
# -u: print usage of undefined symbols
# If none of -cdu is specified, all are assumed.
# Use "nm -o" to generate the input (on IRIX: "nm -Bo"),
# e.g.: nm -o /lib/libc.a | objgraph
import sys
import string
import os
import getopt
import regex
# Types of symbols.
#
definitions = 'TRGDSBAEC'
externals = 'UV'
ignore = 'Nntrgdsbavuc'
# Regular expression to parse "nm -o" output.
#
matcher = regex.compile('\(.*\):\t?........ \(.\) \(.*\)$')
# Store "item" in "dict" under "key".
# The dictionary maps keys to lists of items.
# If there is no list for the key yet, it is created.
#
def store(dict, key, item):
    """Append `item` to the list dict[key], creating the list if needed."""
    dict.setdefault(key, []).append(item)
# Return a flattened version of a list of strings: the concatenation
# of its elements with intervening spaces.
#
def flat(list):
    """Return the strings in `list` joined by single spaces."""
    return ' '.join(list)
# Global variables mapping defined/undefined names to files and back.
#
file2undef = {}
def2file = {}
file2def = {}
undef2file = {}
# Read one input file and merge the data into the tables.
# Argument is an open file.
#
def readinput(file):
while 1:
s = file.readline()
if not s:
break
# If you get any output from this line,
# it is probably caused by an unexpected input line:
if matcher.search(s) < 0: s; continue # Shouldn't happen
(ra, rb), (r1a, r1b), (r2a, r2b), (r3a, r3b) = matcher.regs[:4]
fn, name, type = s[r1a:r1b], s[r3a:r3b], s[r2a:r2b]
if type in definitions:
store(def2file, name, fn)
store(file2def, fn, name)
elif type in externals:
store(file2undef, fn, name)
store(undef2file, name, fn)
elif not type in ignore:
print fn + ':' + name + ': unknown type ' + type
# Print all names that were undefined in some module and where they are
# defined.
#
def printcallee():
flist = file2undef.keys()
flist.sort()
for file in flist:
print file + ':'
elist = file2undef[file]
elist.sort()
for ext in elist:
if len(ext) >= 8:
tabs = '\t'
else:
tabs = '\t\t'
if not def2file.has_key(ext):
print '\t' + ext + tabs + ' *undefined'
else:
print '\t' + ext + tabs + flat(def2file[ext])
# Print for each module the names of the other modules that use it.
#
def printcaller():
files = file2def.keys()
files.sort()
for file in files:
callers = []
for label in file2def[file]:
if undef2file.has_key(label):
callers = callers + undef2file[label]
if callers:
callers.sort()
print file + ':'
lastfn = ''
for fn in callers:
if fn <> lastfn:
print '\t' + fn
lastfn = fn
else:
print file + ': unused'
# Print undefined names and where they are used.
#
def printundef():
undefs = {}
for file in file2undef.keys():
for ext in file2undef[file]:
if not def2file.has_key(ext):
store(undefs, ext, file)
elist = undefs.keys()
elist.sort()
for ext in elist:
print ext + ':'
flist = undefs[ext]
flist.sort()
for file in flist:
print '\t' + file
# Print warning messages about names defined in more than one file.
#
def warndups():
    """Warn on stderr about every name defined in more than one file.

    The warnings are printed straight to sys.stderr, so sys.stdout is
    never rebound and cannot be left redirected if printing fails.
    """
    names = def2file.keys()
    names.sort()
    for name in names:
        if len(def2file[name]) > 1:
            print >>sys.stderr, 'warning:', name, 'multiply defined:',
            print >>sys.stderr, flat(def2file[name])
# Main program
#
def main():
try:
optlist, args = getopt.getopt(sys.argv[1:], 'cdu')
except getopt.error:
sys.stdout = sys.stderr
print 'Usage:', os.path.basename(sys.argv[0]),
print '[-cdu] [file] ...'
print '-c: print callers per objectfile'
print '-d: print callees per objectfile'
print '-u: print usage of undefined symbols'
print 'If none of -cdu is specified, all are assumed.'
print 'Use "nm -o" to generate the input (on IRIX: "nm -Bo"),'
print 'e.g.: nm -o /lib/libc.a | objgraph'
return 1
optu = optc = optd = 0
for opt, void in optlist:
if opt == '-u':
optu = 1
elif opt == '-c':
optc = 1
elif opt == '-d':
optd = 1
if optu == optc == optd == 0:
optu = optc = optd = 1
if not args:
args = ['-']
for file in args:
if file == '-':
readinput(sys.stdin)
else:
readinput(open(file, 'r'))
#
warndups()
#
more = (optu + optc + optd > 1)
if optd:
if more:
print '---------------All callees------------------'
printcallee()
if optu:
if more:
print '---------------Undefined callees------------'
printundef()
if optc:
if more:
print '---------------All Callers------------------'
printcaller()
return 0
# Call the main program.
# Use its return value as exit status.
# Catch interrupts to avoid stack trace.
#
try:
sys.exit(main())
except KeyboardInterrupt:
sys.exit(1)
--- NEW FILE: parseentities.py ---
#!/usr/local/bin/python
""" Utility for parsing HTML entity definitions available from:
http://www.w3.org/ as e.g.
http://www.w3.org/TR/REC-html40/HTMLlat1.ent
Input is read from stdin, output is written to stdout in form of a
Python snippet defining a dictionary "entitydefs" mapping literal
entity name to character or numeric entity.
Marc-Andre Lemburg, mal@lemburg.com, 1999.
Use as you like. NO WARRANTIES.
"""
import re,sys
import TextTools
# Matches one SGML entity declaration of the form
#   <!ENTITY name CDATA "charcode" -- comment -->
# capturing the name, the character code and the (possibly multi-line)
# comment without its trailing blanks.
entityRE = re.compile(r'<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')

def parse(text,pos=0,endpos=None):
    """Collect the entity declarations found in text[pos:endpos].

    Returns a dictionary mapping each entity name to a tuple
    (charcode, comment).  `pos` defaults to the start of `text` and
    `endpos` to its end.
    """
    if endpos is None:
        endpos = len(text)
    d = {}
    while 1:
        m = entityRE.search(text,pos,endpos)
        if not m:
            break
        name,charcode,comment = m.groups()
        d[name] = charcode,comment
        # continue searching right after this declaration
        pos = m.end()
    return d
def writefile(f,defs):
    """Write `defs` to file `f` as the source of a dictionary named
    "entitydefs".

    `defs` maps entity names to (charcode, comment) tuples.  Numeric
    codes below 256 are written as a quoted octal escape; every other
    charcode is written as its repr().  The comment, collapsed by
    TextTools.collapse(), follows each entry after a '#'.
    """
    f.write("entitydefs = {\n")
    items = defs.items()
    items.sort()
    for name,(charcode,comment) in items:
        literal = None
        if charcode[:2] == '&#':
            code = int(charcode[2:-1])
            if code < 256:
                literal = "'\\%o'" % code
        if literal is None:
            literal = repr(charcode)
        comment = TextTools.collapse(comment)
        f.write(" '%s':\t%s, \t# %s\n" % (name,literal,comment))
    f.write('\n}\n')
if __name__ == '__main__':
    # Optional arguments: input file (default stdin) and output file
    # (default stdout).
    argc = len(sys.argv)
    infile = sys.stdin
    if argc > 1:
        infile = open(sys.argv[1])
    outfile = sys.stdout
    if argc > 2:
        outfile = open(sys.argv[2],'w')
    text = infile.read()
    defs = parse(text)
    writefile(outfile,defs)
--- NEW FILE: pathfix.py ---
#! /usr/bin/env python
# Change the #! line occurring in Python scripts. The new interpreter
# pathname must be given with a -i option.
#
# Command line arguments are files or directories to be processed.
# Directories are searched recursively for files whose name looks
# like a python module.
# Symbolic links are always ignored (except as explicit directory
# arguments). Of course, the original file is kept as a back-up
# (with a "~" attached to its name).
#
# Undoubtedly you can do this using find and sed or perl, but this is
# a nice example of Python code that recurses down a directory tree
# and uses regular expressions. Also note several subtleties like
# preserving the file's mode and avoiding to even write a temp file
# when no changes are needed for a file.
#
# NB: by changing only the function fixfile() you can turn this
# into a program for a different change to Python programs...
import sys
import regex
import os
from stat import *
import string
import getopt
err = sys.stderr.write
dbg = err
rep = sys.stdout.write
new_interpreter = None
def main():
global new_interpreter
usage = ('usage: %s -i /interpreter file-or-directory ...\n' %
sys.argv[0])
try:
opts, args = getopt.getopt(sys.argv[1:], 'i:')
except getopt.error, msg:
err(msg + '\n')
err(usage)
sys.exit(2)
for o, a in opts:
if o == '-i':
new_interpreter = a
if not new_interpreter or new_interpreter[0] != '/' or not args:
err('-i option or file-or-directory missing\n')
err(usage)
sys.exit(2)
bad = 0
for arg in args:
if os.path.isdir(arg):
if recursedown(arg): bad = 1
elif os.path.islink(arg):
err(arg + ': will not process symbolic links\n')
bad = 1
else:
if fix(arg): bad = 1
sys.exit(bad)
ispythonprog = regex.compile('^[a-zA-Z0-9_]+\.py$')
def ispython(name):
    """Return true iff `name` matches the ispythonprog pattern, i.e.
    looks like the file name of a Python module."""
    matched = ispythonprog.match(name)
    return matched >= 0
def recursedown(dirname):
dbg('recursedown(' + `dirname` + ')\n')
bad = 0
try:
names = os.listdir(dirname)
except os.error, msg:
err(dirname + ': cannot list directory: ' + `msg` + '\n')
return 1
names.sort()
subdirs = []
for name in names:
if name in (os.curdir, os.pardir): continue
fullname = os.path.join(dirname, name)
if os.path.islink(fullname): pass
elif os.path.isdir(fullname):
subdirs.append(fullname)
elif ispython(name):
if fix(fullname): bad = 1
for fullname in subdirs:
if recursedown(fullname): bad = 1
return bad
def fix(filename):
## dbg('fix(' + `filename` + ')\n')
try:
f = open(filename, 'r')
except IOError, msg:
err(filename + ': cannot open: ' + `msg` + '\n')
return 1
line = f.readline()
fixed = fixline(line)
if line == fixed:
rep(filename+': no change\n')
f.close()
return
head, tail = os.path.split(filename)
tempname = os.path.join(head, '@' + tail)
try:
g = open(tempname, 'w')
except IOError, msg:
f.close()
err(tempname+': cannot create: '+`msg`+'\n')
return 1
rep(filename + ': updating\n')
g.write(fixed)
BUFSIZE = 8*1024
while 1:
buf = f.read(BUFSIZE)
if not buf: break
g.write(buf)
g.close()
f.close()
# Finishing touch -- move files
# First copy the file's mode to the temp file
try:
statbuf = os.stat(filename)
os.chmod(tempname, statbuf[ST_MODE] & 07777)
except os.error, msg:
err(tempname + ': warning: chmod failed (' + `msg` + ')\n')
# Then make a backup of the original file as filename~
try:
os.rename(filename, filename + '~')
except os.error, msg:
err(filename + ': warning: backup failed (' + `msg` + ')\n')
# Now move the temp file to the original file
try:
os.rename(tempname, filename)
except os.error, msg:
err(filename + ': rename failed (' + `msg` + ')\n')
return 1
# Return succes
return 0
def fixline(line):
    """Return the replacement for the first line of a script.

    A line that starts with '#!' and mentions "python" is replaced by
    '#! ' followed by the global new_interpreter; any other line is
    returned unchanged.
    """
    if line[:2] == '#!' and line.find("python") >= 0:
        return '#! %s\n' % new_interpreter
    return line
main()
--- NEW FILE: pdeps.py ---
#! /usr/bin/env python
# pdeps
#
# Find dependencies between a bunch of Python modules.
#
# Usage:
# pdeps file1.py file2.py ...
#
# Output:
# Four tables separated by lines like '--- Closure ---':
# 1) Direct dependencies, listing which module imports which other modules
# 2) The inverse of (1)
# 3) Indirect dependencies, or the closure of the above
# 4) The inverse of (3)
#
# To do:
# - command line options to select output type
# - option to automatically scan the Python library for referenced modules
# - option to limit output to particular modules
import sys
import regex
import os
import string
# Main program
#
def main():
args = sys.argv[1:]
if not args:
print 'usage: pdeps file.py file.py ...'
return 2
#
table = {}
for arg in args:
process(arg, table)
#
print '--- Uses ---'
printresults(table)
#
print '--- Used By ---'
inv = inverse(table)
printresults(inv)
#
print '--- Closure of Uses ---'
reach = closure(table)
printresults(reach)
#
print '--- Closure of Used By ---'
invreach = inverse(reach)
printresults(invreach)
#
return 0
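# Compiled regular expressions to search for import statements.
# NB: the names are swapped with respect to what they match: m_import
# matches "from <module> ..." lines, m_from matches "import <modules>" lines.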
#
m_import = regex.compile('^[ \t]*from[ \t]+\([^ \t]+\)[ \t]+')
m_from = regex.compile('^[ \t]*import[ \t]+\([^#]+\)')
# Collect data from one file
#
def process(filename, table):
    """Scan one source file for import statements.

    Stores under the module name (the file's base name without a
    trailing '.py') the list of module names it imports, in order of
    first appearance and without duplicates.  Lines continued with a
    backslash are joined before they are examined.
    """
    fp = open(filename, 'r')
    try:
        mod = os.path.basename(filename)
        if mod[-3:] == '.py':
            mod = mod[:-3]
        table[mod] = deps = []
        while 1:
            line = fp.readline()
            if not line: break
            # readline() keeps the newline, so a continued line ends
            # in backslash-newline; drop both and glue on the next line
            while line[-2:] == '\\\n':
                nextline = fp.readline()
                if not nextline: break
                line = line[:-2] + nextline
            if m_import.match(line) >= 0:
                (a, b), (a1, b1) = m_import.regs[:2]
            elif m_from.match(line) >= 0:
                (a, b), (a1, b1) = m_from.regs[:2]
            else: continue
            words = string.splitfields(line[a1:b1], ',')
            # print '#', line, words
            for word in words:
                word = string.strip(word)
                if word not in deps:
                    deps.append(word)
    finally:
        fp.close()
# Compute closure (this is in fact totally general)
#
def closure(table):
    """Return the transitive closure of `table`.

    `table` maps each key to a list of items.  The result maps each
    key to a new list holding the original items followed by
    everything reachable through items that are themselves keys.
    `table` itself is not modified.
    """
    names = table.keys()
    #
    # Start from a copy of table
    #
    reach = {}
    for name in names:
        reach[name] = table[name][:]
    #
    # Keep extending the lists until a full pass adds nothing
    #
    while 1:
        grew = 0
        for name in names:
            seen = reach[name]
            # seen may grow while it is being traversed; the new
            # entries are visited in the same pass
            for via in seen:
                if via not in names:
                    continue
                for target in reach[via]:
                    if target not in seen:
                        seen.append(target)
                        grew = 1
        if not grew:
            break
    #
    return reach
# Invert a table (this is again totally general).
# All keys of the original table are made keys of the inverse,
# so there may be empty lists in the inverse.
#
def inverse(table):
    """Return the inverse of `table`: a dictionary mapping every item
    to the list of keys whose lists contain it.

    Every key of `table` is also a key of the result, so the result
    may contain empty lists.
    """
    inv = {}
    for key in table.keys():
        inv.setdefault(key, [])
        for item in table[key]:
            inv.setdefault(item, []).append(key)
    return inv
# Store "item" in "dict" under "key".
# The dictionary maps keys to lists of items.
# If there is no list for the key yet, it is created.
#
def store(dict, key, item):
    """Append `item` to the list dict[key], creating the list first
    when the key is new."""
    try:
        dict[key].append(item)
    except KeyError:
        dict[key] = [item]
# Tabulate results neatly
#
def printresults(table):
modules = table.keys()
maxlen = 0
for mod in modules: maxlen = max(maxlen, len(mod))
modules.sort()
for mod in modules:
list = table[mod]
list.sort()
print string.ljust(mod, maxlen), ':',
if mod in list:
print '(*)',
for ref in list:
print ref,
print
# Call main and honor exit status
try:
sys.exit(main())
except KeyboardInterrupt:
sys.exit(1)
--- NEW FILE: pindent.py ---
#! /usr/bin/env python
# This file contains a class and a main program that perform three
# related (though complementary) formatting operations on Python
# programs. When called as "pindent -c", it takes a valid Python
# program as input and outputs a version augmented with block-closing
# comments. When called as "pindent -d", it assumes its input is a
# Python program with block-closing comments and outputs a commentless
# version. When called as "pindent -r" it assumes its input is a
# Python program with block-closing comments but with its indentation
# messed up, and outputs a properly indented version.
# A "block-closing comment" is a comment of the form '# end <keyword>'
# where <keyword> is the keyword that opened the block. If the
# opening keyword is 'def' or 'class', the function or class name may
# be repeated in the block-closing comment as well. Here is an
# example of a program fully augmented with block-closing comments:
# def foobar(a, b):
# if a == b:
# a = a+1
# elif a < b:
# b = b-1
# if b > a: a = a-1
# # end if
# else:
# print 'oops!'
# # end if
# # end def foobar
# Note that only the last part of an if...elif...else... block needs a
# block-closing comment; the same is true for other compound
# statements (e.g. try...except). Also note that "short-form" blocks
# like the second 'if' in the example must be closed as well;
# otherwise the 'else' in the example would be ambiguous (remember
# that indentation is not significant when interpreting block-closing
# comments).
# The operations are idempotent (i.e. applied to their own output
# they yield an identical result). Running first "pindent -c" and
# then "pindent -r" on a valid Python program produces a program that
# is semantically identical to the input (though its indentation may
# be different). Running "pindent -d" on that output produces a
# program that only differs from the original in indentation.
# Other options:
# -s stepsize: set the indentation step size (default 8)
# -t tabsize : set the number of spaces a tab character is worth (default 8)
# -e : expand TABs into spaces
# file ... : input file(s) (default standard input)
# The results always go to standard output
# Caveats:
# - comments ending in a backslash will be mistaken for continued lines
# - continuations using backslash are always left unchanged
# - continuations inside parentheses are not extra indented by -r
# but must be indented for -c to work correctly (this breaks
# idempotency!)
# - continued lines inside triple-quoted strings are totally garbled
# Secret feature:
# - On input, a block may also be closed with an "end statement" --
# this is a block-closing comment without the '#' sign.
# Possible improvements:
# - check syntax based on transitions in 'next' table
# - better error reporting
# - better error recovery
# - check identifier after class/def
# The following wishes need a more complete tokenization of the source:
# - Don't get fooled by comments ending in backslash
# - reindent continuation lines indicated by backslash
# - handle continuation lines inside parentheses/braces/brackets
# - handle triple quoted strings spanning lines
# - realign comments
# - optionally do much more thorough reformatting, a la C indent
# Defaults
STEPSIZE = 8
TABSIZE = 8
EXPANDTABS = 0
import os
import re
import string
import sys
next = {}
next['if'] = next['elif'] = 'elif', 'else', 'end'
next['while'] = next['for'] = 'else', 'end'
next['try'] = 'except', 'finally'
next['except'] = 'except', 'else', 'end'
next['else'] = next['finally'] = next['def'] = next['class'] = 'end'
next['end'] = ()
start = 'if', 'while', 'for', 'try', 'def', 'class'
class PythonIndenter:
    """Reads a Python program from one file object and writes a
    transformed version to another.

    complete() adds block-closing comments, delete() removes them and
    reformat() re-indents a program according to them.
    """

    def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
                 indentsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
        """fpi/fpo are the input and output file objects, indentsize
        the indentation step, tabsize the width of a tab; a true
        expandtabs makes write() expand tabs into spaces."""
        self.fpi = fpi
        self.fpo = fpo
        self.indentsize = indentsize
        self.tabsize = tabsize
        self.lineno = 0
        self.expandtabs = expandtabs
        self._write = fpo.write
        # a line starting with a lower case word, optionally followed
        # by an identifier
        self.kwprog = re.compile(
                r'^\s*(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # a block-closing comment or "end statement"
        self.endprog = re.compile(
                r'^\s*#?\s*end\s+(?P<kw>[a-z]+)'
                r'(\s+(?P<id>[a-zA-Z_]\w*))?'
                r'[^\w]')
        # leading blanks and tabs
        self.wsprog = re.compile(r'^[ \t]*')
    # end def __init__

    def write(self, line):
        """Write line to the output, expanding tabs if requested."""
        if self.expandtabs:
            self._write(string.expandtabs(line, self.tabsize))
        else:
            self._write(line)
        # end if
    # end def write

    def readline(self):
        """Read one physical line, counting it in self.lineno."""
        line = self.fpi.readline()
        if line: self.lineno = self.lineno + 1
        # end if
        return line
    # end def readline

    def error(self, fmt, *args):
        """Report an error on stderr (with the current line number)
        and as a '### ... ###' line in the output."""
        if args: fmt = fmt % args
        # end if
        sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
        self.write('### %s ###\n' % fmt)
    # end def error

    def getline(self):
        """Read one line, appending the following lines for as long as
        the text ends in backslash-newline.  Returns '' at EOF."""
        line = self.readline()
        while line[-2:] == '\\\n':
            line2 = self.readline()
            if not line2: break
            # end if
            line = line + line2
        # end while
        return line
    # end def getline

    def putline(self, line, indent = None):
        """Write line.  Unless indent is None, the line's leading
        whitespace is replaced by indent*indentsize columns made of
        tabs and spaces."""
        if indent is None:
            self.write(line)
            return
        # end if
        tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
        i = 0
        m = self.wsprog.match(line)
        if m: i = m.end()
        # end if
        self.write('\t'*tabs + ' '*spaces + line[i:])
    # end def putline

    def reformat(self):
        """Re-indent the input, deriving the nesting depth from the
        opening keywords and the block-closing comments."""
        # one (opening keyword, latest keyword) pair per open block
        stack = []
        while 1:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                kw = 'end'
                kw2 = m.group('kw')
                if not stack:
                    self.error('unexpected end')
                elif stack[-1][0] != kw2:
                    self.error('unmatched end')
                # end if
                del stack[-1:]
                self.putline(line, len(stack))
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    self.putline(line, len(stack))
                    stack.append((kw, kw))
                    continue
                # end if
                if next.has_key(kw) and stack:
                    # elif, else, except, ...: one level out
                    self.putline(line, len(stack)-1)
                    kwa, kwb = stack[-1]
                    stack[-1] = kwa, kw
                    continue
                # end if
            # end if
            self.putline(line, len(stack))
        # end while
        if stack:
            self.error('unterminated keywords')
            for kwa, kwb in stack:
                self.write('\t%s\n' % kwa)
            # end for
        # end if
    # end def reformat

    def delete(self):
        """Copy the input, leaving out the block-closing comments.
        Warns on stderr when the number of removed comments differs
        from the number of opening keywords seen."""
        begin_counter = 0
        end_counter = 0
        while 1:
            line = self.getline()
            if not line: break      # EOF
            # end if
            m = self.endprog.match(line)
            if m:
                end_counter = end_counter + 1
                continue
            # end if
            m = self.kwprog.match(line)
            if m:
                kw = m.group('kw')
                if kw in start:
                    begin_counter = begin_counter + 1
                # end if
            # end if
            self.putline(line)
        # end while
        if begin_counter - end_counter < 0:
            sys.stderr.write('Warning: input contained more end tags than expected\n')
        elif begin_counter - end_counter > 0:
            sys.stderr.write('Warning: input contained less end tags than expected\n')
        # end if
    # end def delete

    def complete(self):
        """Copy the input, adding the block-closing comments that are
        missing, based on the indentation of the input."""
        self.indentsize = 1
        # saved (current, firstkw, lastkw, topid) of enclosing levels
        stack = []
        # blank and comment lines held back until the next code line
        todo = []
        current, firstkw, lastkw, topid = 0, '', '', ''
        # thisid is only assigned by lines that match one of the
        # regular expressions; give it a value up front so that an
        # indented first line matching neither cannot hit an unbound
        # name below
        thisid = ''
        while 1:
            line = self.getline()
            i = 0
            m = self.wsprog.match(line)
            if m: i = m.end()
            # end if
            m = self.endprog.match(line)
            if m:
                thiskw = 'end'
                endkw = m.group('kw')
                thisid = m.group('id')
            else:
                m = self.kwprog.match(line)
                if m:
                    thiskw = m.group('kw')
                    if not next.has_key(thiskw):
                        thiskw = ''
                    # end if
                    if thiskw in ('def', 'class'):
                        thisid = m.group('id')
                    else:
                        thisid = ''
                    # end if
                elif line[i:i+1] in ('\n', '#'):
                    todo.append(line)
                    continue
                else:
                    thiskw = ''
                # end if
            # end if
            indent = len(string.expandtabs(line[:i], self.tabsize))
            # dedent: close every block deeper than this line
            while indent < current:
                if firstkw:
                    if topid:
                        s = '# end %s %s\n' % (
                                firstkw, topid)
                    else:
                        s = '# end %s\n' % firstkw
                    # end if
                    self.putline(s, current)
                    firstkw = lastkw = ''
                # end if
                current, firstkw, lastkw, topid = stack[-1]
                del stack[-1]
            # end while
            if indent == current and firstkw:
                if thiskw == 'end':
                    if endkw != firstkw:
                        self.error('mismatched end')
                    # end if
                    firstkw = lastkw = ''
                elif not thiskw or thiskw in start:
                    # a new statement at this level ends the block
                    if topid:
                        s = '# end %s %s\n' % (
                                firstkw, topid)
                    else:
                        s = '# end %s\n' % firstkw
                    # end if
                    self.putline(s, current)
                    firstkw = lastkw = topid = ''
                # end if
            # end if
            if indent > current:
                stack.append((current, firstkw, lastkw, topid))
                if thiskw and thiskw not in start:
                    # error
                    thiskw = ''
                # end if
                current, firstkw, lastkw, topid = \
                         indent, thiskw, thiskw, thisid
            # end if
            if thiskw:
                if thiskw in start:
                    firstkw = lastkw = thiskw
                    topid = thisid
                else:
                    lastkw = thiskw
                # end if
            # end if
            for l in todo: self.write(l)
            # end for
            todo = []
            if not line: break
            # end if
            self.write(line)
        # end while
    # end def complete

# end class PythonIndenter
# Simplified user interface
# - xxx_filter(input, output): read and write file objects
# - xxx_string(s): take and return string object
# - xxx_file(filename): process file in place, return true iff changed
def complete_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output with
    block-closing comments added."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).complete()
# end def complete_filter
def delete_filter(input= sys.stdin, output = sys.stdout,
                  stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output with the
    block-closing comments removed."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).delete()
# end def delete_filter
def reformat_filter(input = sys.stdin, output = sys.stdout,
                    stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Read a program from input and write it to output re-indented
    according to its block-closing comments."""
    PythonIndenter(input, output, stepsize, tabsize, expandtabs).reformat()
# end def reformat_filter
class StringReader:
    """Minimal read-only file object on top of a string."""

    def __init__(self, buf):
        self.buf = buf
        self.pos = 0
        self.len = len(self.buf)
    # end def __init__

    def read(self, n = 0):
        """Return the next n characters, or everything that is left
        when n <= 0 or fewer than n characters remain."""
        avail = self.len - self.pos
        if n <= 0 or n > avail:
            n = avail
        # end if
        begin = self.pos
        self.pos = begin + n
        return self.buf[begin:self.pos]
    # end def read

    def readline(self):
        """Return the text up to and including the next newline, or
        the rest of the buffer when there is no newline left."""
        eol = self.buf.find('\n', self.pos)
        if eol < 0:
            return self.read()
        # end if
        return self.read(eol + 1 - self.pos)
    # end def readline

    def readlines(self):
        """Return the remaining lines as a list."""
        lines = []
        while 1:
            line = self.readline()
            if not line: break
            # end if
            lines.append(line)
        # end while
        return lines
    # end def readlines

    # seek/tell etc. are left as an exercise for the reader

# end class StringReader
class StringWriter:
    """Minimal write-only file object that collects its output in a
    string."""

    def __init__(self):
        self.buf = ''
    # end def __init__

    def write(self, s):
        """Append s to the collected output."""
        self.buf += s
    # end def write

    def getvalue(self):
        """Return everything written so far."""
        return self.buf
    # end def getvalue

# end class StringWriter
def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """Return the program text source with block-closing comments
    added."""
    output = StringWriter()
    PythonIndenter(StringReader(source), output,
                   stepsize, tabsize, expandtabs).complete()
    return output.getvalue()
# end def complete_string
def delete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """String form of 'delete': run PythonIndenter's delete() pass over
    the program text in source and return the resulting text."""
    writer = StringWriter()
    PythonIndenter(StringReader(source), writer,
                   stepsize, tabsize, expandtabs).delete()
    return writer.getvalue()
# end def delete_string
def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """String form of 'reformat': run PythonIndenter's reformat() pass
    over the program text in source and return the resulting text."""
    writer = StringWriter()
    PythonIndenter(StringReader(source), writer,
                   stepsize, tabsize, expandtabs).reformat()
    return writer.getvalue()
# end def reformat_string
def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """File form of 'complete': process filename in place.

    The old contents are kept as filename + '~' when the rename succeeds.
    Return 1 if the file was rewritten, 0 if processing changed nothing.
    """
    import os
    # Close the input explicitly rather than relying on garbage collection.
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = complete_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    # Best effort: a failed backup rename does not stop the rewrite.
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def complete_file
def delete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """File form of 'delete': process filename in place.

    The old contents are kept as filename + '~' when the rename succeeds.
    Return 1 if the file was rewritten, 0 if processing changed nothing.
    """
    import os
    # Close the input explicitly rather than relying on garbage collection.
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = delete_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    # Best effort: a failed backup rename does not stop the rewrite.
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def delete_file
def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE, expandtabs = EXPANDTABS):
    """File form of 'reformat': process filename in place.

    The old contents are kept as filename + '~' when the rename succeeds.
    Return 1 if the file was rewritten, 0 if processing changed nothing.
    """
    import os
    # Close the input explicitly rather than relying on garbage collection.
    f = open(filename, 'r')
    try:
        source = f.read()
    finally:
        f.close()
    # end try
    result = reformat_string(source, stepsize, tabsize, expandtabs)
    if source == result: return 0
    # end if
    # Best effort: a failed backup rename does not stop the rewrite.
    try: os.rename(filename, filename + '~')
    except os.error: pass
    # end try
    f = open(filename, 'w')
    try:
        f.write(result)
    finally:
        f.close()
    # end try
    return 1
# end def reformat_file
# Test program when called as a script
# Help text written to stderr on command-line errors.  The %(NAME)d
# fields are filled in from the module's globals through vars().
usage = """
usage: pindent (-c|-d|-r) [-s stepsize] [-t tabsize] [-e] [file] ...
-c : complete a correctly indented program (add #end directives)
-d : delete #end directives
-r : reformat a completed program (use #end directives)
-s stepsize: indentation step (default %(STEPSIZE)d)
-t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
-e : expand TABs into spaces (default OFF)
[file] ... : files are changed in place, with backups in file~
If no files are specified or a single - is given,
the program acts as a filter (reads stdin, writes stdout).
""" % vars()
def error_both(op1, op2):
    """Complain on stderr that two actions were requested, show the usage
    text and exit with status 2.

    op1 is the option just seen (e.g. '-c'); op2 is the name of the action
    chosen earlier, of which only the first letter is shown.
    """
    write = sys.stderr.write
    write('Error: You can not specify both ' + op1 +
          ' and -' + op2[0] + ' at the same time\n')
    write(usage)
    sys.exit(2)
# end def error_both
def test():
import getopt
try:
opts, args = getopt.getopt(sys.argv[1:], 'cdrs:t:e')
except getopt.error, msg:
sys.stderr.write('Error: %s\n' % msg)
sys.stderr.write(usage)
sys.exit(2)
# end try
action = None
stepsize = STEPSIZE
tabsize = TABSIZE
expandtabs = EXPANDTABS
for o, a in opts:
if o == '-c':
if action: error_both(o, action)
# end if
action = 'complete'
elif o == '-d':
if action: error_both(o, action)
# end if
action = 'delete'
elif o == '-r':
if action: error_both(o, action)
# end if
action = 'reformat'
elif o == '-s':
stepsize = string.atoi(a)
elif o == '-t':
tabsize = string.atoi(a)
elif o == '-e':
expandtabs = 1
# end if
# end for
if not action:
sys.stderr.write(
'You must specify -c(omplete), -d(elete) or -r(eformat)\n')
sys.stderr.write(usage)
sys.exit(2)
# end if
if not args or args == ['-']:
action = eval(action + '_filter')
action(sys.stdin, sys.stdout, stepsize, tabsize, expandtabs)
else:
action = eval(action + '_file')
for file in args:
action(file, stepsize, tabsize, expandtabs)
# end for
# end if
# end def test
if __name__ == '__main__':
test()
# end if
--- NEW FILE: ptags.py ---
#! /usr/bin/env python
# ptags
#
# Create a tags file for Python programs, usable with vi.
# Tagged are:
# - functions (even inside other defs or classes)
# - classes
# - filenames
# Warns about files it cannot open.
# No warnings about duplicate tags.
import sys, re, os
tags = [] # Modified global variable!
def main():
    """Collect tags from every file named on the command line and write
    them, sorted, to the file 'tags' in the current directory.

    Nothing is written when no tags were collected.
    """
    args = sys.argv[1:]
    for file in args: treat_file(file)
    if tags:
        fp = open('tags', 'w')
        try:
            tags.sort()
            for s in tags: fp.write(s)
        finally:
            # Close explicitly so the tags file is flushed even on error.
            fp.close()
expr = '^[ \t]*(def|class)[ \t]+([a-zA-Z0-9_]+)[ \t]*[:\(]'
matcher = re.compile(expr)
def treat_file(file):
    """Append the tag lines for one source file to the global tags list.

    One tag is added for the file itself (its basename with a trailing
    '.py' removed, pointing at line 1) and one for every line matched by
    the module-level matcher (def/class headers).  A file that cannot be
    opened is reported on stderr and skipped.
    """
    try:
        fp = open(file, 'r')
    except IOError:
        # Only open failures are expected here; anything else should
        # propagate instead of being reported as "Cannot open".
        sys.stderr.write('Cannot open %s\n' % file)
        return
    try:
        base = os.path.basename(file)
        if base[-3:] == '.py':
            base = base[:-3]
        tags.append(base + '\t' + file + '\t' + '1\n')
        while 1:
            line = fp.readline()
            if not line:
                break
            m = matcher.match(line)
            if m:
                content = m.group(0)
                name = m.group(2)
                # vi tag format: name, file, search pattern for the line.
                tags.append(name + '\t' + file + '\t/^' + content + '/\n')
    finally:
        fp.close()
main()
--- NEW FILE: pydoc.pyw ---
import pydoc
pydoc.gui()
--- NEW FILE: redemo.py ---
"""Basic regular expression demonstration facility (Perl style syntax)."""
from Tkinter import *
import re
class ReDemo:
# Build the whole user interface inside master: a regex entry, the flag
# check boxes (via addoptions), a status line, the first/all radio
# buttons, the text to search and a list box for the match groups.
def __init__(self, master):
self.master = master
self.promptdisplay = Label(self.master, anchor=W,
text="Enter a Perl-style regular expression:")
self.promptdisplay.pack(side=TOP, fill=X)
self.regexdisplay = Entry(self.master)
self.regexdisplay.pack(fill=X)
self.regexdisplay.focus_set()
self.addoptions()
self.statusdisplay = Label(self.master, text="", anchor=W)
self.statusdisplay.pack(side=TOP, fill=X)
self.labeldisplay = Label(self.master, anchor=W,
text="Enter a string to search:")
self.labeldisplay.pack(fill=X)
# NOTE(review): the label is packed a second time with the same options;
# this looks like an accidental duplicate of the line above.
self.labeldisplay.pack(fill=X)
self.showframe = Frame(master)
self.showframe.pack(fill=X, anchor=W)
# showvar holds "first" or "all"; reevaluate() reads it to decide
# whether to stop after the first match.
self.showvar = StringVar(master)
self.showvar.set("first")
self.showfirstradio = Radiobutton(self.showframe,
text="Highlight first match",
variable=self.showvar,
value="first",
command=self.recompile)
self.showfirstradio.pack(side=LEFT)
self.showallradio = Radiobutton(self.showframe,
text="Highlight all matches",
variable=self.showvar,
value="all",
command=self.recompile)
self.showallradio.pack(side=LEFT)
self.stringdisplay = Text(self.master, width=60, height=4)
self.stringdisplay.pack(fill=BOTH, expand=1)
self.stringdisplay.tag_configure("hit", background="yellow")
self.grouplabel = Label(self.master, text="Groups:", anchor=W)
self.grouplabel.pack(fill=X)
self.grouplist = Listbox(self.master)
self.grouplist.pack(expand=1, fill=BOTH)
# Every key press in the regex entry recompiles the pattern; every key
# press in the text widget re-runs the search.
self.regexdisplay.bind('<Key>', self.recompile)
self.stringdisplay.bind('<Key>', self.reevaluate)
self.compiled = None
self.recompile()
# Move the first bindtag of each widget to the end of its bindtags list.
# Presumably this makes the <Key> handlers above run after the default
# bindings have updated the widget contents -- TODO confirm.
btags = self.regexdisplay.bindtags()
self.regexdisplay.bindtags(btags[1:] + btags[:1])
btags = self.stringdisplay.bindtags()
self.stringdisplay.bindtags(btags[1:] + btags[:1])
# Create one check box per re flag named below.  The boxes, their
# IntVars and the frames holding them are remembered in self.boxes,
# self.vars and self.frames.
def addoptions(self):
self.frames = []
self.boxes = []
self.vars = []
for name in ('IGNORECASE',
'LOCALE',
'MULTILINE',
'DOTALL',
'VERBOSE'):
# A new frame is created whenever the number of boxes so far is a
# multiple of three.
if len(self.boxes) % 3 == 0:
frame = Frame(self.master)
frame.pack(fill=X)
self.frames.append(frame)
# The box's "on" value is the numeric value of the re flag itself.
val = getattr(re, name)
var = IntVar()
box = Checkbutton(frame,
variable=var, text=name,
offvalue=0, onvalue=val,
command=self.recompile)
box.pack(side=LEFT)
self.boxes.append(box)
self.vars.append(var)
def getflags(self):
    """Return the bitwise OR of the current values of all option
    variables in self.vars."""
    combined = 0
    for option in self.vars:
        combined = combined | option.get()
    return combined
# Compile the text of the regex entry with the currently selected flags.
# Used as a <Key> handler and as the command of the check boxes and radio
# buttons, hence the optional, unused event argument.
def recompile(self, event=None):
try:
self.compiled = re.compile(self.regexdisplay.get(),
self.getflags())
# Reset the status line: empty text, same background as the prompt label.
bg = self.promptdisplay['background']
self.statusdisplay.config(text="", background=bg)
except re.error, msg:
# An invalid pattern leaves self.compiled as None and shows the error
# message on a red status line.
self.compiled = None
self.statusdisplay.config(
text="re.error: %s" % str(msg),
background="red")
self.reevaluate()
# Run the compiled pattern over the contents of the text widget and
# refresh the highlighting, the group list and the status line.  Also
# bound as the <Key> handler of the text widget, hence the optional,
# unused event argument.
def reevaluate(self, event=None):
# Drop the highlighting of the previous evaluation; a TclError raised by
# tag_remove is ignored.
try:
self.stringdisplay.tag_remove("hit", "1.0", END)
except TclError:
pass
try:
self.stringdisplay.tag_remove("hit0", "1.0", END)
except TclError:
pass
self.grouplist.delete(0, END)
# Nothing to search with when there is no compiled pattern.
if not self.compiled:
return
# "hit" marks non-empty matches, "hit0" marks empty ones.
self.stringdisplay.tag_configure("hit", background="yellow")
self.stringdisplay.tag_configure("hit0", background="orange")
text = self.stringdisplay.get("1.0", END)
last = 0
nmatches = 0
while last <= len(text):
m = self.compiled.search(text, last)
if m is None:
break
first, last = m.span()
# For an empty match, move one character on so the next search starts
# past it.
if last == first:
last = first+1
tag = "hit0"
else:
tag = "hit"
# Text-widget indices expressed as character offsets from the start.
pfirst = "1.0 + %d chars" % first
plast = "1.0 + %d chars" % last
self.stringdisplay.tag_add(tag, pfirst, plast)
if nmatches == 0:
self.stringdisplay.yview_pickplace(pfirst)
# Entry 0 is the whole match, followed by the numbered groups.
groups = list(m.groups())
groups.insert(0, m.group())
for i in range(len(groups)):
g = "%2d: %s" % (i, `groups[i]`)
self.grouplist.insert(END, g)
nmatches = nmatches + 1
if self.showvar.get() == "first":
break
if nmatches == 0:
self.statusdisplay.config(text="(no match)",
background="yellow")
else:
self.statusdisplay.config(text="")
# Main function, run when invoked as a stand-alone Python program.
# Create the Tk root window, build a ReDemo inside it and run the event
# loop.
def main():
root = Tk()
demo = ReDemo(root)
# Closing the window through the window manager calls root.quit.
root.protocol('WM_DELETE_WINDOW', root.quit)
root.mainloop()
if __name__ == '__main__':
main()
--- NEW FILE: reindent.py ---
#! /usr/bin/env python
# Released to the public domain, by Tim Peters, 03 October 2000.
"""reindent [-d][-r][-v] path ...
-d Dry run. Analyze, but don't make any changes to, files.
-r Recurse. Search for all .py files in subdirectories too.
-v Verbose. Print informative msgs; else no output.
Change Python (.py) files to use 4-space indents and no hard tab characters.
Also trim excess whitespace from ends of lines, and empty lines at the ends
of files. Ensure the last line ends with a newline.
Pass one or more file and/or directory paths. When a directory path, all
.py files within the directory will be examined, and, if the -r option is
given, likewise recursively for subdirectories.
Overwrites files in place, renaming the originals with a .bak extension.
If reindent finds nothing to change, the file is left alone. If reindent
does change a file, the changed file is a fixed-point for reindent (i.e.,
running reindent on the resulting .py file won't change it again).
The hard part of reindenting is figuring out what to do with comment
lines. So long as the input files get a clean bill of health from
tabnanny.py, reindent should do a good job.
"""
__version__ = "1"
import tokenize
import os
import sys
verbose = 0
recurse = 0
dryrun = 0
def errprint(*args):
    """Write the str() of each argument to stderr, separated by single
    spaces and followed by a newline."""
    sys.stderr.write(" ".join([str(arg) for arg in args]) + "\n")
def main():
import getopt
global verbose, recurse, dryrun
try:
opts, args = getopt.getopt(sys.argv[1:], "drv")
except getopt.error, msg:
errprint(msg)
return
for o, a in opts:
if o == '-d':
dryrun += 1
elif o == '-r':
recurse += 1
elif o == '-v':
verbose += 1
if not args:
errprint("Usage:", __doc__)
return
for arg in args:
check(arg)
def check(file):
if os.path.isdir(file) and not os.path.islink(file):
if verbose:
print "listing directory", file
names = os.listdir(file)
for name in names:
fullname = os.path.join(file, name)
if ((recurse and os.path.isdir(fullname) and
not os.path.islink(fullname))
or name.lower().endswith(".py")):
check(fullname)
return
if verbose:
print "checking", file, "...",
try:
f = open(file)
except IOError, msg:
errprint("%s: I/O Error: %s" % (file, str(msg)))
return
r = Reindenter(f)
f.close()
if r.run():
if verbose:
print "changed."
if dryrun:
print "But this is a dry run, so leaving it alone."
if not dryrun:
bak = file + ".bak"
if os.path.exists(bak):
os.remove(bak)
os.rename(file, bak)
if verbose:
print "renamed", file, "to", bak
f = open(file, "w")
r.write(f)
f.close()
if verbose:
print "wrote new", file
else:
if verbose:
print "unchanged."
# Computes the reindented form of one file.  The constructor reads the
# file, run() builds the new text in self.after and reports whether it
# differs from the input, and write() emits self.after.
class Reindenter:
# f is an open file object; all of its lines are read here.
def __init__(self, f):
self.find_stmt = 1 # next token begins a fresh stmt?
self.level = 0 # current indent level
# Raw file lines.
self.raw = f.readlines()
# File lines, rstripped & tab-expanded. Dummy at start is so
# that we can use tokenize's 1-based line numbering easily.
# Note that a line is all-blank iff it's "\n".
self.lines = [line.rstrip().expandtabs() + "\n"
for line in self.raw]
self.lines.insert(0, None)
self.index = 1 # index into self.lines of next line
# List of (lineno, indentlevel) pairs, one for each stmt and
# comment line. indentlevel is -1 for comment lines, as a
# signal that tokenize doesn't know what to do about them;
# indeed, they're our headache!
self.stats = []
# Tokenize the cleaned-up lines (filling self.stats through tokeneater),
# build the reindented program in self.after and return true iff it
# differs from the raw input lines.
def run(self):
tokenize.tokenize(self.getline, self.tokeneater)
# Remove trailing empty lines.
lines = self.lines
while lines and lines[-1] == "\n":
lines.pop()
# Sentinel.
stats = self.stats
stats.append((len(lines), 0))
# Map count of leading spaces to # we want.
have2want = {}
# Program after transformation.
after = self.after = []
# Each stats entry covers the lines from its own line number up to
# (not including) the line number of the next entry.
for i in range(len(stats)-1):
thisstmt, thislevel = stats[i]
nextstmt = stats[i+1][0]
have = getlspace(lines[thisstmt])
want = thislevel * 4
if want < 0:
# A comment line.
if have:
# An indented comment line. If we saw the same
# indentation before, reuse what it most recently
# mapped to.
want = have2want.get(have, -1)
if want < 0:
# Then it probably belongs to the next real stmt.
for j in xrange(i+1, len(stats)-1):
jline, jlevel = stats[j]
if jlevel >= 0:
if have == getlspace(lines[jline]):
want = jlevel * 4
break
if want < 0: # Maybe it's a hanging
# comment like this one,
# in which case we should shift it like its base
# line got shifted.
for j in xrange(i-1, -1, -1):
jline, jlevel = stats[j]
if jlevel >= 0:
want = have + getlspace(after[jline-1]) - \
getlspace(lines[jline])
break
if want < 0:
# Still no luck -- leave it alone.
want = have
else:
want = 0
assert want >= 0
have2want[have] = want
# diff is the number of spaces to add (positive) or remove (negative).
diff = want - have
if diff == 0 or have == 0:
after.extend(lines[thisstmt:nextstmt])
else:
for line in lines[thisstmt:nextstmt]:
if diff > 0:
if line == "\n":
after.append(line)
else:
after.append(" " * diff + line)
else:
# Never strip more than the leading blanks the line actually has.
remove = min(getlspace(line), -diff)
after.append(line[remove:])
return self.raw != self.after
# Write the reindented lines built by run() to the open file f.
def write(self, f):
f.writelines(self.after)
# Line-getter for tokenize.
# Returns the next cleaned-up line, or "" once the lines are exhausted.
def getline(self):
if self.index >= len(self.lines):
line = ""
else:
line = self.lines[self.index]
self.index += 1
return line
# Line-eater for tokenize.
# Tracks the indent level and appends a (lineno, level) pair to
# self.stats for the first token of each statement and (lineno, -1) for
# each comment seen while a statement start is being looked for.  The
# token-type constants are bound as default arguments.
def tokeneater(self, type, token, (sline, scol), end, line,
INDENT=tokenize.INDENT,
DEDENT=tokenize.DEDENT,
NEWLINE=tokenize.NEWLINE,
COMMENT=tokenize.COMMENT,
NL=tokenize.NL):
if type == NEWLINE:
# A program statement, or ENDMARKER, will eventually follow,
# after some (possibly empty) run of tokens of the form
# (NL | COMMENT)* (INDENT | DEDENT+)?
self.find_stmt = 1
elif type == INDENT:
self.find_stmt = 1
self.level += 1
elif type == DEDENT:
self.find_stmt = 1
self.level -= 1
elif type == COMMENT:
if self.find_stmt:
self.stats.append((sline, -1))
# but we're still looking for a new stmt, so leave
# find_stmt alone
elif type == NL:
pass
elif self.find_stmt:
# This is the first "real token" following a NEWLINE, so it
# must be the first token of the next program statement, or an
# ENDMARKER.
self.find_stmt = 0
if line: # not endmarker
self.stats.append((sline, self.level))
# Count number of leading blanks.
def getlspace(line):
    """Return the number of space characters at the start of line."""
    count = 0
    for ch in line:
        if ch != " ":
            break
        count = count + 1
    return count
if __name__ == '__main__':
main()
--- NEW FILE: rgrep.py ---
#! /usr/bin/env python
"""Reverse grep.
Usage: rgrep [-i] pattern file
"""
import sys
import re
import string
import getopt
# Print the lines of a file that match a pattern, starting from the end
# of the file.  The file is read backwards in chunks of bufsize bytes.
# Usage errors are reported through usage(), which exits.
def main():
bufsize = 64*1024
reflags = 0
opts, args = getopt.getopt(sys.argv[1:], "i")
for o, a in opts:
if o == '-i':
reflags = reflags | re.IGNORECASE
if len(args) < 2:
usage("not enough arguments")
if len(args) > 2:
usage("exactly one file argument required")
pattern, filename = args
try:
prog = re.compile(pattern, reflags)
except re.error, msg:
usage("error in regular expression: %s" % str(msg))
try:
f = open(filename)
except IOError, msg:
usage("can't open %s: %s" % (repr(filename), str(msg)), 1)
# Start at the end of the file; pos is the offset of the part not yet read.
f.seek(0, 2)
pos = f.tell()
# leftover holds the text at the front of the previously read chunk,
# which may be only the tail of a line; None means there is none.
leftover = None
while pos > 0:
size = min(pos, bufsize)
pos = pos - size
f.seek(pos)
buffer = f.read(size)
lines = string.split(buffer, "\n")
del buffer
if leftover is None:
# Drop the empty string that split() yields after a final newline.
if not lines[-1]:
del lines[-1]
else:
# Join the end of this chunk to the text carried over from the chunk
# that follows it in the file.
lines[-1] = lines[-1] + leftover
if pos > 0:
# More data precedes this chunk, so its first piece is carried over
# instead of being searched now.
leftover = lines[0]
del lines[0]
else:
leftover = None
lines.reverse()
for line in lines:
if prog.search(line):
print line
def usage(msg, code=2):
sys.stdout = sys.stderr
print msg
print __doc__
sys.exit(code)
if __name__ == '__main__':
main()
--- NEW FILE: suff.py ---
#! /usr/bin/env python
# suff
#
# show different suffixes amongst arguments
import sys
def main():
files = sys.argv[1:]
suffixes = {}
for file in files:
suff = getsuffix(file)
if not suffixes.has_key(suff):
suffixes[suff] = []
suffixes[suff].append(file)
keys = suffixes.keys()
keys.sort()
for suff in keys:
print `suff`, len(suffixes[suff])
def getsuffix(file):
    """Return the part of file starting at its last '.', or '' when file
    contains no '.'."""
    dot = file.rfind('.')
    if dot < 0:
        return ''
    return file[dot:]
main()
--- NEW FILE: sum5.py ---
#! /usr/bin/env python
# print md5 checksum for files
bufsize = 8096
fnfilter = None
rmode = 'r'
usage = """
usage: sum5 [-b] [-t] [-l] [-s bufsize] [file ...]
-b : read files in binary mode
-t : read files in text mode (default)
-l : print last pathname component only
-s bufsize: read buffer size (default %d)
file ... : files to sum; '-' or no files means stdin
""" % bufsize
import sys
import string
import os
import md5
import regsub
StringType = type('')
FileType = type(sys.stdin)
def sum(*files):
    """Print the checksum of every file given and return the status.

    Arguments are file names ('-' means stdin) or nested sequences of
    them; a trailing file object, if present, receives the output
    instead of sys.stdout.  The result is 0 unless some file failed.
    """
    out = sys.stdout
    if files and type(files[-1]) == FileType:
        out = files[-1]
        files = files[:-1]
    # A single non-string argument is taken as the sequence of files.
    if len(files) == 1 and type(files[0]) != StringType:
        files = files[0]
    sts = 0
    for f in files:
        if type(f) != StringType:
            rc = sum(f, out)
        elif f == '-':
            rc = printsumfp(sys.stdin, '<stdin>', out)
        else:
            rc = printsum(f, out)
        sts = rc or sts
    return sts
def printsum(file, out = sys.stdout):
try:
fp = open(file, rmode)
except IOError, msg:
sys.stderr.write('%s: Can\'t open: %s\n' % (file, msg))
return 1
if fnfilter:
file = fnfilter(file)
sts = printsumfp(fp, file, out)
fp.close()
return sts
def printsumfp(fp, file, out = sys.stdout):
m = md5.md5()
try:
while 1:
data = fp.read(bufsize)
if not data: break
m.update(data)
except IOError, msg:
sys.stderr.write('%s: I/O error: %s\n' % (file, msg))
return 1
out.write('%s %s\n' % (hexify(m.digest()), file))
return 0
def hexify(s):
    """Return s with every character replaced by its two-digit lowercase
    hexadecimal code."""
    return ''.join(['%02x' % ord(c) for c in s])
def main(args = sys.argv[1:], out = sys.stdout):
global fnfilter, rmode, bufsize
import getopt
try:
opts, args = getopt.getopt(args, 'blts:')
except getopt.error, msg:
sys.stderr.write('%s: %s\n%s' % (sys.argv[0], msg, usage))
return 2
for o, a in opts:
if o == '-l':
fnfilter = os.path.basename
if o == '-b':
rmode = 'rb'
if o == '-t':
rmode = 'r'
if o == '-s':
bufsize = string.atoi(a)
if not args: args = ['-']
return sum(args, out)
if __name__ == '__main__' or __name__ == sys.argv[0]:
sys.exit(main(sys.argv[1:], sys.stdout))
--- NEW FILE: texi2html.py ---
#! /usr/bin/env python
# Convert GNU texinfo files into HTML, one file per node.
# Based on Texinfo 2.14.
# Usage: texi2html [-d] [-d] [-c] inputfile outputdirectory
# The input file must be a complete texinfo file, e.g. emacs.texi.
# This creates many files (one per info node) in the output directory,
# overwriting existing files of the same name. All files created have
# ".html" as their extension.
# XXX To do:
# - handle @comment*** correctly
# - handle @xref {some words} correctly
# - handle @ftable correctly (items aren't indexed?)
# - handle @itemx properly
# - handle @exdent properly
# - add links directly to the proper line from indices
# - check against the definitive list of @-cmds; we still miss (among others):
[...1574 lines suppressed...]
parser.print_headers = print_headers
file = sys.argv[1]
parser.setdirname(sys.argv[2])
if file == '-':
fp = sys.stdin
else:
parser.setincludedir(os.path.dirname(file))
try:
fp = open(file, 'r')
except IOError, msg:
print file, ':', msg
sys.exit(1)
parser.parse(fp)
fp.close()
parser.report()
if __name__ == "__main__":
test()
--- NEW FILE: trace.py ---
#!/usr/bin/env python
# Copyright 2000, Mojam Media, Inc., all rights reserved.
# Author: Skip Montanaro
#
# Copyright 1999, Bioreason, Inc., all rights reserved.
# Author: Andrew Dalke
#
# Copyright 1995-1997, Automatrix, Inc., all rights reserved.
# Author: Skip Montanaro
#
# Copyright 1991-1995, Stichting Mathematisch Centrum, all rights reserved.
#
#
# Permission to use, copy, modify, and distribute this Python software and
# its associated documentation for any purpose without fee is hereby
# granted, provided that the above copyright notice appears in all copies,
# and that both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of neither Automatrix,
# Bioreason or Mojam Media be used in advertising or publicity pertaining to
# distribution of the software without specific, written prior permission.
#
#
# Summary of recent changes:
# Support for files with the same basename (submodules in packages)
# Expanded the idea of how to ignore files or modules
# Split tracing and counting into different classes
# Extracted count information and reporting from the count class
# Added some ability to detect which missing lines could be executed
# Added pseudo-pragma to prohibit complaining about unexecuted lines
# Rewrote the main program
# Summary of older changes:
# Added run-time display of statements being executed
# Incorporated portability and performance fixes from Greg Stein
# Incorporated main program from Michael Scharf
"""
program/module to trace Python program or function execution
Sample use, command line:
trace.py -c -f counts --ignore-dir '$prefix' spam.py eggs
trace.py -t --ignore-dir '$prefix' spam.py eggs
Sample use, programmatically (still more complicated than it should be)
# create an Ignore option, telling it what you want to ignore
ignore = trace.Ignore(dirs = [sys.prefix, sys.exec_prefix])
# create a Coverage object, telling it what to ignore
coverage = trace.Coverage(ignore)
# run the new command using the given trace
trace.run(coverage.trace, 'main()')
# make a report, telling it where you want output
t = trace.create_results_log(coverage.results(),
'/usr/local/Automatrix/concerts/coverage',
show_missing = 1)
The Trace class can be instantiated instead of the Coverage class if
runtime display of executable lines is desired instead of statement
coverage measurement.
"""
import sys, os, string, marshal, tempfile, copy, operator
# Write the command-line help text to the file object outfile.  The
# program name shown in the first line is taken from sys.argv[0].
def usage(outfile):
outfile.write("""Usage: %s [OPTIONS] <file> [ARGS]
Execution:
--help Display this help then exit.
--version Output version information then exit.
-t,--trace Print the line to be executed to sys.stdout.
-c,--count Count the number of times a line is executed.
Results are written in the results file, if given.
-r,--report Generate a report from a results file; do not
execute any code.
(One of `-t', `-c' or `-r' must be specified)
-s,--summary Generate a brief summary for each file. (Can only
be used with -c or -r.)
I/O:
-f,--file= File name for accumulating results over several runs.
(No file name means do not archive results)
-d,--logdir= Directory to use when writing annotated log files.
Log files are the module __name__ with `.` replaced
by os.sep and with '.pyl' added.
-m,--missing Annotate all executable lines which were not executed
with a '>>>>>> '.
-R,--no-report Do not generate the annotated reports. Useful if
you want to accumulate several over tests.
-C,--coverdir= Generate .cover files in this directory
Selection: Do not trace or log lines from ...
--ignore-module=[string] modules with the given __name__, and submodules
of that module
--ignore-dir=[string] files in the stated directory (multiple
directories can be joined by os.pathsep)
The selection options can be listed multiple times to ignore different
modules.
""" % sys.argv[0])
class Ignore:
    """Decide which modules are left out of tracing and counting.

    modules -- names of modules to ignore; their submodules are ignored
               as well.
    dirs    -- directories; files located below one of them are ignored.

    Verdicts are cached per module name in self._ignore (1 = ignore,
    0 = keep).  '<string>' is ignored from the start.
    """
    def __init__(self, modules = None, dirs = None):
        self._mods = modules or []
        self._dirs = dirs or []

        self._ignore = { '<string>': 1 }

    def names(self, filename, modulename):
        """Return 1 if the module should be ignored, else 0.

        filename is the module's file, or None when it has none (taken to
        be a built-in, which is always ignored).  modulename may be None;
        the module-name list is then skipped and only the file name
        decides.  The verdict is cached under modulename.
        """
        try:
            return self._ignore[modulename]
        except KeyError:
            pass

        # haven't seen this one before, so see if the module name is
        # on the ignore list.  Need to take some care since ignoring
        # "cmp" mustn't mean ignoring "cmpcache" but ignoring
        # "Spam" must also mean ignoring "Spam.Eggs".
        if modulename is not None:
            for mod in self._mods:
                # Either the identical name, or a proper submodule of
                # something on the ignore list.
                if mod == modulename or modulename.startswith(mod + '.'):
                    self._ignore[modulename] = 1
                    return 1

        # Now check that __file__ isn't in one of the directories
        if filename is None:
            # must be a built-in, so we must ignore
            self._ignore[modulename] = 1
            return 1

        # Ignore a file when it contains one of the ignorable paths
        for d in self._dirs:
            # The '+ os.sep' is to ensure that d is a parent directory,
            # as compared to cases like:
            #  d = "/usr/local"
            #  filename = "/usr/local.py"
            # or
            #  d = "/usr/local.py"
            #  filename = "/usr/local.py"
            if filename.startswith(d + os.sep):
                self._ignore[modulename] = 1
                return 1

        # Tried the different ways, so we don't ignore this module
        self._ignore[modulename] = 0
        return 0
def run(trace, cmd):
    """Execute cmd in the namespace of the __main__ module with trace
    installed as the system trace function.

    The trace function is removed again afterwards, even if cmd raises.
    """
    import __main__
    namespace = __main__.__dict__
    sys.settrace(trace)
    try:
        # Tuple form of exec; equivalent to "exec cmd in namespace, namespace".
        exec(cmd, namespace, namespace)
    finally:
        sys.settrace(None)
def runctx(trace, cmd, globals=None, locals=None):
    """Execute cmd in the given namespaces with trace installed as the
    system trace function.

    globals and locals default to fresh empty dictionaries.  The trace
    function is removed again afterwards, even if cmd raises.
    """
    if globals is None: globals = {}
    if locals is None: locals = {}
    sys.settrace(trace)
    try:
        # Run in the caller's namespaces.  (This used to name 'dict', a
        # name that is not defined in this function.)  The tuple form is
        # equivalent to "exec cmd in globals, locals".
        exec(cmd, globals, locals)
    finally:
        sys.settrace(None)
def runfunc(trace, func, *args, **kw):
    """Call func(*args, **kw) with trace installed as the system trace
    function and return its result.

    The trace function is removed again afterwards, even if func raises.
    """
    sys.settrace(trace)
    try:
        return func(*args, **kw)
    finally:
        sys.settrace(None)
class CoverageResults:
    """Line counts plus the mapping from file names to module names.

    counts  -- maps (filename, lineno) to the number of executions
    modules -- maps filenames to module names
    """
    def __init__(self, counts = {}, modules = {}):
        # Copies are stored, so neither the caller's dictionaries nor the
        # shared default ones are ever modified through this object.
        self.counts = counts.copy()
        self.modules = modules.copy()

    def update(self, other):
        """Merge in the data from another CoverageResults"""
        mine = self.counts
        theirs = other.counts
        for key in theirs.keys():
            mine[key] = mine.get(key, 0) + theirs[key]

        known = self.modules
        their_modules = other.modules
        for key in their_modules.keys():
            try:
                current = known[key]
            except KeyError:
                known[key] = their_modules[key]
            else:
                # make sure they point to the same file
                assert current == their_modules[key], \
                       "Strange! filename %s has two different module " \
                       "names: %s and %s" % \
                       (key, current, their_modules[key])
# Given a code string, return the SET_LINENO information
def _find_LINENO_from_string(co_code):
"""return all of the SET_LINENO information from a code string"""
import dis
linenos = {}
# This code was filched from the `dis' module then modified
n = len(co_code)
i = 0
prev_op = None
prev_lineno = 0
while i < n:
c = co_code[i]
op = ord(c)
if op == dis.SET_LINENO:
if prev_op == op:
# two SET_LINENO in a row, so the previous didn't
# indicate anything. This occurs with triple
# quoted strings (?). Remove the old one.
del linenos[prev_lineno]
prev_lineno = ord(co_code[i+1]) + ord(co_code[i+2])*256
linenos[prev_lineno] = 1
if op >= dis.HAVE_ARGUMENT:
i = i + 3
else:
i = i + 1
prev_op = op
return linenos
def _find_LINENO(code):
    """return all of the SET_LINENO information from a code object

    Code objects found among the constants of code are searched too.
    """
    import types

    # get all of the lineno information from the code of this scope level
    found = _find_LINENO_from_string(code.co_code)

    # and check the constants for references to other code objects
    nested = [c for c in code.co_consts if type(c) == types.CodeType]
    for sub in nested:
        # find another code object, so recurse into it
        found.update(_find_LINENO(sub))
    return found
def find_executable_linenos(filename):
    """return a dict of the line numbers from executable statements in a file

    Works by finding all of the code-like objects in the module then searching
    the byte code for 'SET_LINENO' terms (so this won't work on -O files).

    filename must end in '.py'.
    """
    import parser

    assert filename.endswith('.py')

    # Close the source file explicitly rather than relying on garbage
    # collection.
    f = open(filename)
    try:
        prog = f.read()
    finally:
        f.close()
    ast = parser.suite(prog)
    code = parser.compileast(ast, filename)

    # The only way I know to find line numbers is to look for the
    # SET_LINENO instructions.  Isn't there some way to get it from
    # the AST?
    return _find_LINENO(code)
### XXX because os.path.commonprefix seems broken by my way of thinking...
def commonprefix(dirs):
    "Given a list of pathnames, returns the longest common leading component"
    if not dirs:
        return ''
    # Work on a copy holding each path split into its components.
    parts = copy.copy(dirs)
    for k in range(len(parts)):
        parts[k] = parts[k].split(os.sep)
    common = parts[0]
    for item in parts:
        # Count the leading components that common shares with item.
        limit = len(common)
        depth = 0
        while depth < limit and common[:depth+1] == item[:depth+1]:
            depth = depth + 1
        if depth < limit:
            if depth == 0:
                return ''
            common = common[:depth]
    return os.sep.join(common)
# Write an annotated ".cover" listing for every file that has counts in
# results, and optionally print a per-module summary.
#   results      -- object with .counts ((filename, lineno) -> count),
#                   .modules (filename -> module name) and .update()
#   dirname      -- directory of the saved "counts"/"modules" files, and
#                   base for the listings of relative file names
#   show_missing -- if true, flag executable lines that were not hit
#   save_counts  -- if true, marshal counts and modules back into dirname
#   summary      -- if true, print line totals and coverage percentages
#   coverdir     -- if given, all listings are written into this directory
def create_results_log(results, dirname = ".", show_missing = 1,
save_counts = 0, summary = 0, coverdir = None):
import re
# turn the counts data ("(filename, lineno) = count") into something
# accessible on a per-file basis
per_file = {}
for filename, lineno in results.counts.keys():
lines_hit = per_file[filename] = per_file.get(filename, {})
lines_hit[lineno] = results.counts[(filename, lineno)]
# try and merge existing counts and modules file from dirname
# NOTE(review): per_file was built above, before this merge, so the
# listings appear to show only the counts passed in -- confirm intent.
try:
counts = marshal.load(open(os.path.join(dirname, "counts")))
modules = marshal.load(open(os.path.join(dirname, "modules")))
results.update(results.__class__(counts, modules))
except IOError:
pass
# there are many places where this is insufficient, like a blank
# line embedded in a multiline string.
blank = re.compile(r'^\s*(#.*)?$')
# accumulate summary info, if needed
sums = {}
# generate file paths for the coverage files we are going to write...
fnlist = []
tfdir = tempfile.gettempdir()
for key in per_file.keys():
filename = key
# skip some "files" we don't care about...
if filename == "<string>":
continue
# are these caused by code compiled using exec or something?
if filename.startswith(tfdir):
continue
modulename = os.path.split(results.modules[key])[1]
# Map a compiled file name (.pyc/.pyo) back to its .py source.
if filename.endswith(".pyc") or filename.endswith(".pyo"):
filename = filename[:-1]
if coverdir:
listfilename = os.path.join(coverdir, modulename + ".cover")
else:
# XXX this is almost certainly not portable!!!
fndir = os.path.dirname(filename)
if os.path.isabs(filename):
coverpath = fndir
else:
coverpath = os.path.join(dirname, fndir)
# build list file name by appending a ".cover" to the module name
# and sticking it into the specified directory
if "." in modulename:
# A module in a package
finalname = modulename.split(".")[-1]
listfilename = os.path.join(coverpath, finalname + ".cover")
else:
listfilename = os.path.join(coverpath, modulename + ".cover")
# Get the original lines from the .py file
try:
lines = open(filename, 'r').readlines()
except IOError, err:
print >> sys.stderr, "trace: Could not open %s for reading " \
"because: %s - skipping" % (`filename`, err.strerror)
continue
try:
outfile = open(listfilename, 'w')
except IOError, err:
# NOTE(review): the literal below opens with a single quote, so the two
# double quotes and the backslash-newline are part of one string; the
# message is emitted with stray '"' characters in it.
sys.stderr.write(
'%s: Could not open %s for writing because: %s" \
"- skipping\n' % ("trace", `listfilename`, err.strerror))
continue
# If desired, get a list of the line numbers which represent
# executable content (returned as a dict for better lookup speed)
if show_missing:
executable_linenos = find_executable_linenos(filename)
else:
executable_linenos = {}
# n_lines and n_hits feed the coverage percentage of the summary.
n_lines = 0
n_hits = 0
lines_hit = per_file[key]
for i in range(len(lines)):
line = lines[i]
# do the blank/comment match to try to mark more lines
# (help the reader find stuff that hasn't been covered)
if lines_hit.has_key(i+1):
# count precedes the lines that we captured
outfile.write('%5d: ' % lines_hit[i+1])
n_hits = n_hits + 1
n_lines = n_lines + 1
elif blank.match(line):
# blank lines and comments are preceded by dots
outfile.write(' . ')
else:
# lines preceded by no marks weren't hit
# Highlight them if so indicated, unless the line contains
# '#pragma: NO COVER' (it is possible to embed this into
# the text as a non-comment; no easy fix)
if executable_linenos.has_key(i+1) and \
string.find(lines[i],
string.join(['#pragma', 'NO COVER'])) == -1:
outfile.write('>>>>>> ')
else:
outfile.write(' '*7)
n_lines = n_lines + 1
outfile.write(string.expandtabs(lines[i], 8))
outfile.close()
if summary and n_lines:
percent = int(100 * n_hits / n_lines)
sums[modulename] = n_lines, percent, modulename, filename
if save_counts:
# try and store counts and module info into dirname
try:
marshal.dump(results.counts,
open(os.path.join(dirname, "counts"), "w"))
marshal.dump(results.modules,
open(os.path.join(dirname, "modules"), "w"))
except IOError, err:
sys.stderr.write("cannot save counts/modules " \
"files because %s" % err.strerror)
if summary and sums:
mods = sums.keys()
mods.sort()
print "lines cov% module (path)"
for m in mods:
# The tuple stored in sums matches the format string field for field.
n_lines, percent, modulename, filename = sums[m]
print "%5d %3d%% %s (%s)" % sums[m]
# There is a lot of code shared between these two classes even though
# it is straightforward to make a super class to share code. However,
# for performance reasons (remember, this is called at every step) I
# wanted to keep everything to a single function call. Also, by
# staying within a single scope, I don't have to temporarily nullify
# sys.settrace, which would slow things down even more.
class Coverage:
    """Trace hook that counts how often each source line is executed.

    The bound trace() method is the trace function; results() wraps the
    collected data in a CoverageResults object.
    """

    def __init__(self, ignore = None):
        """ignore decides which modules are skipped; defaults to a new Ignore()."""
        # Build the default here rather than in the signature: a default
        # evaluated at definition time would be one Ignore object (and one
        # cache) shared by every instance created without an argument.
        if ignore is None:
            ignore = Ignore()
        self.ignore = ignore
        self.ignore_names = ignore._ignore  # access ignore's cache (speed hack)
        self.counts = {}   # keys are (filename, linenumber)
        self.modules = {}  # maps filename -> module name

    def trace(self, frame, why, arg):
        """Trace function: count 'line' events; always returns itself."""
        if why == 'line':
            # something is fishy about getting the file name
            filename = frame.f_globals.get("__file__", None)
            if filename is None:
                filename = frame.f_code.co_filename
            # __name__ may be missing (PyRun_String() for example)
            # XXX what to do?
            modulename = frame.f_globals.get("__name__", None)

            # We do this next block to keep from having to make methods
            # calls, which also requires resetting the trace
            ignore_it = self.ignore_names.get(modulename, -1)
            if ignore_it == -1:  # module not seen before
                # Tracing is switched off around the call so that the
                # lookup itself is not traced.
                sys.settrace(None)
                ignore_it = self.ignore.names(filename, modulename)
                sys.settrace(self.trace)

                # record the module name for every file
                self.modules[filename] = modulename

            if not ignore_it:
                # record the file name and line number of every trace
                key = (filename, frame.f_lineno)
                self.counts[key] = self.counts.get(key, 0) + 1

        return self.trace

    def results(self):
        """Return the collected counts and module names as CoverageResults."""
        return CoverageResults(self.counts, self.modules)
class Trace:
def __init__(self, ignore = Ignore()):
self.ignore = ignore
self.ignore_names = ignore._ignore # access ignore's cache (speed hack)
self.files = {'<string>': None} # stores lines from the .py file,
# or None
def trace(self, frame, why, arg):
if why == 'line':
filename = frame.f_code.co_filename
try:
modulename = frame.f_globals["__name__"]
except KeyError:
# PyRun_String() for example
# XXX what to do?
modulename = None
# We do this next block to keep from having to make methods
# calls, which also requires resetting the trace
ignore_it = self.ignore_names.get(modulename, -1)
if ignore_it == -1: # unknown filename
sys.settrace(None)
ignore_it = self.ignore.names(filename, modulename)
sys.settrace(self.trace)
if not ignore_it:
lineno = frame.f_lineno
files = self.files
if filename != '<string>' and not files.has_key(filename):
files[filename] = map(string.rstrip,
open(filename).readlines())
# If you want to see filenames (the original behaviour), try:
# modulename = filename
# or, prettier but confusing when several files have the
# same name
# modulename = os.path.basename(filename)
if files[filename] != None:
print '%s(%d): %s' % (os.path.basename(filename), lineno,
files[filename][lineno-1])
else:
print '%s(%d): ??' % (modulename, lineno)
return self.trace
def _err_exit(msg):
    """Write msg to stderr, prefixed with the program name, and exit with status 1."""
    sys.stderr.write("%s: %s\n" % (sys.argv[0], msg))
    sys.exit(1)
def main(argv = None):
import getopt
if argv is None:
argv = sys.argv
try:
opts, prog_argv = getopt.getopt(argv[1:], "tcrRf:d:msC:",
["help", "version", "trace", "count",
"report", "no-report",
"file=", "logdir=", "missing",
"ignore-module=", "ignore-dir=",
"coverdir="])
except getopt.error, msg:
print >> sys.stderr, "%s: %s" % (sys.argv[0], msg)
print >> sys.stderr, "Try `%s --help' for more information" \
% sys.argv[0]
sys.exit(1)
trace = 0
count = 0
report = 0
no_report = 0
counts_file = None
logdir = "."
missing = 0
ignore_modules = []
ignore_dirs = []
coverdir = None
summary = 0
for opt, val in opts:
if opt == "--help":
usage(sys.stdout)
sys.exit(0)
if opt == "--version":
sys.stdout.write("trace 2.0\n")
sys.exit(0)
if opt == "-t" or opt == "--trace":
trace = 1
continue
if opt == "-c" or opt == "--count":
count = 1
continue
if opt == "-r" or opt == "--report":
report = 1
continue
if opt == "-R" or opt == "--no-report":
no_report = 1
continue
if opt == "-f" or opt == "--file":
counts_file = val
continue
if opt == "-d" or opt == "--logdir":
logdir = val
continue
if opt == "-m" or opt == "--missing":
missing = 1
continue
if opt == "-C" or opt == "--coverdir":
coverdir = val
continue
if opt == "-s" or opt == "--summary":
summary = 1
continue
if opt == "--ignore-module":
ignore_modules.append(val)
continue
if opt == "--ignore-dir":
for s in string.split(val, os.pathsep):
s = os.path.expandvars(s)
# should I also call expanduser? (after all, could use $HOME)
s = string.replace(s, "$prefix",
os.path.join(sys.prefix, "lib",
"python" + sys.version[:3]))
s = string.replace(s, "$exec_prefix",
os.path.join(sys.exec_prefix, "lib",
"python" + sys.version[:3]))
s = os.path.normpath(s)
ignore_dirs.append(s)
continue
assert 0, "Should never get here"
if len(prog_argv) == 0:
_err_exit("missing name of file to run")
if count + trace + report > 1:
_err_exit("can only specify one of --trace, --count or --report")
if count + trace + report == 0:
_err_exit("must specify one of --trace, --count or --report")
if report and counts_file is None:
_err_exit("--report requires a --file")
if report and no_report:
_err_exit("cannot specify both --report and --no-report")
if logdir is not None:
# warn if the directory doesn't exist, but keep on going
# (is this the correct behaviour?)
if not os.path.isdir(logdir):
sys.stderr.write(
"trace: WARNING, --logdir directory %s is not available\n" %
`logdir`)
sys.argv = prog_argv
progname = prog_argv[0]
if eval(sys.version[:3])>1.3:
sys.path[0] = os.path.split(progname)[0] # ???
# everything is ready
ignore = Ignore(ignore_modules, ignore_dirs)
if trace:
t = Trace(ignore)
try:
run(t.trace, 'execfile(' + `progname` + ')')
except IOError, err:
_err_exit("Cannot run file %s because: %s" % \
(`sys.argv[0]`, err.strerror))
elif count:
t = Coverage(ignore)
try:
run(t.trace, 'execfile(' + `progname` + ')')
except IOError, err:
_err_exit("Cannot run file %s because: %s" % \
(`sys.argv[0]`, err.strerror))
except SystemExit:
pass
results = t.results()
# Add another lookup from the program's file name to its import name
# This give the right results, but I'm not sure why ...
results.modules[progname] = os.path.splitext(progname)[0]
if counts_file:
# add in archived data, if available
try:
old_counts, old_modules = marshal.load(open(counts_file, 'rb'))
except IOError:
pass
else:
results.update(CoverageResults(old_counts, old_modules))
if not no_report:
create_results_log(results, logdir, missing,
summary=summary, coverdir=coverdir)
if counts_file:
try:
marshal.dump( (results.counts, results.modules),
open(counts_file, 'wb'))
except IOError, err:
_err_exit("Cannot save counts file %s because: %s" % \
(`counts_file`, err.strerror))
elif report:
old_counts, old_modules = marshal.load(open(counts_file, 'rb'))
results = CoverageResults(old_counts, old_modules)
create_results_log(results, logdir, missing,
summary=summary, coverdir=coverdir)
else:
assert 0, "Should never get here"
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
--- NEW FILE: treesync.py ---
#! /usr/bin/env python
"""Script to synchronize two source trees.
Invoke with two arguments:
python treesync.py slave master
The assumption is that "master" contains CVS administration while
slave doesn't. All files in the slave tree that have a CVS/Entries
entry in the master tree are synchronized. This means:
If the files differ:
if the slave file is newer:
normalize the slave file
if the files still differ:
copy the slave to the master
else (the master is newer):
copy the master to the slave
normalizing the slave means replacing CRLF with LF when the master
doesn't use CRLF
"""
import os, sys, stat, string, getopt
# Interactivity options
# The create_*/write_* values are passed to okay() as a canned answer: one
# starting with "y" or "n" is used without prompting, anything else makes
# okay() ask the user.
default_answer = "ask"      # used by okay() when the user enters nothing
create_files = "yes"        # copy a master file whose slave is missing
create_directories = "no"   # create a missing slave directory
write_slave = "ask"         # overwrite a slave file with a newer master
write_master = "ask"        # overwrite a master file with a newer slave
def main():
global always_no, always_yes
global create_directories, write_master, write_slave
opts, args = getopt.getopt(sys.argv[1:], "nym:s:d:f:a:")
for o, a in opts:
if o == '-y':
default_answer = "yes"
if o == '-n':
default_answer = "no"
if o == '-s':
write_slave = a
if o == '-m':
write_master = a
if o == '-d':
create_directories = a
if o == '-f':
create_files = a
if o == '-a':
create_files = create_directories = write_slave = write_master = a
try:
[slave, master] = args
except ValueError:
print "usage: python", sys.argv[0] or "treesync.py",
print "[-n] [-y] [-m y|n|a] [-s y|n|a] [-d y|n|a] [-f n|y|a]",
print "slavedir masterdir"
return
process(slave, master)
def process(slave, master):
cvsdir = os.path.join(master, "CVS")
if not os.path.isdir(cvsdir):
print "skipping master subdirectory", master
print "-- not under CVS"
return
print "-"*40
print "slave ", slave
print "master", master
if not os.path.isdir(slave):
if not okay("create slave directory %s?" % slave,
answer=create_directories):
print "skipping master subdirectory", master
print "-- no corresponding slave", slave
return
print "creating slave directory", slave
try:
os.mkdir(slave)
except os.error, msg:
print "can't make slave directory", slave, ":", msg
return
else:
print "made slave directory", slave
cvsdir = None
subdirs = []
names = os.listdir(master)
for name in names:
mastername = os.path.join(master, name)
slavename = os.path.join(slave, name)
if name == "CVS":
cvsdir = mastername
else:
if os.path.isdir(mastername) and not os.path.islink(mastername):
subdirs.append((slavename, mastername))
if cvsdir:
entries = os.path.join(cvsdir, "Entries")
for e in open(entries).readlines():
words = string.split(e, '/')
if words[0] == '' and words[1:]:
name = words[1]
s = os.path.join(slave, name)
m = os.path.join(master, name)
compare(s, m)
for (s, m) in subdirs:
process(s, m)
def compare(slave, master):
try:
sf = open(slave, 'r')
except IOError:
sf = None
try:
mf = open(master, 'rb')
except IOError:
mf = None
if not sf:
if not mf:
print "Neither master nor slave exists", master
return
print "Creating missing slave", slave
copy(master, slave, answer=create_files)
return
if not mf:
print "Not updating missing master", master
return
if sf and mf:
if identical(sf, mf):
return
sft = mtime(sf)
mft = mtime(mf)
if mft > sft:
# Master is newer -- copy master to slave
sf.close()
mf.close()
print "Master ", master
print "is newer than slave", slave
copy(master, slave, answer=write_slave)
return
# Slave is newer -- copy slave to master
print "Slave is", sft-mft, "seconds newer than master"
# But first check what to do about CRLF
mf.seek(0)
fun = funnychars(mf)
mf.close()
sf.close()
if fun:
print "***UPDATING MASTER (BINARY COPY)***"
copy(slave, master, "rb", answer=write_master)
else:
print "***UPDATING MASTER***"
copy(slave, master, "r", answer=write_master)
# Chunk size, in bytes, used when reading files.
BUFSIZE = 16*1024

def identical(sf, mf):
    """Return 1 if both open files yield the same data from here on, else 0."""
    sd = sf.read(BUFSIZE)
    md = mf.read(BUFSIZE)
    while sd == md:
        if not sd:
            # both reached end of file at the same time
            return 1
        sd = sf.read(BUFSIZE)
        md = mf.read(BUFSIZE)
    return 0
def mtime(f):
    """Return the modification time of the open file object f."""
    return os.fstat(f.fileno())[stat.ST_MTIME]
def funnychars(f):
    """Return 1 if the rest of open file f contains a CR or NUL, else 0."""
    chunk = f.read(BUFSIZE)
    while chunk:
        if '\r' in chunk or '\0' in chunk:
            return 1
        chunk = f.read(BUFSIZE)
    return 0
def copy(src, dst, rmode="rb", wmode="wb", answer='ask'):
print "copying", src
print " to", dst
if not okay("okay to copy? ", answer):
return
f = open(src, rmode)
g = open(dst, wmode)
while 1:
buf = f.read(BUFSIZE)
if not buf: break
g.write(buf)
f.close()
g.close()
def okay(prompt, answer='ask'):
answer = string.lower(string.strip(answer))
if not answer or answer[0] not in 'ny':
answer = raw_input(prompt)
answer = string.lower(string.strip(answer))
if not answer:
answer = default_answer
if answer[:1] == 'y':
return 1
if answer[:1] == 'n':
return 0
print "Yes or No please -- try again:"
return okay(prompt)
# Only run when executed as a script, so importing the module (for example
# to reuse compare() or copy()) does not start a synchronization.
if __name__ == '__main__':
    main()
--- NEW FILE: untabify.py ---
#! /usr/bin/env python
"Replace tabs with spaces in argument files. Print names of changed files."
import os
import sys
import string
import getopt
def main():
tabsize = 8
try:
opts, args = getopt.getopt(sys.argv[1:], "t:")
if not args:
raise getopt.error, "At least one file argument required"
except getopt.error, msg:
print msg
print "usage:", sys.argv[0], "[-t tabwidth] file ..."
return
for optname, optvalue in opts:
if optname == '-t':
tabsize = int(optvalue)
for file in args:
process(file, tabsize)
def process(file, tabsize):
    """Expand the tabs in one file in place, using tab stops every tabsize columns.

    A file that cannot be read is reported and left alone; a file without
    tabs is left untouched.  Otherwise the original is renamed to file + "~",
    the expanded text is written to file, and the file name is printed.
    """
    try:
        f = open(file)
        text = f.read()
        f.close()
    except IOError:
        msg = sys.exc_info()[1]
        sys.stdout.write("%s: I/O error: %s\n" % (repr(file), str(msg)))
        return
    newtext = text.expandtabs(tabsize)
    if newtext == text:
        return
    backup = file + "~"
    # Best effort: remove a stale backup, then move the original aside.
    try:
        os.unlink(backup)
    except os.error:
        pass
    try:
        os.rename(file, backup)
    except os.error:
        pass
    out = open(file, "w")
    out.write(newtext)
    out.close()
    sys.stdout.write("%s\n" % file)
# Run only when executed as a script.
if __name__ == "__main__":
    main()
--- NEW FILE: which.py ---
#! /usr/bin/env python
# Variant of "which".
# On stderr, near and total misses are reported.
# '-l<flags>' argument adds ls -l<flags> of each file found.
import sys
if sys.path[0] in (".", ""): del sys.path[0]
import sys, os, string
from stat import *
def msg(str):
    """Write str and a newline to stderr."""
    line = str + '\n'
    sys.stderr.write(line)
pathlist = string.splitfields(os.environ['PATH'], ':')
sts = 0
longlist = ''
if sys.argv[1:] and sys.argv[1][:2] == '-l':
longlist = sys.argv[1]
del sys.argv[1]
for prog in sys.argv[1:]:
ident = ()
for dir in pathlist:
file = os.path.join(dir, prog)
try:
st = os.stat(file)
except os.error:
continue
if not S_ISREG(st[ST_MODE]):
msg(file + ': not a disk file')
else:
mode = S_IMODE(st[ST_MODE])
if mode & 0111:
if not ident:
print file
ident = st[:3]
else:
if st[:3] == ident:
s = 'same as: '
else:
s = 'also: '
msg(s + file)
else:
msg(file + ': not executable')
if longlist:
sts = os.system('ls ' + longlist + ' ' + file)
if sts: msg('"ls -l" exit status: ' + `sts`)
if not ident:
msg(prog + ': not found')
sts = 1
sys.exit(sts)
--- NEW FILE: xxci.py ---
#! /usr/bin/env python
# xxci
#
# check in files for which rcsdiff returns nonzero exit status
import sys
import os
from stat import *
import commands
import fnmatch
import string
# Leading bytes that skipfile() compares against the start of each file;
# a file beginning with them is treated as an executable and skipped.
EXECMAGIC = '\001\140\000\010'
MAXSIZE = 200*1024 # Files this big must be binaries and are skipped.
def getargs():
args = sys.argv[1:]
if args:
return args
print 'No arguments, checking almost *, in "ls -t" order'
list = []
for file in os.listdir(os.curdir):
if not skipfile(file):
list.append((getmtime(file), file))
list.sort()
if not list:
print 'Nothing to do -- exit 1'
sys.exit(1)
list.sort()
list.reverse()
for mtime, file in list: args.append(file)
return args
def getmtime(file):
    """Return the modification time of file, or -1 if it cannot be stat'ed."""
    try:
        return os.stat(file)[ST_MTIME]
    except os.error:
        return -1
# Names, prefixes and suffixes of files that are never checked in.
badnames = ['tags', 'TAGS', 'xyzzy', 'nohup.out', 'core']
badprefixes = ['.', ',', '@', '#', 'o.']
badsuffixes = \
        ['~', '.a', '.o', '.old', '.bak', '.orig', '.new', '.prev', '.not', \
         '.pyc', '.fdc', '.rgb', '.elc', ',v']

# fnmatch patterns consulted by skipfile(); filled in by setup().
ignore = []

def setup():
    """(Re)build the ignore pattern list in place.

    The list gets the bad names, "prefix*" for every bad prefix and
    "*suffix" for every bad suffix, followed by the whitespace-separated
    patterns found in a .xxcign file in the current directory, if that
    file can be opened.
    """
    ignore[:] = badnames
    for p in badprefixes:
        ignore.append(p + '*')
    for p in badsuffixes:
        ignore.append('*' + p)
    try:
        f = open('.xxcign', 'r')
    except IOError:
        return
    # Close the file even if reading it fails.
    try:
        ignore.extend(f.read().split())
    finally:
        f.close()
def skipfile(file):
    """Return 1 if file should not be considered for check-in, else 0.

    A file is skipped when it matches an ignore pattern, cannot be
    lstat'ed, is not a regular file, is MAXSIZE bytes or larger, or starts
    with EXECMAGIC.  A file that cannot be read is not skipped.
    """
    for p in ignore:
        if fnmatch.fnmatch(file, p): return 1
    try:
        st = os.lstat(file)
    except os.error:
        return 1 # Doesn't exist -- skip it
    # Skip non-plain files.
    if not S_ISREG(st[ST_MODE]): return 1
    # Skip huge files -- probably binaries.
    if st[ST_SIZE] >= MAXSIZE: return 1
    # Skip executables.  The magic number is binary data, so read in binary
    # mode; only I/O errors are tolerated, so that KeyboardInterrupt and
    # programming errors are no longer swallowed.
    try:
        f = open(file, 'rb')
        try:
            data = f.read(len(EXECMAGIC))
        finally:
            f.close()
    except IOError:
        return 0
    if data == EXECMAGIC: return 1
    return 0
def badprefix(file):
    """Return 1 if file starts with one of badprefixes, else 0."""
    for prefix in badprefixes:
        if file.startswith(prefix):
            return 1
    return 0
def badsuffix(file):
    """Return 1 if file ends with one of badsuffixes, else 0."""
    for suffix in badsuffixes:
        tail = file[-len(suffix):]
        if tail == suffix:
            return 1
    return 0
def go(args):
for file in args:
print file + ':'
if differing(file):
showdiffs(file)
if askyesno('Check in ' + file + ' ? '):
sts = os.system('rcs -l ' + file) # ignored
sts = os.system('ci -l ' + file)
def differing(file):
    """Return true if the "co -p file | cmp" pipeline exits with nonzero status."""
    return os.system('co -p %s 2>/dev/null | cmp -s - %s' % (file, file)) != 0
def showdiffs(file):
    """Page the rcsdiff output for file through $PAGER (default: more)."""
    os.system('rcsdiff %s 2>&1 | ${PAGER-more}' % file)
def askyesno(prompt):
    """Prompt the user; return true only for the exact reply 'y' or 'yes'."""
    reply = raw_input(prompt)
    return reply in ('y', 'yes')
try:
setup()
go(getargs())
except KeyboardInterrupt:
print '[Intr]'