[Pythonmac-SIG] Re: [HELP!] PC Line Endings? Batch file replacement
Chris Barker
Chris.Barker@noaa.gov
Mon, 11 Mar 2002 09:30:04 -0800
This is a multi-part message in MIME format.
--------------E6C62C8F6E2E246705BDC51B
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Dean,
your utility looks great.
My only suggestion is something you've already talked about: converting
spaces->tabs and tabs->spaces while preserving Python semantics. Since
this utility is designed to work with Python files, there is absolutely
no point in doing it any other way.
The code to do it right is not as trivial as I thought when I first
started to write it, but then I found a converter on the net that I
believe was written by Gordon McMillan. He used the tokenize module to
do the heavy lifting, and this guarantees that it is done the way the
Python interpreter would do it. I've enclosed it with this email. You
should be able to do a quick cut & paste of some of this code into your
utility, and have what you want.
Note that this code allows conversion to mixed tabs and spaces...please
don't allow that option in your version!
-Chris
--
Christopher Barker, Ph.D.
Oceanographer
NOAA/OR&R/HAZMAT (206) 526-6959 voice
7600 Sand Point Way NE (206) 526-6329 fax
Seattle, WA 98115 (206) 526-6317 main reception
Chris.Barker@noaa.gov
--------------E6C62C8F6E2E246705BDC51B
Content-Type: text/plain; charset=us-ascii;
name="tabcleaner.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="tabcleaner.py"
#!/usr/bin/python
import os
import string
import tokenize
# Names of the output indentation styles.  One of these is stored under the
# 'style' key of the options dict given to PyText (see the -T/-S/-M flags).
# TABSONLY: one tab per indentation level.
TABSONLY = 'TABSONLY'
# SPACESONLY: 'indent' spaces per indentation level.
SPACESONLY = 'SPACESONLY'
# MIXED: the indent width is expressed as tabs of 'tabs' columns plus spaces.
MIXED = 'MIXED'
class PyText:
    """Re-indent one Python source file using the tokenizer's block structure.

    The file is read into memory, tokenized to find the line range covered by
    each indentation level, and rewritten with uniform leading whitespace.
    The previous contents are kept next to the file with a ``.bak`` extension.

    Attributes:
        optdict: option mapping.  Keys read by ``process`` are ``'style'``
            (TABSONLY, SPACESONLY or MIXED; default TABSONLY), ``'indent'``
            (columns per level, default ``'4'``) and ``'tabs'`` (columns per
            tab in MIXED style, default ``'8'``).
        fnm: path of the file being cleaned.
        txt: the file's lines, line endings included.
        indents: list of ``(level, start, end)`` tuples; ``start``/``end`` are
            0-based indices into ``txt`` (``end`` exclusive).  While
            tokenizing, the last entry is an open ``(level, start)`` pair.
        lnndx: index of the next line ``getline`` will return.
        indentndx: initialised to 0; not used by any method of this class.
    """

    def __init__(self, fnm, optdict):
        """Load *fnm* into memory and reset the tokenizer bookkeeping."""
        self.optdict = optdict
        self.fnm = fnm
        # Close the handle deterministically instead of leaking it.
        with open(self.fnm, 'r') as src:
            self.txt = src.readlines()
        self.indents = [(0, 0, )]
        self.lnndx = 0
        self.indentndx = 0

    def getline(self):
        """Return the next line of ``txt``, or ``''`` once it is exhausted.

        This is the ``readline`` callable handed to the tokenizer.
        """
        if self.lnndx >= len(self.txt):
            return ''
        txt = self.txt[self.lnndx]
        self.lnndx = self.lnndx + 1
        return txt

    def tokeneater(self, type, token, start, end, line):
        """Record where indentation levels begin and end.

        Called once per token with the five fields the tokenizer produces.
        INDENT and DEDENT close the currently open entry of ``indents`` at
        the token's line and open a new one a level deeper or shallower;
        ENDMARKER closes the last entry at the end of the file.  All other
        token types are ignored.
        """
        if type == tokenize.ENDMARKER:
            lvl, first = self.indents[-1]
            self.indents[-1] = (lvl, first, len(self.txt))
            return
        if type == tokenize.INDENT:
            step = 1
        elif type == tokenize.DEDENT:
            step = -1
        else:
            return
        # Tokenizer rows are 1-based; indices into self.txt are 0-based.
        row = start[0] - 1
        lvl, first = self.indents[-1]
        self.indents[-1] = (lvl, first, row)
        self.indents.append((lvl + step, row))

    def split(self, ln):
        """Split *ln* into ``(leading_whitespace, rest)``.

        Tabs in the leading whitespace are expanded (default tab stops of 8
        columns).  A line containing only whitespace yields ``('', '\\n')``.
        """
        content = ln.lstrip()
        if not content:
            return ('', '\n')
        lead = ln[:len(ln) - len(content)].expandtabs()
        return (lead, content)

    def process(self):
        """Tokenize the file, rebuild every line's indentation and save it.

        The first line of each indentation region gets exactly the new
        leading whitespace for its level.  Every following line of the region
        (continuation lines, comments, the interior of multi-line strings)
        keeps whatever it had beyond the first line's old indentation, as
        spaces.  Blank lines are written as a bare newline.

        Raises:
            ValueError: if the 'indent' or 'tabs' option is not an integer.
        """
        style = self.optdict.get('style', TABSONLY)
        indent = int(self.optdict.get('indent', '4'))
        tabsz = int(self.optdict.get('tabs', '8'))
        print('file %s -> style %s, tabsize %d, indent %d'
              % (self.fnm, style, tabsz, indent))
        # generate_tokens() is available on both Python 2 and Python 3; the
        # callback form tokenize.tokenize(readline, tokeneater) is not.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)

        new = []
        for (lvl, s, e) in self.indents:
            if s >= len(self.txt):
                break
            if s == e:
                # Empty region, e.g. the intermediate step of a multi-level
                # dedent.
                continue
            oldlead, content = self.split(self.txt[s])
            if style == TABSONLY:
                newlead = '\t' * lvl
            elif style == SPACESONLY:
                newlead = ' ' * (indent * lvl)
            else:
                t, spcs = divmod(indent * lvl, tabsz)
                newlead = '\t' * t + ' ' * spcs
            new.append(newlead + content)
            for ln in self.txt[s + 1:e]:
                if not ln.strip():
                    # Do not pad blank lines with trailing whitespace.
                    new.append('\n')
                    continue
                lead, content = self.split(ln)
                new.append(newlead + lead[len(oldlead):] + content)
        self.save(new)

    def save(self, txt):
        """Move the original file to ``<name>.bak`` and write *txt* in its place.

        Args:
            txt: list of lines (line endings included) to write.
        """
        bakname = os.path.splitext(self.fnm)[0] + '.bak'
        print("backing up %s to %s" % (self.fnm, bakname))
        try:
            os.rename(self.fnm, bakname)
        except os.error:
            # Assume an older backup is in the way: remove it and retry.
            os.remove(bakname)
            os.rename(self.fnm, bakname)
        with open(self.fnm, 'w') as out:
            out.writelines(txt)
def test():
    """Run the cleaner over 'test1.py' once for each option combination.

    Expects a file named 'test1.py' in the current directory; each run
    rewrites it in place (and refreshes 'test1.bak').
    """
    # PyText takes its settings as an option dict at construction time and
    # process() accepts no arguments; numeric options are given as strings,
    # the same form the command line supplies.
    option_sets = (
        {},
        {'style': TABSONLY},
        {'style': MIXED, 'indent': '4', 'tabs': '8'},
        {'style': MIXED, 'indent': '2', 'tabs': '8'},
    )
    for optdict in option_sets:
        tc = PyText('test1.py', optdict)
        tc.process()
def cleanfile(fnm, d):
    """Clean the file *fnm*, or every ``.py`` file below the directory *fnm*.

    Directories are descended recursively; symbolic links to directories are
    not followed.  Inside a directory only entries whose last three
    characters are ``.py`` (after ``os.path.normcase``) are processed.

    Args:
        fnm: path of a file or directory.
        d: option dict passed unchanged to ``PyText``.
    """
    def real_dir(path):
        # A directory that is not reached through a symbolic link.
        return os.path.isdir(path) and not os.path.islink(path)

    if not real_dir(fnm):
        PyText(fnm, d).process()
        return

    for entry in os.listdir(fnm):
        child = os.path.join(fnm, entry)
        if real_dir(child) or os.path.normcase(child[-3:]) == ".py":
            cleanfile(child, d)
# Help text for the command line; the single %s is filled with the program
# name when the text is printed.
usage = (
    "%s [options] [path...]\n"
    "options\n"
    "-T : reformat to TABS ONLY\n"
    "-S : reformat to SPACES ONLY ( -i option is important)\n"
    "-M : reformat to MIXED SPACES / TABS ( -t and -i options important)\n"
    "-t<n> : tab is worth <n> characters\n"
    "-i<n> : indents should be <n> characters\n"
    "-h : print this text\n"
    "path is file or directory\n"
)
# Command-line entry point: parse the flags into an option dict and clean
# every path given on the command line.
if __name__ == '__main__':
    import getopt
    # os is not used in this block; PyText.save and cleanfile reach it
    # through the module namespace.
    import os
    import sys

    try:
        opts, args = getopt.getopt(sys.argv[1:], "TSMht:i:")
    except getopt.GetoptError as err:
        # Unknown flag or missing flag argument: report it with the usage
        # text instead of ending in a traceback.
        sys.stderr.write("%s\n" % err)
        sys.stderr.write(usage % sys.argv[0])
        sys.exit(2)

    d = {}
    for flag, value in opts:
        if flag == '-T':
            d['style'] = TABSONLY
        elif flag == '-S':
            d['style'] = SPACESONLY
        elif flag == '-M':
            d['style'] = MIXED
        elif flag == '-t':
            # Kept as a string; PyText.process converts it to an integer.
            d['tabs'] = value
        elif flag == '-i':
            d['indent'] = value
        elif flag == '-h':
            print(usage % sys.argv[0])
            sys.exit(0)

    if not args:
        print(usage % sys.argv[0])
    for arg in args:
        cleanfile(arg, d)
--------------E6C62C8F6E2E246705BDC51B--