[Pythonmac-SIG] space indented files
Chris Barker
cbarker@jps.net
Mon, 18 Oct 1999 10:45:32 -0700
This is a multi-part message in MIME format.
--------------1B1EF91C859852EFAA66EB3D
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Oliver Steele wrote:
****************************************
Remaining work:
- recognize files indented with a mixture of tabs and spaces
- test the heuristics on more files
- add a user indication and user control for the translation (I probably
won't do this one, but if it gets added to the IDE someone might want to
do this)
****************************************
This sounds great!
I don't know if it will be helpful, but I've enclosed a script I got
from someone that does tab-space-tab+space translation for you (I'd love
to credit the author, but s/he didn't put a name in the file, and I don't
remember who I got it from). I suppose the tricky part is to figure out
what format was originally used. I have to say, though, that I'd be just
as happy if tab+spaces was NOT preserved! Using the two together is a
great way to invite problems.
Note also, that if the file uses a mixture of tabs and spaces, you have
to use a tabstop of 8 spaces, as this is what Python assumes.
--
Christopher Barker,
Ph.D.
cbarker@jps.net --- --- ---
http://www.jps.net/cbarker -----@@ -----@@ -----@@
------@@@ ------@@@ ------@@@
Water Resources Engineering ------ @ ------ @ ------ @
Coastal and Fluvial Hydrodynamics ------- --------- --------
------------------------------------------------------------------------
------------------------------------------------------------------------
--------------1B1EF91C859852EFAA66EB3D
Content-Type: text/plain; charset=us-ascii;
name="tabcleaner.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="tabcleaner.py"
#!/usr/bin/python
import os
import string
import tokenize
# Output indentation styles selectable through optdict['style'].
TABSONLY = 'TABSONLY'
SPACESONLY = 'SPACESONLY'
MIXED = 'MIXED'


class PyText:
    """Re-indent one Python source file in a chosen whitespace style.

    The file is tokenized to find where each indentation level starts and
    ends; every line's leading whitespace is then regenerated from its
    block level while any extra indentation relative to the first line of
    the block (continuation lines, for example) is kept.
    """

    def __init__(self, fnm, optdict):
        """Read the lines of *fnm*.

        fnm     -- path of the file to convert.
        optdict -- options: 'style' (TABSONLY, SPACESONLY or MIXED),
                   'indent' and 'tabs' (numbers, normally given as strings).
        """
        self.optdict = optdict
        self.fnm = fnm
        # Close the handle explicitly instead of relying on garbage
        # collection.
        with open(self.fnm, 'r') as src:
            self.txt = src.readlines()
        # Spans of constant indentation level.  The last entry is the open
        # span, (level, first_row); closed spans are
        # (level, first_row, end_row).  Rows are 0-based indexes into
        # self.txt and end_row is exclusive.
        self.indents = [(0, 0)]
        self.lnndx = 0      # index of the next line getline() hands out
        self.indentndx = 0

    def getline(self):
        """Return the next source line, or '' once the lines run out.

        This is the readline-style callable given to the tokenizer.
        """
        if self.lnndx >= len(self.txt):
            return ''
        txt = self.txt[self.lnndx]
        self.lnndx = self.lnndx + 1
        return txt

    def tokeneater(self, type, token, start, end, line):
        """Record indentation-level changes reported by the tokenizer.

        An INDENT or DEDENT token closes the open span at the token's row
        and opens a new span one level deeper or shallower.  ENDMARKER
        closes the last span at the end of the file.  Every other token is
        ignored.
        """
        if type == tokenize.ENDMARKER:
            lvl, s = self.indents[-1]
            self.indents[-1] = (lvl, s, len(self.txt))
            return
        if type == tokenize.INDENT:
            step = 1
        elif type == tokenize.DEDENT:
            step = -1
        else:
            return
        row = start[0] - 1  # tokenizer rows are 1-based
        lvl, s = self.indents[-1]
        self.indents[-1] = (lvl, s, row)
        self.indents.append((lvl + step, row))

    def split(self, ln):
        """Split *ln* into (leading whitespace, remaining text).

        Tabs in the leading whitespace are expanded to spaces (tab stops
        every 8 columns, the expandtabs default).  A line holding only
        whitespace yields ('', '\\n').
        """
        content = ln.lstrip()
        if not content:
            return ('', '\n')
        lead = ln[:len(ln) - len(content)]
        return (lead.expandtabs(), content)

    def process(self):
        """Tokenize the file, rebuild every line's indentation and save it.

        Prints a one-line summary of the conversion, then hands the new
        lines to save(), which backs up and overwrites the file.
        """
        style = self.optdict.get('style', SPACESONLY)
        indent = int(self.optdict.get('indent', '4'))
        tabsz = int(self.optdict.get('tabs', '8'))
        print('file %s -> style %s, tabsize %d, indent %d'
              % (self.fnm, style, tabsz, indent))
        # generate_tokens() accepts a str-returning readline callable on
        # both Python 2 and Python 3; each token is a 5-tuple matching
        # tokeneater's parameters.
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        new = []
        for (lvl, s, e) in self.indents:
            if s >= len(self.txt):
                break       # span opened by a dedent at end of file
            if s == e:
                continue    # empty span: several dedents on the same row
            oldlead, content = self.split(self.txt[s])
            if style == TABSONLY:
                newlead = '\t' * lvl
            elif style == SPACESONLY:
                newlead = ' ' * (indent * lvl)
            else:
                # MIXED: as many tabs as fit, then spaces for the rest.
                t, spcs = divmod(indent * lvl, tabsz)
                newlead = '\t' * t + ' ' * spcs
            new.append(newlead + content)
            for ln in self.txt[s + 1:e]:
                if not ln.strip():
                    # Keep blank lines truly blank rather than writing
                    # the block's indentation as trailing whitespace.
                    new.append('\n')
                    continue
                lead, content = self.split(ln)
                # Preserve indentation beyond that of the block's first
                # line (continuation lines and the like).
                new.append(newlead + lead[len(oldlead):] + content)
        self.save(new)

    def save(self, txt):
        """Back the file up as <name>.bak, then write *txt* in its place."""
        bakname = os.path.splitext(self.fnm)[0] + '.bak'
        print('backing up %s to %s' % (self.fnm, bakname))
        try:
            os.rename(self.fnm, bakname)
        except os.error:
            # The rename may fail because an older backup exists; remove
            # it and retry.
            os.remove(bakname)
            os.rename(self.fnm, bakname)
        with open(self.fnm, 'w') as out:
            out.writelines(txt)
def test():
    """Re-indent 'test1.py' (in the current directory) once per style.

    Each run builds a fresh PyText so it reads the output of the previous
    run.  Options go through the option dictionary that PyText's
    constructor requires; numeric options are passed as strings, the form
    they have when they come from the command line.
    """
    runs = (
        {},
        {'style': TABSONLY},
        {'style': MIXED, 'indent': '4', 'tabs': '8'},
        {'style': MIXED, 'indent': '2', 'tabs': '8'},
    )
    for opts in runs:
        tc = PyText('test1.py', opts)
        tc.process()
def cleanfile(fnm, d):
    """Re-indent *fnm*, descending into it when it is a directory.

    fnm -- a file or directory path.
    d   -- option dictionary handed unchanged to PyText.

    Inside a directory, sub-directories that are not symbolic links and
    entries whose last three characters are ".py" (after os.path.normcase)
    are processed recursively.  Anything that is not such a directory is
    converted directly with PyText.
    """
    def real_dir(path):
        # A directory reached directly, not through a symbolic link.
        return os.path.isdir(path) and not os.path.islink(path)

    if not real_dir(fnm):
        PyText(fnm, d).process()
        return

    for entry in os.listdir(fnm):
        child = os.path.join(fnm, entry)
        if real_dir(child) or os.path.normcase(child[-3:]) == ".py":
            cleanfile(child, d)
usage="""\
%s [options] [path...]
options
-T : reformat to TABS ONLY
-S : reformat to SPACES ONLY ( -i option is important)
-M : reformat to MIXED SPACES / TABS ( -t and -i options important)
-t<n> : tab is worth <n> characters
-i<n> : indents should be <n> characters
-h : print this text
path is file or directory
"""


def main(argv=None):
    """Command-line entry point: parse options and clean each path.

    argv -- full argument vector including the program name; defaults to
            sys.argv.

    -h prints the usage text and exits with status 0.  An option getopt
    does not recognise prints the error and the usage text on stderr and
    exits with status 2.  With no paths, the usage text is printed and
    nothing else happens.
    """
    import getopt
    import sys

    if argv is None:
        argv = sys.argv
    try:
        opts, args = getopt.getopt(argv[1:], "TSMht:i:")
    except getopt.GetoptError as err:
        # Report bad options as a usage error instead of a traceback.
        sys.stderr.write('%s\n' % err)
        sys.stderr.write(usage % argv[0])
        sys.exit(2)

    d = {}
    for flag, value in opts:
        if flag == '-T':
            d['style'] = TABSONLY
        elif flag == '-S':
            d['style'] = SPACESONLY
        elif flag == '-M':
            d['style'] = MIXED
        elif flag == '-t':
            d['tabs'] = value      # kept as a string; PyText converts it
        elif flag == '-i':
            d['indent'] = value    # kept as a string; PyText converts it
        elif flag == '-h':
            print(usage % argv[0])
            sys.exit(0)

    if not args:
        print(usage % argv[0])
        return
    for arg in args:
        cleanfile(arg, d)


if __name__ == '__main__':
    # cleanfile() and PyText.save() look `os` up as a module global, so it
    # is imported at module scope here just as the original script did.
    import os
    main()
--------------1B1EF91C859852EFAA66EB3D--