[Pythonmac-SIG] space indented files

Chris Barker cbarker@jps.net
Mon, 18 Oct 1999 10:45:32 -0700

This is a multi-part message in MIME format.
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Oliver Steele wrote:
Remaining work:
- recognize files indented with a mixture of tabs and spaces
- test the heuristics on more files
- add a user indication and user control for the translation (I probably
won't do this one, but if it gets added to the IDE someone might want to

This sounds great!

I don't know if it will be helpful, but I've enclosed a script I got
from someone that does tab-space-tab+space translation for you (I'd love
to credit the author, but s/he didn't put a name in the file, and I don't
remember who I got it from). I suppose the tricky part is to figure out
what format was originally used. I have to say, though, that I'd be just
as happy if tab+spaces was NOT preserved! Using the two together is a
great way to invite problems.

Note also, that if the file uses a mixture of tabs and spaces, you have
to use a tabstop of 8 spaces, as this is what Python assumes.

Christopher Barker,
cbarker@jps.net                      ---           ---           ---
http://www.jps.net/cbarker          -----@@       -----@@       -----@@
                                   ------@@@     ------@@@     ------@@@
Water Resources Engineering       ------   @    ------   @   ------   @
Coastal and Fluvial Hydrodynamics -------      ---------     --------    
Content-Type: text/plain; charset=us-ascii;
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;


import os
import string
import tokenize


# Indentation styles understood by PyText.process (the 'style' option).
# NOTE(review): these names are used throughout the script but their
# definitions are absent from the file as archived; the values below only
# need to be distinct, printable labels.
TABSONLY = 'Tabs Only'
SPACESONLY = 'Spaces Only'
MIXED = 'Mixed'


class PyText:
    """Re-indent one Python source file in a chosen indentation style.

    The file is tokenized to find where each indentation level starts and
    ends; every region is then rewritten with a fresh leading whitespace
    string while any extra indentation inside the region (continuation
    lines and the like) is kept relative to the region's first line.

    optdict keys (all optional):
        'style'  -- TABSONLY, SPACESONLY (default) or MIXED
        'indent' -- columns per indentation level, default '4'
        'tabs'   -- columns a tab is worth in MIXED style, default '8'
    """

    def __init__(self, fnm, optdict):
        """Read the lines of file *fnm* and remember the options."""
        self.optdict = optdict
        self.fnm = fnm
        # Close the handle explicitly instead of leaking it.
        with open(self.fnm, 'r') as src:
            self.txt = src.readlines()
        # Region table.  An open region is (level, first_line); a closed one
        # is (level, first_line, end_line).  Line numbers are 0-based indexes
        # into self.txt and end_line is exclusive.
        self.indents = [(0, 0)]
        self.lnndx = 0
        self.indentndx = 0

    def getline(self):
        """Return the next source line, or '' once the file is exhausted.

        This is the readline-style callable handed to the tokenizer.
        """
        if self.lnndx < len(self.txt):
            txt = self.txt[self.lnndx]
            self.lnndx = self.lnndx + 1
        else:
            txt = ''
        return txt

    def tokeneater(self, type, token, start, end, line):
        """Record indentation regions from INDENT/DEDENT/ENDMARKER tokens.

        Every INDENT or DEDENT closes the currently open region at the
        token's line and opens a new one a level deeper or shallower;
        ENDMARKER closes the last region at the end of the file.  All other
        token types are ignored.
        """
        if type == tokenize.ENDMARKER:
            lvl, s = self.indents[-1]
            self.indents[-1] = (lvl, s, len(self.txt))
            return
        if type == tokenize.INDENT:
            step = 1
        elif type == tokenize.DEDENT:
            step = -1
        else:
            return
        row = start[0] - 1          # token rows are 1-based
        lvl, s = self.indents[-1]
        self.indents[-1] = (lvl, s, row)
        self.indents.append((lvl + step, row))

    def split(self, ln):
        """Split *ln* into (leading whitespace, rest of line).

        Tabs in the leading whitespace are expanded to 8-column tab stops.
        A whitespace-only line is returned as ('', '\\n').
        """
        content = ln.lstrip()
        if not content:
            return ('', '\n')
        lead = ln[:len(ln) - len(content)]
        lead = lead.expandtabs()
        return (lead, content)

    def process(self):
        """Re-indent the file in place and return the new list of lines.

        The original file is backed up by save() before being overwritten.
        """
        style = self.optdict.get('style', SPACESONLY)
        indent = int(self.optdict.get('indent', '4'))
        tabsz = int(self.optdict.get('tabs', '8'))
        print('file %s -> style %s, tabsize %d, indent %d'
              % (self.fnm, style, tabsz, indent))
        # generate_tokens() takes a str-returning readline on both Python 2
        # and Python 3, unlike the old tokenize.tokenize(readline, eater).
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)
        new = []
        for (lvl, s, e) in self.indents:
            # Skip regions that start past the last line (dedents emitted at
            # end of file) and empty regions left by consecutive dedents.
            if s >= len(self.txt) or s == e:
                continue
            oldlead, content = self.split(self.txt[s])
            if style == TABSONLY:
                newlead = '\t' * lvl
            elif style == SPACESONLY:
                newlead = ' ' * (indent * lvl)
            else:
                # MIXED: as many tabs as fit, then spaces for the remainder.
                sz = indent * lvl
                t, spcs = divmod(sz, tabsz)
                newlead = '\t' * t + ' ' * spcs
            new.append(newlead + content)
            for ln in self.txt[s + 1:e]:
                lead, content = self.split(ln)
                # Keep whatever the line had beyond the region's own indent.
                new.append(newlead + lead[len(oldlead):] + content)
        self.save(new)
        return new

    def save(self, txt):
        """Back the file up as '<name>.bak', then write the lines *txt*."""
        bakname = os.path.splitext(self.fnm)[0] + '.bak'
        print("backing up %s to %s" % (self.fnm, bakname))
        try:
            os.rename(self.fnm, bakname)
        except os.error:
            # Some platforms refuse to rename onto an existing file: drop
            # the stale backup and try again.
            os.remove(bakname)
            os.rename(self.fnm, bakname)
        with open(self.fnm, 'w') as out:
            out.writelines(txt)

def test():
    """Smoke test: re-indent 'test1.py' (current directory) in each style.

    Each run constructs a fresh PyText so the file is re-read after the
    previous run rewrote it.
    """
    # NOTE(review): only the two MIXED runs are visible in the archived
    # script; the TABSONLY and SPACESONLY runs are assumed to be what the
    # first two (otherwise unused) PyText constructions were for.
    runs = (
        {'style': TABSONLY},
        {'style': SPACESONLY, 'indent': '4'},
        {'style': MIXED, 'indent': '4', 'tabs': '8'},
        {'style': MIXED, 'indent': '2', 'tabs': '8'},
    )
    for options in runs:
        # PyText takes its options as a dict at construction time;
        # process() itself accepts no arguments.
        tc = PyText('test1.py', options)
        tc.process()
def cleanfile(fnm, d):
    """Re-indent the file *fnm*, or every '.py' file below directory *fnm*.

    fnm -- path to a file or a directory; symlinked directories are not
           followed.
    d   -- option dict handed unchanged to PyText.
    """
    if os.path.isdir(fnm) and not os.path.islink(fnm):
        for name in os.listdir(fnm):
            fullnm = os.path.join(fnm, name)
            is_real_dir = os.path.isdir(fullnm) and not os.path.islink(fullnm)
            if is_real_dir or os.path.normcase(fullnm[-3:]) == ".py":
                cleanfile(fullnm, d)
        # A directory has no text of its own to rewrite.
        return
    tc = PyText(fnm, d)
    tc.process()
# Command-line help text; the single %s is filled in with the program name.
usage = """\
%s [options] [path...]
  -T : reformat to TABS ONLY
  -S : reformat to SPACES ONLY ( -i option is important)
  -M : reformat to MIXED SPACES / TABS ( -t and -i options important)
  -t<n> : tab is worth <n> characters
  -i<n> : indents should be <n> characters 
  -h : print this text
 path is file or directory
"""

if __name__ == '__main__':
    import getopt
    import os
    import sys

    try:
        opts, args = getopt.getopt(sys.argv[1:], "TSMht:i:")
    except getopt.error as err:
        # Unknown option or missing option argument: explain and stop.
        sys.stderr.write('%s\n' % err)
        print(usage % sys.argv[0])
        sys.exit(2)

    # Option dict passed to every PyText; values stay strings as PyText
    # converts them itself.
    d = {}
    for flag, value in opts:
        if flag == '-T':
            d['style'] = TABSONLY
        elif flag == '-S':
            d['style'] = SPACESONLY
        elif flag == '-M':
            d['style'] = MIXED
        elif flag == '-t':
            d['tabs'] = value
        elif flag == '-i':
            d['indent'] = value
        elif flag == '-h':
            print(usage % sys.argv[0])
            # Asking for help must not go on to rewrite any files.
            sys.exit(0)
    if not args:
        print(usage % sys.argv[0])
    for arg in args:
        cleanfile(arg, d)