whitespace, comment stripper, and EOL converter

qwweeeit qwweeeit at yahoo.it
Sat Apr 16 10:48:08 EDT 2005


Hi,

At last I succeeded in implementing a cross-reference tool!
(with your help and that of other gurus...).
Now I can face the problem (for me...) of understanding your 
code (I have not grasped the classes and objects...).

Here is a brief example of the xref output (taken from your code,
although the line numbers don't match, because I modified your code,
not being interested in EOLs other than Linux's).

and         076                     if self.lasttoken<=self.spaces and self.spaces:
append      046             self.lines.append(pos)
append      048         self.lines.append(len(self.raw))
argv        116     if sys.argv[1]:
argv        117         filein = open(sys.argv[1]).read()
__author__  010 __author__ = s_
break       045             if not pos: break
__call__    080     def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line):
class       015 class Stripper:
COMMENT     092             if toktype == tokenize.COMMENT:
comments    021     def format(self, out=sys.stdout, comments=0, spaces=1,untabify=1):
comments    033         self.comments = comments
comments    090         if not self.comments:
comments    118         Stripper(filein).format(out=sys.stdout, comments=0, untabify=1)
__credits__ 008 __credits__ = s_
__date__    011 __date__ =  s_
DEDENT      105         if toktype in [token.INDENT, token.DEDENT]:
def         018     def __init__(self, raw):
def         021     def format(self, out=sys.stdout, comments=0, spaces=1,untabify=1):
def         080     def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line):
def         114 def Main():
ecol        080     def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line):
erow        080     def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line):
ex          059         except tokenize.TokenError, ex:
except      059         except tokenize.TokenError, ex:
expandtabs  036            self.raw = self.raw.expandtabs()
filein      117         filein = open(sys.argv[1]).read()
filein      118         Stripper(filein).format(out=sys.stdout, comments=0, untabify=1)
find        044             pos = self.raw.find(self.lineend, pos) + 1
format      021     def format(self, out=sys.stdout, comments=0, spaces=1,untabify=1):
format      118         Stripper(filein).format(out=sys.stdout, comments=0, untabify=1)
import      005 import keyword, os, sys, traceback
import      006 import StringIO
import      007 import token, tokenize
import      115     import sys
INDENT      105         if toktype in [token.INDENT, token.DEDENT]:
__init__    018     def __init__(self, raw):
isspace     071                 if not line.isspace():
keyword     005 import keyword, os, sys, traceback
lasttoken   030         self.lasttoken = 1
lasttoken   072                     self.lasttoken=0
lasttoken   075                     self.lasttoken+=1
lasttoken   076                     if self.lasttoken<=self.spaces and self.spaces:
...

To obtain this output, you must remove comments and empty lines and move
the strings into a db file, leaving s_ as a placeholder for normal strings
and m_ for triple-quoted strings.
See an example:
    
m_   """python comment and whitespace stripper :)"""      #016
m_   ''' strip comments, strip extra whitespace, convert EOL's from
Python
                                                  code.'''#023     
m_   ''' Token handler.'''                                #082

s_ 'just another tool that I needed' |008 __credits__ = 'just another tool that I needed'
s_ '.7'                   |009 __version__ = '.7'
s_ 'M.E.Farmer'           |010 __author__ = 'M.E.Farmer'
s_ 'Jan 15 2005, Oct 24 2004' |011 __date__ =  'Jan 15 2005, Oct 24 2004'
s_ ' '                    |037         self.raw = self.raw.rstrip()+' '
s_ '\n'                   |040         self.lineend = '\n'
s_ '__main__'             |122 if __name__ == '__main__':

I think that this tool is very useful.

Bye



More information about the Python-list mailing list