Strip white spaces from source
Richie Hindle
richie at entrian.com
Mon May 9 06:47:24 EDT 2005
[qwweeeit]
> I need to limit as much as possible the length of a source line,
> stripping white spaces (except indentation).
> For example:
> . . max_move and AC_RowStack.acceptsCards ( self, from_stack, cards
> )
> must be reduced to:
> . . max_move and AC_RowStack.acceptsCards(self,from_stack,cards)
Here's a script that does some of what you want (stripping whitespace within
the three types of brackets). It was written to make code more compliant with
the Python style guide.
------------------------------- unspace.py -------------------------------
"""Strips spaces from inside brackets in Python source code, turning
( this ) into (this) and [ 1, ( 2, 3 ) ] into [1, (2, 3)]. This makes
the code more compliant with the Python style guide. Usage:
unspace.py filename
Output goes to stdout.
This file is deliberately written with lots of spaces within brackets,
so you can use it as test input.
"""
import sys, re, token, tokenize
# Bracket tokens whose adjacent inner whitespace is stripped.
OPEN = ['(', '[', '{']
CLOSE = [')', ']', '}']


class UnSpace:
    """Holds the state of the process; onToken is a tokenize callback.

    Tokens must be passed to onToken() in source order.  Each call writes
    the token to sys.stdout, preceded by the whitespace that came before it
    in the source, except for whitespace directly after an opening bracket
    or directly before a closing bracket on the same row.
    """

    def __init__(self):
        # 'line', 'number' and 'deleted' are never updated by onToken();
        # they are kept so existing users of the class still find them.
        self.line = None    # The text of the current line (see flush()).
        self.number = -1    # The line number of the current line.
        self.deleted = 0    # How many spaces have been deleted from 'line'.
        # Properties of the most recently written token.
        self.last_srow = 0
        self.last_scol = 0
        self.last_erow = 0
        self.last_ecol = 0
        self.last_type = None
        self.last_tok = ''
        self.last_line = ''

    def onToken(self, type, tok, start, end, line):
        """Write one token, and the whitespace to keep before it, to stdout.

        Takes the five fields of a token as produced by the tokenize module:
        the token type, its text, its (row, column) start and end positions,
        and the source line(s) it came from.  The positions are plain
        parameters unpacked below, so the method can be called with the
        same five positional arguments on both Python 2 and Python 3.
        """
        srow, scol = start
        erow, ecol = end
        write = sys.stdout.write

        if self.last_erow != srow:
            # First token on a new row.  If the previous row ended in a
            # continuation backslash, reproduce it.  A row that ended with
            # a NEWLINE/NL token cannot be a continuation; any backslash
            # found there belongs to a trailing comment.
            if self.last_type not in (token.NEWLINE, tokenize.NL):
                match = re.search(r'(\s*\\\n)$', self.last_line)
                if match:
                    write(match.group(1))
            # Then the indent of the new row.
            write(line[:scol])
        else:
            # Same row as the END of the previous token (which may have
            # started on an earlier row, e.g. a triple-quoted string):
            # keep the intertoken whitespace except the stuff to strip.
            after_open = self.last_type == token.OP and self.last_tok in OPEN
            before_close = type == token.OP and tok in CLOSE
            if not (after_open or before_close):
                write(line[self.last_ecol:scol])

        # Print the token itself.
        write(tok)

        # Remember the properties of this token.
        self.last_srow, self.last_scol = srow, scol
        self.last_erow, self.last_ecol = erow, ecol
        self.last_type, self.last_tok = type, tok
        self.last_line = line

    def flush(self):
        """Write the pending 'line' text to stdout, if there is any."""
        if self.line is not None:
            sys.stdout.write(self.line)
if __name__ == '__main__':
    # Script entry point: unspace.py filename -> cleaned source on stdout.
    if len(sys.argv) != 2:
        # Parenthesised so the line works both as a Python 2 print
        # statement and as a Python 3 function call.
        print(__doc__)
    else:
        unSpace = UnSpace()
        # The context manager closes the input even if tokenizing fails.
        with open(sys.argv[1], 'rt') as source:
            # generate_tokens() exists on Python 2 and 3 and yields the
            # same five fields that onToken() takes.
            for tok_info in tokenize.generate_tokens(source.readline):
                unSpace.onToken(*tok_info)
        unSpace.flush()
--
Richie Hindle
richie at entrian.com
More information about the Python-list
mailing list