remove strings from source
M.E.Farmer
mefjr75 at hotmail.com
Sat Feb 26 16:23:27 EST 2005
qwweeeit wrote:
> For a python code I am writing I need to remove all strings
> definitions from source and substitute them with a place-holder.
>
> To make clearer:
> line 45 sVar="this is the string assigned to sVar"
> must be converted in:
> line 45 sVar=s00001
>
> Such substitution is recorded in a file under:
> s0001[line 45]="this is the string assigned to sVar"
>
> For curious guys:
> I am trying to implement a cross variable reference tool and the
> variability (in length) of the string definitions (especially if
> multi-line) can cause display problems.
>
> I need your help in correctly identifying the strings (also embedding
> the r'xx..' or u'yy...' as part of the string definition). The problem
> is mainly with the multi-line definitions or in cached strings
> (embedding chr() definitions or escape sequences).
Hello,
I have written a few python parsers before.
Here is my attempt :)
# string_mapper.py
from __future__ import generators  # python 2.2

import keyword
import os
import sys
import token
import tokenize
import traceback

try:
    import cStringIO
except ImportError:
    # Python 3 removed cStringIO (io.StringIO is its replacement); keep the
    # name defined so importing this module does not fail there.
    cStringIO = None
def StringNamer(num=0):
    """Generate an endless sequence of placeholder names.

    Each name is the letter 's' followed by a counter zero-padded to at
    least six digits.  The counter is incremented *before* a name is
    produced, so the default start of 0 yields 's000001', 's000002', ...

    num -- value the counter starts from; the first name uses num + 1.
    """
    while True:
        num += 1
        yield 's' + str(num).zfill(6)
class ReplaceParser(object):
    """Replace every string literal in Python source with a placeholder.

    The source is tokenized.  Each STRING token is written to ``out`` as a
    generated name and the literal's original text is stored in
    ``StringMap`` under that name.  Other tokens, and the source text lying
    between tokens, are copied to ``out``.

    NOTE: on Python 3.12+ f-strings are tokenized as FSTRING_* tokens, not
    STRING, so they are not replaced.

    >>> filein = open('yourfilehere.py').read()
    >>> replacer = ReplaceParser(filein, out=sys.stdout)
    >>> replacer.format()
    >>> replacer.StringMap
    """

    def __init__(self, raw, out=sys.stdout):
        """Store the source text.

        raw -- the source code as one string; tabs are expanded and
               leading/trailing whitespace is stripped before use.
        out -- object with write() and flush() that receives the result.
        """
        self.raw = raw.expandtabs().strip()
        self.out = out
        # Generator supplying the placeholder names.
        self.StringName = StringNamer()
        # Maps placeholder name -> original literal text (quotes included).
        self.StringMap = {}

    def format(self):
        """Tokenize the stored source and write the rewritten text to out.

        A tokenize.TokenError (for example an unterminated multi-line
        string) is not propagated; its traceback is printed and whatever
        was produced up to that point stays written.
        """
        # self.lines[row] is the offset in self.raw at which the 1-based
        # line `row` starts.  Index 0 is padding; the final entry is
        # len(self.raw), the row on which end-of-input tokens are reported.
        self.lines = [0, 0]
        pos = 0
        while True:
            pos = self.raw.find('\n', pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        self.lines.append(len(self.raw))

        # Offset in self.raw up to which the source has been consumed.
        self.pos = 0

        # Feed the tokenizer one physical line at a time, cut at exactly the
        # offsets recorded above so token rows always agree with self.lines.
        starts = self.lines[1:]
        chunks = iter([self.raw[a:b] for a, b in zip(starts, starts[1:])])

        def readline():
            # tokenize expects '' once the input is exhausted.
            return next(chunks, '')

        try:
            for tok in tokenize.generate_tokens(readline):
                self(*tok)
        except tokenize.TokenError:
            traceback.print_exc()

    def __call__(self, toktype, toktext, start, end, line):
        """Handle one token.

        toktype -- token type constant from the token/tokenize modules.
        toktext -- the token's text.
        start   -- (row, col) where the token begins; row is 1-based.
        end     -- (row, col) where the token ends (unused).
        line    -- the physical line containing the token (unused).
        """
        srow, scol = start
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        if toktype in (token.NEWLINE, tokenize.NL):
            # Python 3 synthesizes a NEWLINE token with empty text when the
            # input does not end in a newline; emitting '\n' for it would
            # add a line break that is not in the source.
            if toktext:
                self.out.write('\n')
            return

        # Copy whatever lies between the previous token and this one
        # (spaces, indentation, backslash continuations).
        if newpos > oldpos:
            self.out.write(self.raw[oldpos:newpos])

        if toktype in (token.INDENT, token.DEDENT):
            # Do not consume the indentation here: leaving pos at the token
            # start makes it get copied as "gap" text before the next token.
            self.pos = newpos
            return

        if toktype == token.STRING:
            sname = next(self.StringName)
            self.StringMap[sname] = toktext
            toktext = sname

        self.out.write(toktext)
        self.out.flush()
hth,
M.E.Farmer
More information about the Python-list
mailing list