efficient mega-replacements
Simon Brunning
SBrunning at trisystems.co.uk
Thu Jan 24 10:52:36 EST 2002
> From: Clark C . Evans [SMTP:cce at clarkevans.com]
> Is there an efficient way to do multiple replacements?
>
> val = val.replace("'","''")\
> .replace("\\","\\\\\\\\\\\\\\\\")\
> .replace(chr(13)+chr(10),"\\\\\\\\n")\
> .replace(chr(10),"\\\\\\\\n")\
> .replace(chr(13),"\\\\\\\\n")\
> .replace('"','\\\\"')
>
> Or is this the best way to do it?
Some time ago, /F gave me some code, which I've modified slightly:
class MultiReplacer:
    """Callable that applies many search/replace pairs to a string in one pass.

    Single-character to single-character replacements are performed with a
    256-entry translation table; everything else is performed with one
    compiled regular expression that alternates over all the search strings.

    Attributes:
        replacements: dict mapping each search string to its replacement.
        charMap: the 256-character translation table, or None when the
            regular-expression path is used.
        reObject: the compiled pattern, or None when the translation table
            is used or there is nothing to replace.
    """

    def __init__(self, replacements, delimiter='\t', wholeWords=None,
                 caseInsensitive=None):
        """Build the replacer.

        Args:
            replacements: either a mapping (or iterable of pairs) of
                search string -> replacement, or the path of a text file
                holding one ``search<delimiter>replacement`` pair per line.
            delimiter: field separator used when reading a file.
            wholeWords: if true, only match search strings that sit between
                word boundaries (forces the regular-expression path).
            caseInsensitive: if true, match search strings regardless of
                case.

        Raises:
            ValueError: if a non-blank line of the file has no delimiter.
        """
        import re

        self.replacements = {}
        try:
            # A mapping (or iterable of pairs) is copied directly.
            self.replacements.update(replacements)
        except (TypeError, ValueError):
            # Not a mapping: treat it as a file path.  dict.update raises
            # ValueError (not TypeError) for a path string, because its
            # one-character elements are not key/value pairs.
            self._loadFile(replacements, delimiter)

        self.charMap = None
        self.reObject = None
        # Lower-cased view of the keys, so a case-insensitive match can be
        # looked up even when its case differs from the key as written.
        self._foldedReplacements = None
        if caseInsensitive:
            self._foldedReplacements = {
                fromValue.lower(): toValue
                for fromValue, toValue in self.replacements.items()
            }

        # Char to char mapping, when every pair qualifies.
        if not wholeWords:
            self.charMap = self._buildCharMap(caseInsensitive)
            if self.charMap is not None:
                return

        if not self.replacements:
            # Nothing to search for; __call__ returns its input unchanged.
            return

        # String to string mapping - use a regular expression.  Longest
        # keys go first so that a key which is a prefix of another
        # ('foo' / 'foobar') cannot shadow the longer one; ties are broken
        # alphabetically to keep the pattern deterministic.
        fromVals = sorted(self.replacements,
                          key=lambda value: (-len(value), value))
        alternatives = '|'.join(map(re.escape, fromVals))
        if wholeWords:
            rePattern = r'\b(' + alternatives + r')\b'
        else:
            rePattern = alternatives
        if caseInsensitive:
            self.reObject = re.compile(rePattern, re.I)
        else:
            self.reObject = re.compile(rePattern)

    def _loadFile(self, path, delimiter):
        """Read ``search<delimiter>replacement`` lines from *path*."""
        with open(path, 'r') as replacementsFile:
            for line in replacementsFile:
                if not line.strip('\r\n'):
                    continue  # ignore blank lines
                fromValue, toValue = line.split(delimiter)[:2]
                # rstrip copes with an empty replacement value.
                self.replacements[fromValue] = toValue.rstrip('\r\n')

    def _buildCharMap(self, caseInsensitive):
        """Return a 256-character translation table, or None if unsuitable.

        The table is only usable when every search string and every
        replacement is exactly one character and every character to be
        replaced (including its case variants) has an ordinal below 256.
        """
        table = [chr(code) for code in range(256)]
        for fromValue, toValue in self.replacements.items():
            if len(fromValue) != 1 or len(toValue) != 1:
                return None
            if caseInsensitive:
                sources = (fromValue.upper(), fromValue.lower())
            else:
                sources = (fromValue,)
            for source in sources:
                # A case variant may be several characters long or fall
                # outside the table; leave those to the regexp path.
                if len(source) != 1 or ord(source) >= len(table):
                    return None
                table[ord(source)] = toValue
        return "".join(table)

    def __call__(self, string):
        """Return *string* with every replacement applied."""
        # Char to char mapping
        if self.charMap:
            return string.translate(self.charMap)
        # No replacements were supplied
        if self.reObject is None:
            return string
        # String to string mapping
        return self.reObject.sub(self.__replaceMatch, string)

    def __replaceMatch(self, match):
        """Return the replacement text for one regular-expression match."""
        item = match.group(0)
        if item in self.replacements:
            return self.replacements[item]
        if self._foldedReplacements is not None:
            # Case-insensitive match whose case differs from the key.
            return self._foldedReplacements.get(item.lower(), item)
        # No entry found: leave the matched text as it was.
        return item
Use it like this:
>>> spammer = MultiReplacer({'foo': 'spam', 'bar': 'egg', 'baz': 'chips'})
>>> spammer('foo, foo, foo, bar and baz')
'spam, spam, spam, egg and chips'
You can also drive it from a delimited file - have a play.
Cheers,
Simon Brunning
TriSystems Ltd.
sbrunning at trisystems.co.uk
-----------------------------------------------------------------------
The information in this email is confidential and may be legally privileged.
It is intended solely for the addressee. Access to this email by anyone else
is unauthorised. If you are not the intended recipient, any disclosure,
copying, distribution, or any action taken or omitted to be taken in
reliance on it, is prohibited and may be unlawful. TriSystems Ltd. cannot
accept liability for statements made which are clearly the sender's own.
More information about the Python-list
mailing list