how to write a text file search & replace script

Simon Brunning SBrunning at
Tue Mar 27 10:23:43 CEST 2001

The script below might be of some use. To use it, set up a text file with
all your from values and to values in it (one pair per line, tab separated),
and run it over your files.

#!/usr/bin/env python
# Module     :
# Synopsis   : Multiple replacements
# Programmer : Simon Brunning - sbrunning at
# Date       : 20/09/2000
# Notes      : Thanks to Fredrik Lundh for the MultiReplace class, which I
#              have butchered.
'''Perform multiple replacements.
Takes a list of delimited from and to values from a replacements file,
and replaces the from values with the to values in the target file(s).
The target files are not changed - new version(s) of the target file(s)
are written, with the filename prefixed.

Usage  : Python [options] replacementsfile targetfile(s)
Options: -h = help
         -d = delimiter (defaults to tabs, s = spaces, c = commas)
         -p = prefix for new files (defaults to 'New')
         -c = case insensitive match
         -w = replace whole words only'''

def multirep(arguments):
    '''Run the multiple-replacement tool for a command-line argument list.

    arguments is the argument list without the program name (that is,
    sys.argv[1:]): any options, then the replacements file, then one or
    more target file names or glob patterns.  The options are described in
    the module docstring.

    Help requests and usage problems (unknown option, missing replacements
    file, missing target files) print the module docstring and return
    without touching any file.  A replacements file that cannot be parsed
    prints 'Invalid replacements file.' and returns.  Always returns None.
    '''
    import getopt, glob

    # Split arguments list into options and arguments
    try:
        options, arguments = getopt.getopt(arguments, '?hd:p:wc')
    except getopt.GetoptError as problem:
        print()
        print(problem)
        print(__doc__)
        return

    # Set defaults
    delimiter = '\t'
    prefix = 'New'
    wholeWords = None
    caseInsensitive = None

    # Options - override defaults and show help
    for option, value in options:
        if option[-1] in '?h':
            # Asking for help is a request for the usage text only, so stop
            # here rather than carrying on to process files.
            print()
            print(__doc__)
            return
        elif option[-1] == 'd': # Specify delimiter
            if value == 's':
                delimiter = ' '
            elif value == 'c':
                delimiter = ','
        elif option[-1] == 'p': # Specify prefix
            prefix = value
        elif option[-1] == 'w': # Whole words
            wholeWords = 1
        elif option[-1] == 'c': # case insensitive
            caseInsensitive = 1

    # The first positional argument must name the replacements file
    if not arguments:
        print()
        print(__doc__)
        return

    # Build replacement function from replacements file
    try:
        replacer = MultiReplacer(arguments[0], delimiter, wholeWords,
                                 caseInsensitive)
    except ValueError:
        print()
        print('Invalid replacements file.')
        return

    # Expand remaining arguments into target file list
    patterns = arguments[1:]
    if not patterns:
        print()
        print(__doc__)
        return
    targetFiles = []
    for pattern in patterns:
        targetFiles.extend(glob.glob(pattern))

    # Perform replacement on each file
    for targetFile in targetFiles:
        replaceFile(targetFile, replacer, prefix)

def replaceFile(infile, replacer, prefix='New'):
    '''Write a replaced copy of infile next to it, leaving infile unchanged.

    infile   -- path of the file to read.
    replacer -- callable that takes the file contents (as read in binary
                mode) and returns the contents to write.
    prefix   -- text put in front of infile's base name to form the name
                of the new file, which is created in infile's directory.

    The input is read and transformed before the output file is opened, so
    a failure while reading or replacing does not leave an empty output
    file behind.
    '''
    import os

    # Build outfile name
    outfile = os.path.join(os.path.dirname(infile),
                           ''.join((prefix, os.path.basename(infile))))

    # Read from infile, replace values, and write to outfile
    with open(infile, 'rb') as source:
        contents = source.read()
    replaced = replacer(contents)
    with open(outfile, 'wb') as target:
        target.write(replaced)

class MultiReplacer:
    '''Callable that applies a set of from -> to replacements in one pass.

    Build it from a mapping of from-values to to-values, or from the path
    of a replacements file holding one delimited from/to pair per line.
    Calling the instance with a string returns the string with every
    from-value replaced by its to-value.  Bytes input is also accepted (it
    is what replaceFile passes in): it is processed as Latin-1 text, so
    that each byte is exactly one character, and returned as bytes.

    Attributes:
        replacements -- dictionary of from-value -> to-value.
        charMap      -- translation table (character ordinal -> to-value)
                        when every pair is single character to single
                        character and whole-word matching is off;
                        otherwise None.
        rePattern    -- compiled regular expression matching any
                        from-value, or None when charMap is used or there
                        is nothing to replace.
    '''

    def __init__(self, replacements, delimiter='\t', wholeWords=None,
                 caseInsensitive=None):
        '''Prepare the replacement tables.

        replacements    -- mapping, or path of a replacements file.
        delimiter       -- separator between from and to values in the file.
        wholeWords      -- if true, replace from-values only where they
                           stand as whole words.
        caseInsensitive -- if true, match from-values regardless of case.

        Raises ValueError if a line of the replacements file has no
        delimiter or has an empty from-value.
        '''
        self.caseInsensitive = caseInsensitive

        # Build replacements dictionary - may come in as a mapping or as a
        # file
        self.replacements = {}
        if hasattr(replacements, 'keys'):
            # replacements is a mapping
            self.replacements.update(replacements)
        else:
            # replacements is a file
            self.replacements.update(self._readFile(replacements, delimiter))

        # A case insensitive match can differ in case from the from-value
        # it matched, so look matches up through lower-cased keys.
        if caseInsensitive:
            self._lookup = dict([(fromValue.lower(), toValue) for
                                 fromValue, toValue in
                                 self.replacements.items()])
        else:
            self._lookup = self.replacements

        self.charMap = None
        self.rePattern = None

        # Build char to char mapping...
        if not wholeWords:
            self.charMap = self._buildCharMap(caseInsensitive)
            if self.charMap is not None:
                return

        # String to string mapping - use a regular expression
        import re

        # Longest from-values first: alternation takes the first branch
        # that matches, so 'cat|category' would never match 'category'.
        # Ties are broken alphabetically to keep the pattern deterministic.
        fromVals = [value for value in self.replacements if value]
        fromVals.sort(key=lambda value: (-len(value), value))
        if not fromVals:
            return # Nothing to replace - __call__ returns its input as is

        # Build regexp pattern
        rePattern = '|'.join(map(re.escape, fromVals))
        if wholeWords:
            rePattern = r'\b(' + rePattern + r')\b'

        # Compile regexp
        if caseInsensitive:
            self.rePattern = re.compile(rePattern, re.I)
        else:
            self.rePattern = re.compile(rePattern)

    def _readFile(self, path, delimiter):
        '''Return the from -> to dictionary held in a replacements file.'''
        # Read as bytes and decode as Latin-1 so every byte becomes exactly
        # one character, matching how __call__ treats bytes input.
        with open(path, 'rb') as source:
            text = source.read().decode('latin-1')

        pairs = {}
        for line in text.split('\n'):
            line = line.rstrip('\r') # Strip newlines
            if not line:
                continue # Ignore blank lines
            fields = line.split(delimiter) # Split line
            if len(fields) < 2 or not fields[0]:
                raise ValueError('Bad replacement line: %r' % (line,))
            pairs[fields[0]] = fields[1] # Add to dictionary
        return pairs

    def _buildCharMap(self, caseInsensitive):
        '''Return a translate() table, or None if any pair is not 1 to 1.'''
        if not self.replacements:
            return None
        charMap = {}
        for fromValue, toValue in self.replacements.items():
            if len(fromValue) != 1 or len(toValue) != 1:
                return None
            if caseInsensitive:
                variants = (fromValue, fromValue.upper(), fromValue.lower())
            else:
                variants = (fromValue,)
            for variant in variants:
                # Changing case can yield more than one character; such a
                # variant cannot go in a character table.
                if len(variant) == 1:
                    charMap[ord(variant)] = toValue
        return charMap

    def __call__(self, string):
        '''Return string with the replacements applied.

        Bytes input is returned as bytes; encoding the result raises
        UnicodeEncodeError if a to-value has a character outside Latin-1.
        '''
        # apply replacement to string
        if isinstance(string, bytes):
            return self(string.decode('latin-1')).encode('latin-1')

        # Char to char mapping
        if self.charMap is not None:
            return string.translate(self.charMap)

        # Nothing to replace
        if self.rePattern is None:
            return string

        # String to string mapping
        return self.rePattern.sub(self.__replaceMatch, string)

    def __replaceMatch(self, match):
        '''Return the to-value for the text matched by the pattern.'''
        item = match.group(0)
        key = item
        if self.caseInsensitive:
            key = item.lower()
        # Fall back to the matched text itself so a lookup miss leaves the
        # text unchanged instead of deleting it.
        return self._lookup.get(key, item)
if __name__ == '__main__':
    import sys
    # Hand everything after the program name to the command-line driver.
    multirep(sys.argv[1:])

Simon Brunning
TriSystems Ltd.
sbrunning at

The information in this email is confidential and may be legally privileged.
It is intended solely for the addressee. Access to this email by anyone else
is unauthorised. If you are not the intended recipient, any disclosure,
copying, distribution, or any action taken or omitted to be taken in
reliance on it, is prohibited and may be unlawful. TriSystems Ltd. cannot
accept liability for statements made which are clearly the sender's own.

More information about the Python-list mailing list