how to write a text file search & replace script

Simon Brunning SBrunning at trisystems.co.uk
Tue Mar 27 10:23:43 CEST 2001


The script below might be of some use. To use it, set up a text file with
all your from values and to values in it (one pair per line, tab separated),
and run it over your files.

#!/usr/bin/env python
# Module     : multirep.py
# Synopsis   : Multiple replacements
# Programmer : Simon Brunning - sbrunning at bigfoot.com
# Date       : 20/09/2000
# Notes      : Thanks to Fredrik Lundh for the MultiReplace class, which I
#              have butchered.
'''Perform multiple replacements.
Takes a list of delimited from and to values from a replacements file,
and replaces the from values with the to values in the target file(s).
The target files are not changed - new version(s) of the target file(s)
are written, with the filename prefixed.

Usage  : Python multirep.py [options] replacementsfile targetfile(s)
Options: -h = help
         -d = delimiter (defaults to tabs, s = spaces, c = commas)
         -p = prefix for new files (defaults to 'New')
         -c = case insensitive match
         -w = replace whole words only'''

def multirep(arguments):
    '''Run the multiple-replacement tool over a command-line argument list.

    arguments is the command line without the program name (for example
    sys.argv[1:]): optional flags, then the replacements file, then one or
    more target file names or glob patterns.

    Options (parsed with getopt):
        -h, -?  print the module documentation and stop
        -d X    delimiter used in the replacements file: 's' selects a
                space and 'c' a comma; any other value keeps the tab
        -p X    prefix for the names of the files written (default 'New')
        -w      replace whole words only
        -c      case insensitive match

    The module documentation is printed, and nothing else is done, when
    the options cannot be parsed or when the replacements file or the
    targets are missing from the command line. 'Invalid replacements
    file.' is printed when that file cannot be read or parsed.

    Always returns None.
    '''
    import getopt
    import glob

    def showUsage():
        # print(x) with a single argument behaves identically as a
        # Python 2 print statement and as the Python 3 print function.
        print('')
        print(__doc__)

    # Split arguments list into options and arguments
    try:
        options, arguments = getopt.getopt(arguments, '?hd:p:wc')
    except getopt.GetoptError:
        # Unknown option, or an option missing its value
        showUsage()
        return

    # Set defaults
    delimiter = '\t'
    prefix = 'New'
    wholeWords = None
    caseInsensitive = None

    # Options - override defaults and show help
    for option, value in options:
        flag = option[-1]
        if flag in '?h':
            # Asking for help is a request to do nothing else
            showUsage()
            return
        elif flag == 'd': # Specify delimiter
            if value == 's':
                delimiter = ' '
            elif value == 'c':
                delimiter = ','
        elif flag == 'p': # Specify prefix
            prefix = value
        elif flag == 'w': # Whole words
            wholeWords = 1
        elif flag == 'c': # case insensitive
            caseInsensitive = 1

    # The replacements file is the first positional argument
    if not arguments:
        showUsage()
        return

    # Build replacement function from replacements file
    try:
        replacer = MultiReplacer(arguments[0], delimiter, wholeWords,
                                 caseInsensitive)
    except (IOError, ValueError):
        # Unreadable file, or a line that does not hold a from/to pair
        print('')
        print('Invalid replacements file.')
        return

    # At least one target must be named
    patterns = arguments[1:]
    if not patterns:
        showUsage()
        return

    # Expand remaining arguments into target file list
    targetFiles = []
    for pattern in patterns:
        targetFiles.extend(glob.glob(pattern))

    # Perform replacement on each file
    for targetFile in targetFiles:
        replaceFile(targetFile, replacer, prefix)

def replaceFile(infile, replacer, prefix='New'):
    '''Write a copy of infile with replacer applied to its contents.

    infile   -- path of the file to read; it is opened in binary mode so
                line endings pass through untouched
    replacer -- callable that takes the whole file contents and returns
                the text to write
    prefix   -- string prepended to the base name of infile to form the
                name of the output file, which is created in the same
                directory as infile

    The input is read completely and the replacement computed before the
    output file is opened, so a failing replacer leaves no empty output
    behind and an empty prefix rewrites infile in place instead of
    truncating it before it has been read.
    '''
    import os

    # Build outfile name
    outfile = os.path.join(os.path.dirname(infile),
                           prefix + os.path.basename(infile))

    # Read from infile and replace values...
    with open(infile, 'rb') as source:
        contents = source.read()
    replaced = replacer(contents)

    # ...then write to outfile
    with open(outfile, 'wb') as target:
        target.write(replaced)

class MultiReplacer:
    '''Callable that applies a set of replacements to a string in one pass.

    replacements    -- a mapping of from values to to values, or the name
                       of a file holding one from/to pair per line
    delimiter       -- separator between the two values on each line of a
                       replacements file (not used for a mapping)
    wholeWords      -- true to replace only matches that sit between word
                       boundaries
    caseInsensitive -- true to match from values regardless of case

    Raises ValueError when a non-blank line of the replacements file holds
    no delimiter, or when a from value is empty. Errors from opening the
    file propagate unchanged.

    After construction, self.replacements holds the from/to dictionary.
    When every from and to value is a single 8-bit character and
    wholeWords is false, self.charMap holds a 256-character translation
    table; otherwise self.charMap is None and self.rePattern holds the
    compiled regular expression (or None if there is nothing to replace).
    '''

    def __init__(self, replacements, delimiter='\t', wholeWords=None,
                 caseInsensitive=None):
        import re

        # Build replacements dictionary - may come in as a mapping or as
        # a file. The test is explicit because dict.update() reports a
        # string argument with ValueError, not TypeError.
        self.replacements = {}
        if hasattr(replacements, 'keys'):
            self.replacements.update(replacements)
        else:
            self.replacements.update(
                self.__readFile(replacements, delimiter))

        # An empty from value would match at every position
        if '' in self.replacements:
            raise ValueError('empty from value in replacements')

        self.charMap = None
        self.rePattern = None
        self.__caseInsensitive = caseInsensitive
        self.__lookup = self.replacements

        # Char to char mapping - cannot honour word boundaries
        if not wholeWords:
            self.charMap = self.__buildCharMap(caseInsensitive)
            if self.charMap is not None:
                return

        # String to string mapping - use a regular expression.
        # A case insensitive match can differ in case from the from value
        # it was built from, so index the to values by lower-cased key.
        if caseInsensitive:
            self.__lookup = {}
            for fromValue, toValue in self.replacements.items():
                self.__lookup[fromValue.lower()] = toValue

        # Nothing to replace - __call__ returns its input unchanged
        if not self.__lookup:
            return

        # Longest alternatives first, so that a from value which is a
        # prefix of another one cannot pre-empt the longer match
        fromVals = sorted(self.__lookup, key=len, reverse=True)

        # Build regexp pattern
        rePattern = '|'.join(map(re.escape, fromVals))
        if wholeWords:
            rePattern = r'\b(' + rePattern + r')\b'

        # Compile regexp
        if caseInsensitive:
            self.rePattern = re.compile(rePattern, re.I)
        else:
            self.rePattern = re.compile(rePattern)

    def __readFile(self, filename, delimiter):
        # Return the from/to pairs held in a replacements file.
        # The file is read as bytes so Python 2 and Python 3 see the same
        # 8-bit data; on Python 3 each line is decoded as Latin-1, which
        # maps every byte to exactly one character.
        pairs = {}
        with open(filename, 'rb') as replacementsFile:
            for line in replacementsFile:
                if not isinstance(line, str):
                    line = line.decode('latin-1')
                line = line.rstrip('\r\n') # Strip newlines
                if not line:
                    continue # Blank lines carry no replacement
                # Raises ValueError if the delimiter is missing
                fromValue, toValue = line.split(delimiter)[:2]
                pairs[fromValue] = toValue
        return pairs

    def __buildCharMap(self, caseInsensitive):
        # Return a 256-character translation table, or None when some
        # replacement is not a plain single character substitution.
        charMap = [chr(code) for code in range(256)]
        for fromValue, toValue in self.replacements.items():
            if len(fromValue) != 1 or len(toValue) != 1:
                return None
            if caseInsensitive:
                variants = (fromValue.upper(), fromValue.lower())
            else:
                variants = (fromValue,)
            for variant in variants:
                # Case conversion can yield several characters, or one
                # outside the table; leave those to the regexp
                if len(variant) != 1 or ord(variant) > 255:
                    return None
                charMap[ord(variant)] = toValue
        return ''.join(charMap)

    def __call__(self, string):
        '''Return string with every replacement applied.

        On Python 3 a bytes argument is treated as 8-bit text: it is
        decoded as Latin-1, processed, and encoded back, so the result is
        bytes again. Encoding fails if a to value holds a character
        outside Latin-1.
        '''
        if bytes is not str and isinstance(string, bytes):
            return self(string.decode('latin-1')).encode('latin-1')

        # Char to char mapping
        if self.charMap:
            return string.translate(self.charMap)

        # No replacements at all
        if self.rePattern is None:
            return string

        # String to string mapping
        return self.rePattern.sub(self.__replaceMatch, string)

    def __replaceMatch(self, match):
        # Return the to value for a regexp match
        item = match.group(0)
        key = item
        if self.__caseInsensitive:
            key = item.lower()
        # Fall back to the matched text so the substitution never
        # receives None
        return self.__lookup.get(key, item)
        
if __name__ == '__main__':
    # Run as a script: forward the command line, minus the program name,
    # to multirep().
    import sys
    commandLine = sys.argv[1:]
    multirep(commandLine)

Cheers,
Simon Brunning
TriSystems Ltd.
sbrunning at trisystems.co.uk





-----------------------------------------------------------------------
The information in this email is confidential and may be legally privileged.
It is intended solely for the addressee. Access to this email by anyone else
is unauthorised. If you are not the intended recipient, any disclosure,
copying, distribution, or any action taken or omitted to be taken in
reliance on it, is prohibited and may be unlawful. TriSystems Ltd. cannot
accept liability for statements made which are clearly the sender's own.




More information about the Python-list mailing list