[Tutor] simple copy within a python script

Simon Brunning SBrunning@trisystems.co.uk
Thu, 26 Oct 2000 08:54:13 +0100


This message is in MIME format. Since your mail reader does not understand
this format, some or all of this message may not be legible.

------_=_NextPart_000_01C03F21.EEEEA7E4
Content-Type: text/plain

Dwain,
You might find the attached script useful - it lets you set up a simple
text file containing a list of "from" and "to" values (tab-separated by
default) and apply those replacements to multiple files.

Cheers,
Simon Brunning
TriSystems Ltd.
sbrunning@trisystems.co.uk
 <<multirep.py>> 
> -----Original Message-----
> From:	Dwain Hargrave [SMTP:dwainh@corp.earthlink.net]
> Sent:	Wednesday, October 25, 2000 5:42 PM
> To:	'tutor@python.org'
> Subject:	[Tutor] simple copy within a python script
> 
> I am writing a Python script to find a string in a file and replace it
> with another.  Before I modify the file, I want to make a copy of it.
> How can I do this from within the Python script?  I am working on a
> Unix platform.
> 
> --
> Dwain Hargrave
> Unix Systems Administrator
> Earthlink Inc.
> 
> 
> 
> 
> _______________________________________________
> Tutor maillist  -  Tutor@python.org
> http://www.python.org/mailman/listinfo/tutor
> 
> 
> 
> 
-----------------------------------------------------------------------
The information in this email is confidential and may be legally privileged.
It is intended solely for the addressee. Access to this email by anyone else
is unauthorised. If you are not the intended recipient, any disclosure,
copying, distribution, or any action taken or omitted to be taken in
reliance on it, is prohibited and may be unlawful. TriSystems Ltd. cannot
accept liability for statements made which are clearly the sender's own.

------_=_NextPart_000_01C03F21.EEEEA7E4
Content-Type: application/octet-stream;
	name="multirep.py"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="multirep.py"

#!/usr/bin/env python
# Module     : multirep.py
# Synopsis   : Multiple replacements
# Programmer : Simon Brunning - sbrunning@bigfoot.com
# Date       : 20/09/2000
# Notes      : Thanks to Fredrik Lundh for the MultiReplace class, =
which I have butchered.
'''Perform multiple replacements.
Takes a list of delimited from and to values from a replacements file,
and replaces the from values with the to values in the target file(s).
The target files are not changed - new version(s) of the target file(s)
are written, with the filename prefixed.

Usage  : Python multirep.py [options] replacementsfile targetfile(s)
Options: -h =3D help
         -d =3D delimiter (defaults to tabs, s =3D spaces, c =3D =
commas)
         -p =3D prefix for new files (defaults to 'New')
         -c =3D case insensitive match
         -w =3D replace whole words only'''

import sys, re

def multirep(arguments):
    '''Run the multiple-replacement tool on a list of arguments.

    arguments is the command line without the program name (the script
    entry point passes sys.argv[1:]).  Options are parsed with getopt.
    The first remaining argument names the replacements file; every
    other argument is glob-expanded into the list of target files.

    Returns None.  When the replacements file or the target files are
    missing, the module docstring is printed and the function returns
    without processing anything.
    '''
    import getopt, glob, operator
   =20
    # Split arguments list into options and arguments
    options, arguments =3D getopt.getopt(arguments, '?hd:p:wc')

    # Set defaults
    delimiter =3D '\t'
    prefix =3D 'New'
    wholeWords =3D None
    caseInsensitive =3D None

    # Options - override defaults and show help
    # getopt returns each option with its leading '-', so the last
    # character is enough to identify it.
    # NOTE(review): -h / -? print the help text but do not return, so
    # processing carries on with whatever arguments remain.
    for option, value in options:
        if option[-1] in '?h':
            print; print __doc__
        elif option[-1] =3D=3D 'd': # Specify delimiter
            # Only 's' and 'c' are recognised; any other value leaves
            # the delimiter unchanged without a warning.
            if value =3D=3D 's':
                delimiter =3D ' '
            elif value =3D=3D 'c':
                delimiter =3D ','
        elif option[-1] =3D=3D 'p': # Specify prefix
            prefix =3D value
        elif option[-1] =3D=3D 'w': # Whole words
            wholeWords =3D 1
        elif option[-1] =3D=3D 'c': # case insensitive
            caseInsensitive =3D 1

    # 1st argument is the replacements file
    try:
        replacementsFile =3D arguments[0]
    except IndexError:
        print; print __doc__
        return

    # Expand remaining arguments into target file list
    # glob.glob returns one list per argument and reduce concatenates
    # them.  With no target arguments reduce receives an empty sequence
    # and no initial value, which raises TypeError - hence the usage
    # text.  Patterns that match nothing just give an empty list, so
    # the loop at the end then does nothing and prints nothing.
    try:
        targetFiles =3D reduce(operator.add, map(glob.glob, =
arguments[1:]))
    except TypeError:
        print; print __doc__
        return
    # NOTE(review): the only assert visible in this script is inside
    # getReplacements, which is called below, outside this try block,
    # so this handler does not appear to cover it - confirm, and move
    # the call inside the try if that was the intent.
    except AssertionError:
        print; print 'Invalid replacements file.'
        return

    # Build replacement object from replacements file
    multiReplace =3D getReplacements(replacementsFile, delimiter, =
wholeWords, caseInsensitive)

    # Perform replacement on each file
    for file in targetFiles:
        replaceFile(file, multiReplace, prefix)

def getReplacements(replacementsFile, delimiter=3D'\t', =
wholeWords=3DNone, caseInsensitive=3DNone):
    '''Read a replacements file and build a MultiReplace from it.

    replacementsFile is the path of a text file holding one replacement
    per line: the from value, the delimiter, then the to value.  Only
    the first two fields of a line are used.  delimiter defaults to a
    tab; wholeWords and caseInsensitive are handed to MultiReplace
    unchanged.

    Returns the MultiReplace object.  A line that does not contain the
    delimiter (blank lines included) fails the assert below.
    '''
   =20
    # The parameter name is rebound here from the path to the open file.
    # NOTE(review): there is no try/finally, so the file is left open
    # if any line below raises.
    replacementsFile =3D open(replacementsFile, 'r') # Open =
replacements file.

    replacements =3D {}   # Empty dictionary for replacements

    # For each line of the replacements file
    while 1:
        line =3D replacementsFile.readline()
        if line =3D=3D '':    # EOF
            break
        replacement =3D line.split(delimiter) # Split line
        # NOTE(review): assert is used as input validation here; it is
        # skipped when Python runs with -O, in which case a bad line
        # would fail on the index below instead.
        assert len(replacement) > 1 # There should be both a from and =
to value
        # NOTE(review): if the to value is empty (or becomes empty once
        # the newline is stripped) the [-1] index raises IndexError.
        while replacement[1][-1] in '\r\n': # Strip newlines
            replacement[1] =3D replacement[1][:-1]
        # A from value that appears twice keeps the last to value seen.
        replacements[replacement[0]] =3D replacement[1] # Add to =
dictionary

    replacementsFile.close()    # Close replacements file

    return MultiReplace(replacements, wholeWords, caseInsensitive) # =
Build replacement object from dictionary

def replaceFile(file, multiReplace, prefix=3D'New'):
    '''Write a copy of file with the replacements applied.

    file is the path of the file to read; it is not modified.
    multiReplace is an object with a replace(string) method, such as a
    MultiReplace instance.  The result is written to the same directory
    under the name formed from prefix, a space and the original base
    name.  Returns None.
    '''
    import os

    # The whole file is read into memory in binary mode.
    # NOTE(review): neither file is closed if read, replace or write
    # raises.
    infile =3D open(file, 'rb')
    filedata =3D infile.read()
    infile.close()

    filedata =3D multiReplace.replace(filedata)
   =20
    # NOTE(review): the join string is a single space, so the default
    # output name is 'New <name>' rather than 'New<name>' - confirm
    # that the space is intended.
    outfile =3D os.path.join(os.path.dirname(file), # To-file name
                           ' '.join((prefix, os.path.basename(file))))
    outfile =3D open(outfile, 'wb')
    outfile.write(filedata)
    outfile.close()

class MultiReplace:
    '''Apply a dictionary of from/to replacements to a string.

    Two strategies are used.  If whole-word matching is off and every
    from and to value is exactly one character long, a 256-entry
    translation table is built and replace() uses string.translate.
    Otherwise a regular expression that matches any of the from values
    is compiled and replace() substitutes each match by dictionary
    lookup.
    '''
    def __init__(self, replacements, wholeWords=3DNone, =
caseInsensitive=3DNone):
        '''Prepare either a translation table or a compiled pattern.

        replacements maps from values to to values.  wholeWords, when
        true, wraps the pattern in word boundaries and disables the
        translation-table path.  caseInsensitive, when true, maps both
        cases in the table or compiles the pattern with re.I.
        '''
        self.wholeWords =3D wholeWords
        self.charMap =3D None
        self.replacements =3D replacements
       =20
        # Assume char to char mapping...
        if not wholeWords:
            # Start from the identity table: one character per value
            # 0..255, then overwrite the entries being replaced.
            charMap =3D map(chr, range(256))
            for fromVal, toVal in replacements.items():
                # <> is the Python 2 inequality operator.  Any pair
                # that is not one char to one char abandons this path.
                if len(fromVal) <> 1 or len(toVal) <> 1:
                    break
                if caseInsensitive:
                    charMap[ord(fromVal.upper())] =3D toVal
                    charMap[ord(fromVal.lower())] =3D toVal
                else:
                    charMap[ord(fromVal)] =3D toVal
            else:
                # Loop ended without break: every pair was one char to
                # one char, so the table is complete and no pattern is
                # needed.
                self.charMap =3D "".join(charMap)
                return

        # String to string mapping - use a regular expression       =20
        # NOTE(review): alternation tries branches left to right, and
        # lexical order puts a short key before a longer key that
        # starts with it, so the longer key cannot match at the same
        # position.  Sorting longest first would avoid that.
        fromVals =3D replacements.keys()
        fromVals.sort() # lexical order

        if not wholeWords: # Build re pattern
            rePattern =3D '|'.join(map(re.escape, fromVals))
        else:
            rePattern =3D r'\b(' + '|'.join(map(re.escape, fromVals)) + =
r')\b'
       =20
        if caseInsensitive: # Compile re
            self.rePattern =3D re.compile(rePattern, re.I)
        else:
            self.rePattern =3D re.compile(rePattern)

    def replace(self, string):
        '''Return string with every replacement applied.'''
        # apply replacement to string
       =20
        # charMap is None unless __init__ built a translation table.
        if self.charMap: # Char to char mapping
            return string.translate(self.charMap)

        # String to string mapping       =20
        return self.rePattern.sub(self.__replaceInMatch, string)
   =20
    def __replaceInMatch(self, match):
        '''Return the replacement text for one regular expression match.

        Used as the callback for rePattern.sub.  The matched text is
        looked up in the replacements dictionary and returned unchanged
        if it is not a key.
        '''
        # NOTE(review): the lookup uses the matched text verbatim.  With
        # caseInsensitive set, a match whose case differs from its key
        # is not found and is therefore left unreplaced.
        item =3D match.group(0)
        return self.replacements.get(item, item)
       =20
# Script entry point: pass the command line, minus the program name,
# to multirep.  Nothing runs when the module is imported.
if __name__ =3D=3D '__main__':
    multirep(sys.argv[1:])
------_=_NextPart_000_01C03F21.EEEEA7E4--