[Tutor] simple copy within a python script
Simon Brunning
SBrunning@trisystems.co.uk
Thu, 26 Oct 2000 08:54:13 +0100
This message is in MIME format. Since your mail reader does not understand
this format, some or all of this message may not be legible.
------_=_NextPart_000_01C03F21.EEEEA7E4
Content-Type: text/plain
Dwain,
You might find the attached script useful - it allows you to set up a simple
text file containing a list of from and to values (tab separated by
default), and to apply those replacements to multiple files.
Cheers,
Simon Brunning
TriSystems Ltd.
sbrunning@trisystems.co.uk
<<multirep.py>>
> -----Original Message-----
> From: Dwain Hargrave [SMTP:dwainh@corp.earthlink.net]
> Sent: Wednesday, October 25, 2000 5:42 PM
> To: 'tutor@python.org'
> Subject: [Tutor] simple copy within a python script
>
> Writing a python script to find a string in a file and replace it with
> another. But before I manipulate the file I want to make a copy of the
> file. How within the python script can I do this? Working on a Unix
> platform.
>
> --
> Dwain Hargrave
> Unix Systems Administrator
> Earthlink Inc.
>
>
>
>
> _______________________________________________
> Tutor maillist - Tutor@python.org
> http://www.python.org/mailman/listinfo/tutor
>
>
>
>
-----------------------------------------------------------------------
The information in this email is confidential and may be legally privileged.
It is intended solely for the addressee. Access to this email by anyone else
is unauthorised. If you are not the intended recipient, any disclosure,
copying, distribution, or any action taken or omitted to be taken in
reliance on it, is prohibited and may be unlawful. TriSystems Ltd. cannot
accept liability for statements made which are clearly the sender's own.
------_=_NextPart_000_01C03F21.EEEEA7E4
Content-Type: application/octet-stream;
name="multirep.py"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
filename="multirep.py"
#!/usr/bin/env python
# Module : multirep.py
# Synopsis : Multiple replacements
# Programmer : Simon Brunning - sbrunning@bigfoot.com
# Date : 20/09/2000
# Notes : Thanks to Fredrik Lundh for the MultiReplace class,
#         which I have butchered.
'''Perform multiple replacements.
Takes a list of delimited from and to values from a replacements file,
and replaces the from values with the to values in the target file(s).
The target files are not changed - new version(s) of the target file(s)
are written, with the filename prefixed.
Usage : Python multirep.py [options] replacementsfile targetfile(s)
Options: -h = help
         -d = delimiter (defaults to tabs, s = spaces, c = commas)
         -p = prefix for new files (defaults to 'New')
         -c = case insensitive match
         -w = replace whole words only'''
import sys, re
def multirep(arguments):
    '''Run a multiple-replacement job described by a command line.

    arguments is the argument list without the program name (for
    example sys.argv[1:]): optional flags, then the name of the
    replacements file, then one or more target file names or glob
    patterns.

    Options:
        -h, -?    print the module usage text and return
        -d VALUE  delimiter used in the replacements file: 's' selects
                  a space and 'c' a comma; any other value leaves the
                  default tab in place
        -p VALUE  prefix for the new files (default 'New')
        -w        replace whole words only
        -c        case insensitive match

    Returns None.  An unknown option, a missing replacements file or a
    missing target argument prints the usage text.  If getReplacements()
    raises AssertionError, 'Invalid replacements file.' is printed.
    Other errors (for example a replacements file that cannot be opened)
    propagate to the caller.
    '''
    import getopt
    import glob

    def showUsage():
        # __doc__ is resolved as a module global here, so this prints the
        # module usage text, not the docstring of multirep itself.
        # print(x) with a single argument gives the same output whether
        # print is a statement or a function.
        print('')
        print(__doc__)

    # Split arguments list into options and arguments
    try:
        options, arguments = getopt.getopt(arguments, '?hd:p:wc')
    except getopt.GetoptError:
        # Unknown option or missing option value: usage, not a traceback.
        showUsage()
        return

    # Set defaults
    delimiter = '\t'
    prefix = 'New'
    wholeWords = None
    caseInsensitive = None

    # Options - override defaults and show help
    for option, value in options:
        flag = option[-1]
        if flag in '?h':
            # Asking for help is the whole request; returning here also
            # avoids printing the usage text a second time below.
            showUsage()
            return
        elif flag == 'd':  # Specify delimiter
            if value == 's':
                delimiter = ' '
            elif value == 'c':
                delimiter = ','
        elif flag == 'p':  # Specify prefix
            prefix = value
        elif flag == 'w':  # Whole words
            wholeWords = 1
        elif flag == 'c':  # Case insensitive
            caseInsensitive = 1

    # 1st argument is the replacements file; at least one target file
    # name or pattern has to follow it.
    if len(arguments) < 2:
        showUsage()
        return
    replacementsFile = arguments[0]

    # Expand remaining arguments into target file list.  A pattern that
    # matches nothing simply contributes no files.
    targetFiles = []
    for pattern in arguments[1:]:
        targetFiles.extend(glob.glob(pattern))

    # Build replacement object from replacements file.  The
    # AssertionError comes from getReplacements(), so the handler has to
    # wrap this call.
    try:
        multiReplace = getReplacements(replacementsFile, delimiter,
                                       wholeWords, caseInsensitive)
    except AssertionError:
        print('')
        print('Invalid replacements file.')
        return

    # Perform replacement on each file
    for targetFile in targetFiles:
        replaceFile(targetFile, multiReplace, prefix)
def getReplacements(replacementsFile, delimiter='\t',
                    wholeWords=None, caseInsensitive=None):
    '''Build a MultiReplace object from a replacements file.

    replacementsFile is the name of a text file with one replacement per
    line: the from value, the delimiter, then the to value.  Trailing
    carriage returns and newlines are removed from the to value, which
    may be empty.  Fields after the second are ignored and blank lines
    are skipped.  If a from value occurs more than once the last line
    wins.

    wholeWords and caseInsensitive are handed to MultiReplace unchanged.

    Raises AssertionError for a non-blank line that does not contain the
    delimiter.
    '''
    replacements = {}  # from value -> to value
    infile = open(replacementsFile, 'r')
    try:
        for line in infile:
            if line.strip('\r\n') == '':
                continue  # blank line, e.g. at the end of the file
            fields = line.split(delimiter)
            # There should be both a from and a to value.  Raised
            # explicitly so the check is not stripped by python -O.
            if len(fields) < 2:
                raise AssertionError(
                    'no delimiter in replacements line: %r' % line)
            # rstrip() copes with an empty to value, where indexing the
            # last character would fail.
            replacements[fields[0]] = fields[1].rstrip('\r\n')
    finally:
        infile.close()  # also closed when a line is rejected

    # Build replacement object from dictionary
    return MultiReplace(replacements, wholeWords, caseInsensitive)
def replaceFile(file, multiReplace, prefix='New'):
    '''Write a copy of a file with the replacements applied.

    file is the path of the target file.  It is read in binary mode and
    is not modified.  multiReplace is any object whose replace() method
    takes the file contents and returns the new contents.  The result
    is written in binary mode to the same directory, under the name
    formed by joining prefix and the original file name with a single
    space.
    '''
    import os

    infile = open(file, 'rb')
    try:
        filedata = infile.read()
    finally:
        infile.close()  # released even if the read fails

    filedata = multiReplace.replace(filedata)

    # To-file name
    outname = os.path.join(os.path.dirname(file),
                           ' '.join((prefix, os.path.basename(file))))
    outfile = open(outname, 'wb')
    try:
        outfile.write(filedata)
    finally:
        outfile.close()  # released even if the write fails
class MultiReplace:
    '''Apply a dictionary of from -> to replacements to strings.

    When whole-word matching is off and every from and to value is a
    single character, the replacements are applied with a 256-character
    translation table.  Otherwise a regular expression that matches any
    of the from values is used.

    Attributes set by the constructor:
        replacements  the from -> to dictionary that was passed in
        wholeWords    the wholeWords flag that was passed in
        charMap       the translation table, or None in regular
                      expression mode
        rePattern     the compiled pattern (regular expression mode
                      only)

    The string given to replace() has to be of the same string type as
    the keys and values of the dictionary.
    '''

    def __init__(self, replacements, wholeWords=None,
                 caseInsensitive=None):
        '''Prepare the translation table or the regular expression.

        replacements maps from values to to values.  A true wholeWords
        restricts matches to whole words; a true caseInsensitive makes
        matching ignore case.
        '''
        self.wholeWords = wholeWords
        self.charMap = None
        self.replacements = replacements
        # Lower-cased from value -> to value; filled in below for
        # case-insensitive regular expression matching only.
        self.__folded = {}

        # Assume char to char mapping...
        if not wholeWords:
            charMap = self.__charTable(replacements, caseInsensitive)
            if charMap is not None:
                self.charMap = charMap
                return

        # String to string mapping - use a regular expression.  The
        # first alternative that matches wins, so longer from values go
        # first; otherwise 'a' would always shadow 'ab'.
        fromVals = sorted(replacements,
                          key=lambda text: (-len(text), text))
        rePattern = '|'.join(map(re.escape, fromVals))
        if wholeWords:
            rePattern = r'\b(' + rePattern + r')\b'

        if caseInsensitive:
            # The matched text may differ in case from the key, so keep
            # a case-folded table for __replaceInMatch.
            for fromVal, toVal in replacements.items():
                self.__folded[fromVal.lower()] = toVal
            self.rePattern = re.compile(rePattern, re.I)
        else:
            self.rePattern = re.compile(rePattern)

    def __charTable(self, replacements, caseInsensitive):
        '''Return a 256-character translation table, or None.

        None means the replacements are not a pure character to
        character mapping that fits the table, so the caller has to use
        the regular expression instead.
        '''
        table = [chr(code) for code in range(256)]
        for fromVal, toVal in replacements.items():
            if len(fromVal) != 1 or len(toVal) != 1:
                return None
            if caseInsensitive:
                sources = (fromVal.upper(), fromVal.lower())
            else:
                sources = (fromVal,)
            for source in sources:
                # Case conversion can yield several characters, and a
                # character can lie outside the table.
                if len(source) != 1 or ord(source) > 255:
                    return None
                table[ord(source)] = toVal
        return ''.join(table)

    def replace(self, string):
        '''Return string with the replacements applied.'''
        if self.charMap:  # Char to char mapping
            return string.translate(self.charMap)
        # String to string mapping
        return self.rePattern.sub(self.__replaceInMatch, string)

    def __replaceInMatch(self, match):
        '''Return the to value for the text of a single match.'''
        item = match.group(0)
        try:
            return self.replacements[item]
        except KeyError:
            # Case-insensitive match written in a different case than
            # the key; with nothing to look up the text is kept as is.
            return self.__folded.get(item.lower(), item)
=20
if __name__ == '__main__':
    # Executed as a script: hand everything after the program name to
    # multirep().
    multirep(sys.argv[1:])
------_=_NextPart_000_01C03F21.EEEEA7E4--