Python Script for Running a Python Program over Different Files in the Directory

John Roth newsgroups at jhrothjr.com
Sat Mar 13 18:06:12 CET 2004


"Shalen chhabra" <shalen_itbhu at hotmail.com> wrote in message
news:mailman.351.1079166133.19534.python-list at python.org...
> Hey,
>
> Can anyone give me a snippet for running a python program over all the
files
> in the directory.
> For ex:  I have ten files in a directory and I want to run a python
program
> against all of these files, I wish to do the same using another python
code
> instead of running each of these files one by one, which would be
cumbersome
> giving the argv of each file every single time.
>
> This can be easily done using a shell script but I just wanted to have a
> flavour of python for this.

Given your reply to the attempts to help, I'm going to
assume that what you want is to separate the actual manipulation
of each file from the logic of determining which files to manipulate.

If this isn't what you want, please stop reading now and don't
bother to reply - it'll save both of us aggravation.

The answer to the problem is the visitor pattern. It's a
standard pattern (see "Design Patterns" [GOF].)

The directory program is:

---------- DirBase.py ------------------------
# Basic classes for file maintenance

import os, stat, os.path

class DirectoryList(object):
    """Sorted snapshot of one directory's entries that a visitor can walk.

    The listing is captured once, when the object is built; later changes
    to the directory are not reflected in ``dirList``.
    """

    def __init__(self, pathName):
        """Remember *pathName* and capture its entry names in sorted order."""
        self.pathName = pathName
        self.dirList = sorted(os.listdir(pathName))

    def walk(self, visitor):
        """Show every captured entry to *visitor*.

        Each entry is stat'ed.  Regular files are passed to
        ``visitor.doFile(path, status)``; every other kind of entry is
        passed to ``visitor.doDir(path, status)``.
        """
        for entryName in self.dirList:
            entryPath = os.path.join(self.pathName, entryName)
            entryStatus = os.stat(entryPath)
            if not stat.S_ISREG(entryStatus.st_mode):
                visitor.doDir(entryPath, entryStatus)
                continue
            visitor.doFile(entryPath, entryStatus)

class cleanDirectory(object):
    """Visitor that deletes every entry it is shown."""

    def doFile(self, filePath, fileStatus):
        """Delete the file at *filePath*; *fileStatus* is not consulted."""
        os.unlink(filePath)

    def doDir(self, dirPath, dirStatus):
        """Empty *dirPath* with a fresh cleanDirectory visitor, then remove it."""
        contents = DirectoryList(dirPath)
        contents.walk(cleanDirectory())
        # rmdir only succeeds on an empty directory, hence the walk first.
        os.rmdir(dirPath)

def fetchFile(inDirPath, fileName):
    """Return the whole contents of a file, read in binary mode.

    inDirPath -- directory that contains the file
    fileName  -- name of the file inside inDirPath

    Errors raised by open() or read() propagate to the caller.
    """
    filePath = os.path.join(inDirPath, fileName)
    # The with-block closes the handle even when read() raises, so a
    # failed read does not leak the open file.
    with open(filePath, 'rb') as fileObj:
        return fileObj.read()

def fetchTextFile(inDirPath, fileName):
    """Return the lines of a file, read in text mode, as a list.

    inDirPath -- directory that contains the file
    fileName  -- name of the file inside inDirPath

    Each list item keeps its line terminator, as readlines() returns it.
    Errors raised by open() or readlines() propagate to the caller.
    """
    filePath = os.path.join(inDirPath, fileName)
    # The with-block closes the handle even when readlines() raises, so a
    # failed read does not leak the open file.
    with open(filePath, 'rt') as fileObj:
        return fileObj.readlines()

def storeFile(fileText, outNameList, fileStatus):
    """Write data to a file in binary mode and stamp it with given times.

    fileText    -- the data to write
    outNameList -- sequence of path components, combined with os.path.join
    fileStatus  -- object providing st_atime and st_mtime (for example an
                   os.stat() result); both are applied to the new file

    Errors raised by open(), write() or os.utime() propagate to the caller.
    """
    outFilePath = os.path.join(*outNameList)
    # The with-block closes the handle even when write() raises, so a
    # failed write does not leak the open file.
    with open(outFilePath, 'wb') as outFileObj:
        outFileObj.write(fileText)
    # Stamp the times only after the file is closed, so the write above
    # cannot modify them again afterwards.
    os.utime(outFilePath, (fileStatus.st_atime, fileStatus.st_mtime))

def storeTextFile(fileList, outNameList, fileStatus):
    """Write a list of lines to a file in text mode and stamp it with given times.

    fileList    -- the strings to write; they are written as-is with
                   writelines(), so each must carry its own line terminator
    outNameList -- sequence of path components, combined with os.path.join
    fileStatus  -- object providing st_atime and st_mtime (for example an
                   os.stat() result); both are applied to the new file

    Errors raised by open(), writelines() or os.utime() propagate to the
    caller.
    """
    outFilePath = os.path.join(*outNameList)
    # The with-block closes the handle even when writelines() raises, so a
    # failed write does not leak the open file.
    with open(outFilePath, 'wt') as outFileObj:
        outFileObj.writelines(fileList)
    # Stamp the times only after the file is closed, so the write above
    # cannot modify them again afterwards.
    os.utime(outFilePath, (fileStatus.st_atime, fileStatus.st_mtime))

------------------------------------------------------------

An example of how to use it is:

----------- MyFileManipulationProgram.py ---------------

# reorganize files captured from the *** web site

import os, stat, os.path
import re
from DirBase import *

def setUpOutdir():
    """Make sure "outDir" exists in the current directory and is empty.

    If the directory is missing it is created, so a first run no longer
    fails while trying to list it.  If it already exists, everything inside
    it is deleted with the cleanDirectory visitor.
    """
    if not os.path.isdir("outDir"):
        # A freshly created directory has nothing to clean.
        os.makedirs("outDir")
        return
    DirectoryList("outDir").walk(cleanDirectory())

class copyPicture(object):
    """Visitor that copies each visited file, unchanged, into outDir/pics."""

    def doFile(self, filePath, fileStatus):
        """Copy the file at *filePath* to outDir/pics under the same name.

        *fileStatus* is handed on to storeFile together with the data.
        """
        sourceDir = os.path.dirname(filePath)
        pictureName = os.path.basename(filePath)
        pictureData = fetchFile(sourceDir, pictureName)
        storeFile(pictureData, ("outDir", "pics", pictureName), fileStatus)

    def doDir(self, dirPath, dirStatus):
        """Do nothing for directories."""
        return None

# precompile patterns used for multiple files
#
# None of these patterns sets re.DOTALL or re.IGNORECASE, so "." does not
# cross a newline and tag names are matched case-sensitively.
# NOTE(review): these are text patterns, while fetchFile in DirBase opens
# files in 'rb' mode; under Python 3 that data would be bytes and .sub()
# would raise TypeError -- confirm the intended Python version.

# A bare "<script>" element (no attributes) with its body, non-greedy.
script = re.compile(r"<script>.*?</script>")
# An upper-case "<META ...>" tag; its only use in copyWebPage is commented out.
meta = re.compile(r"<META.*?>")
# Any "<!-- ... -->" comment, non-greedy; not referenced by the code shown here.
cmnt = re.compile(r"<\!--.*?-->")
# Exactly the marker "<!--//-->".
cmnt1 = re.compile(r"<\!--//-->")
# The literal text ".html", wherever it occurs.
html = re.compile(r"\.html")

class copyWebPage(object):
    """Visitor that copies each visited file into outDir, tidying .htm pages."""

    def doFile(self, inFilePath, fileStatus):
        """Copy one file into outDir, rewriting it first if it is a .htm page.

        For a file whose extension is exactly ".htm":
          * text matched by the module-level ``script`` and ``cmnt1``
            patterns is removed,
          * text matched by ``html`` is replaced with ".htm",
          * every occurrence of "<name>_files" (where <name> is the file
            name without its extension) is replaced with "pics".
        Any other file is stored unchanged.  *fileStatus* is handed on to
        storeFile together with the data.

        A line describing the path components is printed for every file.
        """
        head, tail = os.path.split(inFilePath)
        fileName, extension = os.path.splitext(tail)
        inFileText = fetchFile(head, tail)
        print ("path: '%s' head: '%s' tail: '%s' name: '%s' ext: '%s'\n" %
               (inFilePath, head, tail, fileName, extension))
        if extension == ".htm":
            inFileText = script.sub("", inFileText)
            #inFileText = meta.sub("", inFileText)
            inFileText = cmnt1.sub("", inFileText)
            inFileText = html.sub(".htm", inFileText)
            # The file name is data, not a regular expression: escape it so
            # characters such as ".", "+" or "(" in the name match literally
            # instead of being interpreted (or rejected) by the regex engine.
            subPattern = re.escape("%s_files" % fileName)
            inFileText = re.sub(subPattern, "pics", inFileText)

        storeFile(inFileText, ("outDir", tail), fileStatus)

    def doDir(self, dirPath, dirStatus):
        """Do nothing for directories."""
        pass

def main(inDirPath):
    """Run a copyWebPage visitor over the entries listed for *inDirPath*."""
    listing = DirectoryList(inDirPath)
    pageCopier = copyWebPage()
    listing.walk(pageCopier)

if __name__ == "__main__":
    import sys

    # Take the input directory from the first command-line argument when
    # one is given; otherwise fall back to the original fixed location.
    if len(sys.argv) > 1:
        inputDirectory = sys.argv[1]
    else:
        inputDirectory = r"c:\mydirectory"

    setUpOutdir()
    main(inputDirectory)

----------------------------------------------------------------------

I have any number of file fixup programs that use the
same DirBase.py program.

HTH

John Roth

> Thanks
> Shalen





More information about the Python-list mailing list