#
# deferDirWalk.py
# (C) 2004 Michal Pasternak
# This code is public domain
#
"""Walk a directory tree cooperatively inside a Twisted reactor.

One directory is listed per reactor tick, so a large tree can be scanned
without blocking the event loop for the whole traversal.
"""

import logging
import os
import sys

from twisted.internet import defer

_logger = logging.getLogger(__name__)


class deferDirWalk:
    """Incremental directory walker that reports its result via a Deferred."""

    def __init__(self, baseDir, statusProc=None, skipList=None, getMTime=False):
        """
        baseDir is the root of the tree to walk.

        statusProc, if given, is called after each processed directory with
        three arguments: number of files recorded so far, total size of
        those files in bytes, and the directory that was just processed.

        skipList is a list of directory *names* (not paths) that should not
        be descended into.

        getMTime is a boolean; when true the modification time of every
        file is recorded alongside its size.
        """
        self.baseDir = baseDir
        # A fresh list per instance: a shared ``[]`` default would leak
        # entries between walkers.  A caller-supplied list is kept by
        # reference, as before.
        self.skipList = [] if skipList is None else skipList
        self.statusProc = statusProc
        self.getMTime = getMTime
        self.restart()

    def restart(self):
        """Reset all traversal state so the walk starts again at baseDir."""
        self.dirsToCheck = [os.path.realpath(self.baseDir)]
        self.totalSize = 0
        self.doPause = False
        self.working = False
        self.fileDict = {}

    def run(self):
        """
        Start walking and return a Deferred.

        The Deferred fires with a tuple (fileDict, totalSize) once every
        queued directory has been processed.  fileDict maps each file path
        to (size, mtime), where mtime is None unless getMTime was set.
        """
        self.d = defer.Deferred()
        self.working = True
        self._callLater(0.1, self.nextStep)
        return self.d

    def _callLater(self, delay, func):
        """Schedule func on the reactor after delay seconds."""
        # Imported on use rather than at module level: importing
        # twisted.internet.reactor installs the default reactor, which
        # would stop an application from installing a different one after
        # importing this module.
        from twisted.internet import reactor
        return reactor.callLater(delay, func)

    def _scanDirectory(self, directory):
        """Record the files directly in directory and queue its subdirectories."""
        try:
            # Sorted to keep the ordering the old dircache.listdir provided.
            entries = sorted(os.listdir(directory))
        except OSError:
            # Skip unreadable directories instead of raising out of the
            # reactor callback, which would leave the Deferred unfired.
            _logger.warning("Cannot list %s", directory)
            return

        for entry in entries:
            path = os.path.join(directory, entry)
            if os.path.islink(path):
                # Symlinks are never followed or counted.
                continue
            if os.path.isdir(path):
                if entry not in self.skipList:
                    self.dirsToCheck.append(path)
            elif os.path.isfile(path):
                try:
                    size = os.path.getsize(path)
                    mtime = os.path.getmtime(path) if self.getMTime else None
                except OSError:
                    _logger.warning("Cannot stat %s", path)
                    continue
                self.fileDict[path] = (size, mtime)
                self.totalSize += size

    def nextStep(self):
        """
        Process the next queued directory, then reschedule.

        While paused nothing is processed but the step keeps rescheduling
        itself, so the walk resumes as soon as pause(False) is called.
        When the queue is empty the Deferred returned by run() is fired.
        """
        if not self.dirsToCheck:
            self.working = False
            self.d.callback((self.fileDict, self.totalSize))
            return

        if not self.doPause:
            current = self.dirsToCheck[0]
            self._scanDirectory(current)
            if self.statusProc:
                self.statusProc(len(self.fileDict), self.totalSize, current)
            self.dirsToCheck.pop(0)

        self._callLater(0.01, self.nextStep)

    def pause(self, doPause):
        """
        Pause the walker if doPause is true, resume it otherwise.
        """
        self.doPause = doPause


def _main(argv):
    """Command-line driver: walk argv[1] and print running and final totals."""
    from twisted.internet import reactor

    def archiveWalkStatus(fileCount, fileSize, lastDir):
        # The trailing "\r" keeps the progress report on a single line.
        sys.stdout.write(
            "%s (analyzed %i files (total size: %.2f MB))...\r"
            % (lastDir, fileCount, float(fileSize / (1024.0*1024.0))))
        sys.stdout.flush()

    def archiveWalkDone(retVal):
        (fileDict, fileSize) = retVal
        sys.stdout.write("\nJob done!\n")
        sys.stdout.write(
            "Totals:\n\tfiles: %i\n\tsize: %i bytes\n\n"
            % (len(fileDict), fileSize))
        sys.stdout.flush()
        reactor.stop()

    if len(argv) < 2:
        sys.stdout.write("usage: deferDirWalk.py dirname\n")
        return 1

    archiveWalker = deferDirWalk(os.path.realpath(argv[1]), archiveWalkStatus)
    archiveWalker.run().addCallback(archiveWalkDone)
    reactor.run()
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))