[Tutor] recursive glob -- recursive dir walk
spir
denis.spir at free.fr
Wed Jun 10 08:28:35 CEST 2009
Hello,
A follow-up ;-) to my previous question about glob.glob().
I need to 'glob' files recursively from a top dir (parameter). Tried to use os.walk, but the structure of its return value is really unhandy for such a use (strange, because it seems to me this precise use is typical). On the other hand, os.path.walk seemed to meet my needs, but it is deprecated.
I'd like to know if there are standard tools to do that. And your comments on the 2 approaches below.
Thank you,
denis
-1- I first wrote the following recurseDirGlob() tool func.
========================================================
import os, glob
def dirGlob(dir, pattern):
    """Return the paths inside directory *dir* whose names match *pattern*.

    dir     -- directory to search (not recursive).
    pattern -- glob pattern applied to the entries of *dir*, e.g. "*.txt".

    Returns the list produced by glob.glob(); each item is *dir* joined
    with a matching entry name.
    """
    # Only *pattern* is meant to be a glob expression.  Escape the
    # metacharacters of the directory part so that a directory literally
    # named e.g. "d[1]" is searched instead of being read as a pattern.
    if hasattr(glob, "escape"):
        literalDir = glob.escape(dir)
    else:
        # glob.escape() is not available on older interpreters; wrapping a
        # metacharacter in brackets makes glob match it literally.
        literalDir = "".join(
            "[%s]" % ch if ch in "*?[" else ch for ch in dir
        )
    fullPattern = os.path.join(literalDir, pattern)
    return glob.glob(fullPattern)
def recurseDirGlob(topdir=None, pattern="*.*", nest=False, verbose=False):
    """Collect the files matching *pattern* in *topdir* and below.

    topdir  -- directory to start from; None means the current working
               directory.
    pattern -- glob pattern handed to dirGlob() for every directory visited.
    nest    -- if true, the result of each sub-directory is appended as a
               sub-list, so the result mirrors the directory tree;
               otherwise everything is merged into one flat list.
    verbose -- if true, print each visited directory, the base names of its
               matches, and the sub-directories about to be visited.

    Returns the list of matches (containing nested lists when *nest* is
    true).
    """
    # Resolve the default first, so that verbose output shows the directory
    # actually scanned rather than "None".
    if topdir is None:
        topdir = os.getcwd()
    if verbose:
        print("*** %s" % topdir)

    # current dir
    filenames = dirGlob(topdir, pattern)
    if verbose:
        for filename in filenames:
            print(" %s" % os.path.basename(filename))
    allFilenames = list(filenames)

    # possible sub dirs
    # NOTE: os.path.isdir() follows symbolic links, so a link that points
    # back up the tree makes this recurse until the recursion limit.
    entries = [os.path.join(topdir, name) for name in os.listdir(topdir)]
    subdirs = [entry for entry in entries if os.path.isdir(entry)]
    if verbose:
        print("--> %s" % [os.path.basename(d) for d in subdirs])
    for subdir in subdirs:
        found = recurseDirGlob(subdir, pattern, nest, verbose)
        if nest:
            # keep the sub-directory's matches together as one element
            allFilenames.append(found)
        else:
            allFilenames.extend(found)

    # final result
    return allFilenames
========================================================
Example with the following dir structure; with nest=True, the files found in sub-directories are returned as nested lists that mirror the directory tree.
========================================================
d0
d01
d02
d020
2 .txt files and 1 with a different pattern, in each dir
recurseDirGlob("/home/spir/prog/d0", "*.txt", verbose=True) -->
*** /home/spir/prog/d0
t01.txt
t02.txt
--> ['d01', 'd02']
*** /home/spir/prog/d0/d01
t011.txt
t012.txt
--> []
*** /home/spir/prog/d0/d02
t021.txt
t022.txt
--> ['d020']
*** /home/spir/prog/d0/d02/d020
t0201.txt
t0202.txt
--> []
['/home/spir/prog/d0/t01.txt', '/home/spir/prog/d0/t02.txt', '/home/spir/prog/d0/d01/t011.txt', '/home/spir/prog/d0/d01/t012.txt', '/home/spir/prog/d0/d02/t021.txt', '/home/spir/prog/d0/d02/t022.txt', '/home/spir/prog/d0/d02/d020/t0201.txt', '/home/spir/prog/d0/d02/d020/t0202.txt']
recurseDirGlob("/home/spir/prog/d0", "*.txt") -->
['/home/spir/prog/d0/t01.txt', '/home/spir/prog/d0/t02.txt', '/home/spir/prog/d0/d01/t011.txt', '/home/spir/prog/d0/d01/t012.txt', '/home/spir/prog/d0/d02/t021.txt', '/home/spir/prog/d0/d02/t022.txt', '/home/spir/prog/d0/d02/d020/t0201.txt', '/home/spir/prog/d0/d02/d020/t0202.txt']
recurseDirGlob("/home/spir/prog/d0", "*.txt", nest=True) -->
['/home/spir/prog/d0/t01.txt', '/home/spir/prog/d0/t02.txt', ['/home/spir/prog/d0/d01/t011.txt', '/home/spir/prog/d0/d01/t012.txt'], ['/home/spir/prog/d0/d02/t021.txt', '/home/spir/prog/d0/d02/t022.txt', ['/home/spir/prog/d0/d02/d020/t0201.txt', '/home/spir/prog/d0/d02/d020/t0202.txt']]]
========================================================
-2- Another approach was to build a general 'dirWalk' tool func, similar to os.path.walk:
========================================================
def dirWalk(topdir=None, func=None, args=(), nest=False, verbose=False):
    """Call *func* on *topdir* and on every directory below it.

    topdir  -- directory to start from; None means the current working
               directory.
    func    -- callable invoked as func(directory, *args) for each directory;
               it must return an iterable of results.
    args    -- extra positional arguments passed to *func* (any sequence).
    nest    -- if true, the results of each sub-directory are appended as a
               sub-list, so the result mirrors the directory tree;
               otherwise everything is merged into one flat list.
    verbose -- if true, print each visited directory, the results obtained
               for it, and the sub-directories about to be visited.

    Returns the list of collected results (containing nested lists when
    *nest* is true).  Raises TypeError if *func* is not supplied.
    """
    if func is None:
        # Calling None would raise TypeError anyway; say what is missing.
        raise TypeError("dirWalk() requires a callable 'func'")
    # Resolve the default first, so that verbose output shows the directory
    # actually visited rather than "None".
    if topdir is None:
        topdir = os.getcwd()
    if verbose:
        print("*** %s" % topdir)

    # current dir
    results = func(topdir, *args)
    if verbose:
        # Wrap in a 1-tuple: a bare tuple result would otherwise be taken
        # as the argument list of the % operator.
        print(" %s" % (results,))
    allResults = list(results)

    # possible sub dirs
    # NOTE: os.path.isdir() follows symbolic links, so a link that points
    # back up the tree makes this recurse until the recursion limit.
    entries = [os.path.join(topdir, name) for name in os.listdir(topdir)]
    subdirs = [entry for entry in entries if os.path.isdir(entry)]
    if verbose:
        print("--> %s" % [os.path.basename(d) for d in subdirs])
    for subdir in subdirs:
        subResults = dirWalk(subdir, func, args, nest, verbose)
        if nest:
            # keep the sub-directory's results together as one element
            allResults.append(subResults)
        else:
            allResults.extend(subResults)

    # final allResults
    return allResults
========================================================
Example calls that produce the same results as above, using dirGlob as the per-directory function, would be:
dirWalk("/home/spir/prog/d0", dirGlob, args=["*.txt"], verbose=True) -->
dirWalk("/home/spir/prog/d0", dirGlob, args=["*.txt"])
dirWalk("/home/spir/prog/d0", dirGlob, args=["*.txt"], nest=True)
Denis
------
la vita e estrany
More information about the Tutor
mailing list