[Tutor] recursive glob -- recursive dir walk

Sander Sweers sander.sweers at gmail.com
Wed Jun 10 14:30:43 CEST 2009


2009/6/10 spir <denis.spir at free.fr>:
> A follow-up ;-) to the previous question about glob.glob().

Hopefully no misunderstanding this time :-)

> I need to 'glob' files recursively from a top dir (parameter). Tried to use os.walk, but the structure of its return value is really unhandy for such a use (strange, because it seems to me this precise use is typical). On the other hand, os.path.walk seemed to meet my needs, but it is deprecated.

Is it really deprecated? It is still in the 3.0 docs with no mention of this.

> I'd like to know if there are standard tools to do that. And your comments on the 2 approaches below.

Well, this is what I came up with, which I am sure someone can improve on.

>>> patern = '*.txt'
>>> topdir = 'C:\\GTK\\'
>>> textfiles = [f[0] for f in [glob.glob(os.path.join(d[0], patern)) for d in os.walk(topdir)] if f]
>>> textfiles
['C:\\GTK\\license.txt']

Greets
Sander


> -1- I first wrote the following recurseDirGlob() tool func.
>
> ========================================================
> import os, glob
>
> def dirGlob(dir, pattern):
>        ''' File names matching pattern in directory dir. '''
>        fullPattern = os.path.join(dir,pattern)
>        return glob.glob(fullPattern)
>
> def recurseDirGlob(topdir=None, pattern="*.*", nest=False, verbose=False):
>        '''  '''
>        allFilenames = list()
>        # current dir
>        if verbose:
>                print "*** %s" %topdir
>        if topdir is None: topdir = os.getcwd()
>        filenames = dirGlob(topdir, pattern)
>        if verbose:
>                for filename in [os.path.basename(d) for d in filenames]:
>                        print "   %s" %filename
>        allFilenames.extend(filenames)
>        # possible sub dirs
>        names = [os.path.join(topdir, dir) for dir in os.listdir(topdir)]
>        dirs = [n for n in names if os.path.isdir(n)]
>        if verbose:
>                print "--> %s" % [os.path.basename(d) for d in dirs]
>        if len(dirs) > 0:
>                for dir in dirs:
>                        filenames = recurseDirGlob(dir, pattern, nest, verbose)
>                        if nest:
>                                allFilenames.append(filenames)
>                        else:
>                                allFilenames.extend(filenames)
>        # final result
>        return allFilenames
> ========================================================
>
> Example with the following dir structure; the version with nest=True will recursively nest files from subdirs.
>
> ========================================================
> d0
>        d01
>        d02
>                d020
> 2 .txt files and 1 with a different pattern, in each dir
>
> recurseDirGlob("/home/spir/prog/d0", "*.txt", verbose=True) -->
> *** /home/spir/prog/d0
>   t01.txt
>   t02.txt
> --> ['d01', 'd02']
> *** /home/spir/prog/d0/d01
>   t011.txt
>   t012.txt
> --> []
> *** /home/spir/prog/d0/d02
>   t021.txt
>   t022.txt
> --> ['d020']
> *** /home/spir/prog/d0/d02/d020
>   t0201.txt
>   t0202.txt
> --> []
> ['/home/spir/prog/d0/t01.txt', '/home/spir/prog/d0/t02.txt', '/home/spir/prog/d0/d01/t011.txt', '/home/spir/prog/d0/d01/t012.txt', '/home/spir/prog/d0/d02/t021.txt', '/home/spir/prog/d0/d02/t022.txt', '/home/spir/prog/d0/d02/d020/t0201.txt', '/home/spir/prog/d0/d02/d020/t0202.txt']
>
> recurseDirGlob("/home/spir/prog/d0", "*.txt") -->
> ['/home/spir/prog/d0/t01.txt', '/home/spir/prog/d0/t02.txt', '/home/spir/prog/d0/d01/t011.txt', '/home/spir/prog/d0/d01/t012.txt', '/home/spir/prog/d0/d02/t021.txt', '/home/spir/prog/d0/d02/t022.txt', '/home/spir/prog/d0/d02/d020/t0201.txt', '/home/spir/prog/d0/d02/d020/t0202.txt']
>
> recurseDirGlob("/home/spir/prog/d0", "*.txt", nest=True) -->
> ['/home/spir/prog/d0/t01.txt', '/home/spir/prog/d0/t02.txt', ['/home/spir/prog/d0/d01/t011.txt', '/home/spir/prog/d0/d01/t012.txt'], ['/home/spir/prog/d0/d02/t021.txt', '/home/spir/prog/d0/d02/t022.txt', ['/home/spir/prog/d0/d02/d020/t0201.txt', '/home/spir/prog/d0/d02/d020/t0202.txt']]]
> ========================================================
>
>
>
> -2- Another approach was to build a general 'dirWalk' tool func, similar to os.path.walk:
>
> ========================================================
> def dirWalk(topdir=None, func=None, args=[], nest=False, verbose=False):
>        '''  '''
>        allResults = list()
>        # current dir
>        if verbose:
>                print "*** %s" %topdir
>        if topdir is None: topdir = os.getcwd()
>        results = func(topdir, *args)
>        if verbose:
>                print "    %s" % results
>        allResults.extend(results)
>        # possible sub dirs
>        names = [os.path.join(topdir, dir) for dir in os.listdir(topdir)]
>        dirs = [n for n in names if os.path.isdir(n)]
>        if verbose:
>                print "--> %s" % [os.path.basename(d) for d in dirs]
>        if len(dirs) > 0:
>                for dir in dirs:
>                        results = dirWalk(dir, func, args, nest, verbose)
>                        if nest:
>                                allResults.append(results)
>                        else:
>                                allResults.extend(results)
>        # final allResults
>        return allResults
> ========================================================
>
> Example uses to bring the same results, calling dirGlob, would be:
>
> dirWalk("/home/spir/prog/d0", dirGlob, args=["*.txt"], verbose=True) -->
> dirWalk("/home/spir/prog/d0", dirGlob, args=["*.txt"])
> dirWalk("/home/spir/prog/d0", dirGlob, args=["*.txt"], nest=True)
>
> Denis
> ------
> la vita e estrany
> _______________________________________________
> Tutor maillist  -  Tutor at python.org
> http://mail.python.org/mailman/listinfo/tutor
>


More information about the Tutor mailing list