[Python-ideas] Anyone interested in zsh-style subpattern matching for fnmatch/glob?

Gregory P. Smith greg at krypto.org
Sun Dec 7 05:46:47 CET 2008


This looks useful.

Please post it as a feature request issue with patch on bugs.python.org.
Also, if you could include updates to the fnmatch documentation to describe
exactly what your code allows, that would help.

thanks,
-Greg

On Sat, Dec 6, 2008 at 8:13 PM, Erick Tryzelaar <
idadesub at users.sourceforge.net> wrote:

> My project needs to extend fnmatch to support zsh-style globbing,
> where you can use curly braces to designate subexpressions. Say you had a
> directory structure like this:
>
> foo/
>  foo.ext1
>  foo.ext2
> bar/
>  foo.ext1
>  foo.ext2
>
> The subexpressions will let you do patterns like this:
>
> >>> glob.glob('foo/foo.{ext1,ext2}')
> ['foo/foo.ext1', 'foo/foo.ext2']
> >>> glob.glob('foo/foo.ext{1,2}')
> ['foo/foo.ext1', 'foo/foo.ext2']
> >>> glob.glob('{foo,bar}')
> ['bar', 'foo']
> >>> glob.glob('{foo,bar}/foo*')
> ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
> >>> glob.glob('{foo,bar}/foo.{ext*}')
> ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
> >>> glob.glob('{f?o,b?r}/foo.{ext*}')
> ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
>
>
> Would this be interesting to anyone else? It would unfortunately break
> backward compatibility for fnmatch, since fnmatch currently treats {} in a
> pattern as literal characters. It'd be easy to work around that by adding
> a flag or using a different function name.
> Anyway, here's the patch against the head of py3k.
>
> -e
>
>
>
> Index: Lib/glob.py
> ===================================================================
> --- Lib/glob.py (revision 67629)
> +++ Lib/glob.py (working copy)
> @@ -72,8 +72,8 @@
>     return []
>
>
> -magic_check = re.compile('[*?[]')
> -magic_check_bytes = re.compile(b'[*?[]')
> +magic_check = re.compile('[*?[{]')
> +magic_check_bytes = re.compile(b'[*?[{]')
>
>  def has_magic(s):
>     if isinstance(s, bytes):
> Index: Lib/fnmatch.py
> ===================================================================
> --- Lib/fnmatch.py      (revision 67629)
> +++ Lib/fnmatch.py      (working copy)
> @@ -22,10 +22,11 @@
>
>     Patterns are Unix shell style:
>
> -    *       matches everything
> -    ?       matches any single character
> -    [seq]   matches any character in seq
> -    [!seq]  matches any char not in seq
> +    *           matches everything
> +    ?           matches any single character
> +    [seq]       matches any character in seq
> +    [!seq]      matches any char not in seq
> +    {pat1,pat2} matches subpattern pat1 or subpattern pat2
>
>     An initial period in FILENAME is not special.
>     Both FILENAME and PATTERN are first case-normalized
> @@ -84,10 +85,15 @@
>     There is no way to quote meta-characters.
>     """
>
> -    i, n = 0, len(pat)
> +    return _translate(0, pat, '')[2] + '$'
> +
> +def _translate(i, pat, end):
>     res = ''
> +    n = len(pat)
>     while i < n:
>         c = pat[i]
> +        if c in end:
> +            return i, c, res
>         i = i+1
>         if c == '*':
>             res = res + '.*'
> @@ -111,6 +117,27 @@
>                 elif stuff[0] == '^':
>                     stuff = '\\' + stuff
>                 res = '%s[%s]' % (res, stuff)
> +        elif c == '{':
> +            i, sub = _translate_subexpression(i, pat)
> +            res += sub
>         else:
>             res = res + re.escape(c)
> -    return res + "$"
> +    return i, '', res
> +
> +def _translate_subexpression(i, pat):
> +    j = i
> +    subexpressions = []
> +    while True:
> +        j, c, res = _translate(j, pat, ',}')
> +        subexpressions.append(res)
> +
> +        if c == ',':
> +            j += 1
> +        elif c == '}':
> +            j += 1
> +            break
> +        else:
> +            # turns out we didn't have a subpattern
> +            return j, '{' + ','.join(subexpressions)
> +
> +    return j, '(' + '|'.join(subexpressions) + ')'
> Index: Lib/test/test_fnmatch.py
> ===================================================================
> --- Lib/test/test_fnmatch.py    (revision 67629)
> +++ Lib/test/test_fnmatch.py    (working copy)
> @@ -37,6 +37,12 @@
>         check('a', r'[!\]')
>         check('\\', r'[!\]', 0)
>
> +        check('abcdefghi', 'ab{cd,12*}ef{gh?,34}')
> +        check('ab1234ef34', 'ab{cd,12*}ef{gh?,34}')
> +
> +        check('abcdefgh', 'ab{cd,12*}ef{gh?,34}', 0)
> +        check('ab1234ef345', 'ab{cd,12*}ef{gh?,34}', 0)
> +
>     def test_mix_bytes_str(self):
>         self.assertRaises(TypeError, fnmatch, 'test', b'*')
>         self.assertRaises(TypeError, fnmatch, b'test', '*')
> Index: Lib/test/test_glob.py
> ===================================================================
> --- Lib/test/test_glob.py       (revision 67629)
> +++ Lib/test/test_glob.py       (working copy)
> @@ -69,6 +69,7 @@
>         eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab']))
>         eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab']))
>         eq(self.glob('*q'), [])
> +        eq(self.glob('a{?a,?b}'), map(self.norm, ['aaa', 'aab']))
>
>     def test_glob_nested_directory(self):
>         eq = self.assertSequencesEqual_noorder
> @@ -89,6 +90,9 @@
>            [self.norm('a', 'bcd', 'efg', 'ha')])
>         eq(self.glob('?a?', '*F'), map(self.norm, [os.path.join('aaa', 'zzzF'),
>                                                    os.path.join('aab', 'F')]))
> +        eq(self.glob('a', 'b{c,x}d', '{*}', '*a'),
> +           [self.norm('a', 'bcd', 'efg', 'ha')])
> +        eq(self.glob('a', 'b{x,y}d', '{*}', '*a'), [])
>
>     def test_glob_directory_with_trailing_slash(self):
>         # We are verifying that when there is wildcard pattern which
> _______________________________________________
> Python-ideas mailing list
> Python-ideas at python.org
> http://mail.python.org/mailman/listinfo/python-ideas
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-ideas/attachments/20081206/62a67f99/attachment.html>


More information about the Python-ideas mailing list