[Python-ideas] Anyone interested in zsh-style subpattern matching for fnmatch/glob?
Gregory P. Smith
greg at krypto.org
Sun Dec 7 05:46:47 CET 2008
This looks useful.
Please post it as a feature request issue with patch on bugs.python.org.
Also, if you could include updates to the fnmatch documentation to describe
exactly what your code allows, that would help.
thanks,
-Greg
On Sat, Dec 6, 2008 at 8:13 PM, Erick Tryzelaar <
idadesub at users.sourceforge.net> wrote:
> My project needs to extend fnmatch to support zsh-style globbing,
> where you can use curly braces to designate subexpressions. Say you had a
> directory structure like this:
>
> foo/
> foo.ext1
> foo.ext2
> bar/
> foo.ext1
> foo.ext2
>
> The subexpressions will let you do patterns like this:
>
> >>> glob.glob('foo/foo.{ext1,ext2}')
> ['foo/foo.ext1', 'foo/foo.ext2']
> >>> glob.glob('foo/foo.ext{1,2}')
> ['foo/foo.ext1', 'foo/foo.ext2']
> >>> glob.glob('{foo,bar}')
> ['bar', 'foo']
> >>> glob.glob('{foo,bar}/foo*')
> ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
> >>> glob.glob('{foo,bar}/foo.{ext*}')
> ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
> >>> glob.glob('{f?o,b?r}/foo.{ext*}')
> ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
>
>
> Would this be interesting to anyone else? It would unfortunately break
> backward compatibility in fnmatch, since it currently treats {} in a
> pattern as literal characters. It'd be easy to work around that by
> adding a flag or using a different function name.
> Anyway, here's the patch against the head of py3k.
>
> -e
>
>
>
> Index: Lib/glob.py
> ===================================================================
> --- Lib/glob.py (revision 67629)
> +++ Lib/glob.py (working copy)
> @@ -72,8 +72,8 @@
> return []
>
>
> -magic_check = re.compile('[*?[]')
> -magic_check_bytes = re.compile(b'[*?[]')
> +magic_check = re.compile('[*?[{]')
> +magic_check_bytes = re.compile(b'[*?[{]')
>
> def has_magic(s):
> if isinstance(s, bytes):
> Index: Lib/fnmatch.py
> ===================================================================
> --- Lib/fnmatch.py (revision 67629)
> +++ Lib/fnmatch.py (working copy)
> @@ -22,10 +22,11 @@
>
> Patterns are Unix shell style:
>
> - * matches everything
> - ? matches any single character
> - [seq] matches any character in seq
> - [!seq] matches any char not in seq
> + * matches everything
> + ? matches any single character
> + [seq] matches any character in seq
> + [!seq] matches any char not in seq
> + {pat1,pat2} matches subpattern pat1 or subpattern pat2
>
> An initial period in FILENAME is not special.
> Both FILENAME and PATTERN are first case-normalized
> @@ -84,10 +85,15 @@
> There is no way to quote meta-characters.
> """
>
> - i, n = 0, len(pat)
> + return _translate(0, pat, '')[2] + '$'
> +
> +def _translate(i, pat, end):
> res = ''
> + n = len(pat)
> while i < n:
> c = pat[i]
> + if c in end:
> + return i, c, res
> i = i+1
> if c == '*':
> res = res + '.*'
> @@ -111,6 +117,27 @@
> elif stuff[0] == '^':
> stuff = '\\' + stuff
> res = '%s[%s]' % (res, stuff)
> + elif c == '{':
> + i, sub = _translate_subexpression(i, pat)
> + res += sub
> else:
> res = res + re.escape(c)
> - return res + "$"
> + return i, '', res
> +
> +def _translate_subexpression(i, pat):
> + j = i
> + subexpressions = []
> + while True:
> + j, c, res = _translate(j, pat, ',}')
> + subexpressions.append(res)
> +
> + if c == ',':
> + j += 1
> + elif c == '}':
> + j += 1
> + break
> + else:
> + # turns out we didn't have a subpattern
> + return j, '{' + ','.join(subexpressions)
> +
> + return j, '(' + '|'.join(subexpressions) + ')'
> Index: Lib/test/test_fnmatch.py
> ===================================================================
> --- Lib/test/test_fnmatch.py (revision 67629)
> +++ Lib/test/test_fnmatch.py (working copy)
> @@ -37,6 +37,12 @@
> check('a', r'[!\]')
> check('\\', r'[!\]', 0)
>
> + check('abcdefghi', 'ab{cd,12*}ef{gh?,34}')
> + check('ab1234ef34', 'ab{cd,12*}ef{gh?,34}')
> +
> + check('abcdefgh', 'ab{cd,12*}ef{gh?,34}', 0)
> + check('ab1234ef345', 'ab{cd,12*}ef{gh?,34}', 0)
> +
> def test_mix_bytes_str(self):
> self.assertRaises(TypeError, fnmatch, 'test', b'*')
> self.assertRaises(TypeError, fnmatch, b'test', '*')
> Index: Lib/test/test_glob.py
> ===================================================================
> --- Lib/test/test_glob.py (revision 67629)
> +++ Lib/test/test_glob.py (working copy)
> @@ -69,6 +69,7 @@
> eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab']))
> eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab']))
> eq(self.glob('*q'), [])
> + eq(self.glob('a{?a,?b}'), map(self.norm, ['aaa', 'aab']))
>
> def test_glob_nested_directory(self):
> eq = self.assertSequencesEqual_noorder
> @@ -89,6 +90,9 @@
> [self.norm('a', 'bcd', 'efg', 'ha')])
> eq(self.glob('?a?', '*F'), map(self.norm, [os.path.join('aaa',
> 'zzzF'),
> os.path.join('aab',
> 'F')]))
> + eq(self.glob('a', 'b{c,x}d', '{*}', '*a'),
> + [self.norm('a', 'bcd', 'efg', 'ha')])
> + eq(self.glob('a', 'b{x,y}d', '{*}', '*a'), [])
>
> def test_glob_directory_with_trailing_slash(self):
> # We are verifying that when there is wildcard pattern which
> _______________________________________________
> Python-ideas mailing list
> Python-ideas at python.org
> http://mail.python.org/mailman/listinfo/python-ideas
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-ideas/attachments/20081206/62a67f99/attachment.html>
More information about the Python-ideas
mailing list