Anyone interested in zsh-style subpattern matching for fnmatch/glob?

My project needs to extend fnmatch to support zsh-style globbing, where you can use curly braces to designate subexpressions. Say you had a directory structure like this: foo/ foo.ext1 foo.ext2 bar/ foo.ext1 foo.ext2 The subexpressions will let you do patterns like this:
glob.glob('foo/foo.{ext1,ext2}') ['foo/foo.ext1', 'foo/foo.ext2'] glob.glob('foo/foo.ext{1,2}') ['foo/foo.ext1', 'foo/foo.ext2'] glob.glob('{foo,bar}') ['bar', 'foo'] glob.glob('{foo,bar}/foo*') ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2'] glob.glob('{foo,bar}/foo.{ext*}') ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2'] glob.glob('{f?o,b?r}/foo.{ext*}') ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
Would this be interesting to anyone else? It would unfortunately break fnmatch since it currently would ignore with {} in it. It'd be easy to work around that by adding a flag or using a different function name. Anyway, here's the patch against the head of py3k. -e Index: Lib/glob.py =================================================================== --- Lib/glob.py (revision 67629) +++ Lib/glob.py (working copy) @@ -72,8 +72,8 @@ return [] -magic_check = re.compile('[*?[]') -magic_check_bytes = re.compile(b'[*?[]') +magic_check = re.compile('[*?[{]') +magic_check_bytes = re.compile(b'[*?[{]') def has_magic(s): if isinstance(s, bytes): Index: Lib/fnmatch.py =================================================================== --- Lib/fnmatch.py (revision 67629) +++ Lib/fnmatch.py (working copy) @@ -22,10 +22,11 @@ Patterns are Unix shell style: - * matches everything - ? matches any single character - [seq] matches any character in seq - [!seq] matches any char not in seq + * matches everything + ? matches any single character + [seq] matches any character in seq + [!seq] matches any char not in seq + {pat1,pat2} matches subpattern pat1 or subpattern pat2 An initial period in FILENAME is not special. Both FILENAME and PATTERN are first case-normalized @@ -84,10 +85,15 @@ There is no way to quote meta-characters. 
""" - i, n = 0, len(pat) + return _translate(0, pat, '')[2] + '$' + +def _translate(i, pat, end): res = '' + n = len(pat) while i < n: c = pat[i] + if c in end: + return i, c, res i = i+1 if c == '*': res = res + '.*' @@ -111,6 +117,27 @@ elif stuff[0] == '^': stuff = '\\' + stuff res = '%s[%s]' % (res, stuff) + elif c == '{': + i, sub = _translate_subexpression(i, pat) + res += sub else: res = res + re.escape(c) - return res + "$" + return i, '', res + +def _translate_subexpression(i, pat): + j = i + subexpressions = [] + while True: + j, c, res = _translate(j, pat, ',}') + subexpressions.append(res) + + if c == ',': + j += 1 + elif c == '}': + j += 1 + break + else: + # turns out we didn't have a subpattern + return j, '{' + ','.join(subexpressions) + + return j, '(' + '|'.join(subexpressions) + ')' Index: Lib/test/test_fnmatch.py =================================================================== --- Lib/test/test_fnmatch.py (revision 67629) +++ Lib/test/test_fnmatch.py (working copy) @@ -37,6 +37,12 @@ check('a', r'[!\]') check('\\', r'[!\]', 0) + check('abcdefghi', 'ab{cd,12*}ef{gh?,34}') + check('ab1234ef34', 'ab{cd,12*}ef{gh?,34}') + + check('abcdefgh', 'ab{cd,12*}ef{gh?,34}', 0) + check('ab1234ef345', 'ab{cd,12*}ef{gh?,34}', 0) + def test_mix_bytes_str(self): self.assertRaises(TypeError, fnmatch, 'test', b'*') self.assertRaises(TypeError, fnmatch, b'test', '*') Index: Lib/test/test_glob.py =================================================================== --- Lib/test/test_glob.py (revision 67629) +++ Lib/test/test_glob.py (working copy) @@ -69,6 +69,7 @@ eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab'])) eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab'])) eq(self.glob('*q'), []) + eq(self.glob('a{?a,?b}'), map(self.norm, ['aaa', 'aab'])) def test_glob_nested_directory(self): eq = self.assertSequencesEqual_noorder @@ -89,6 +90,9 @@ [self.norm('a', 'bcd', 'efg', 'ha')]) eq(self.glob('?a?', '*F'), map(self.norm, [os.path.join('aaa', 'zzzF'), 
os.path.join('aab', 'F')])) + eq(self.glob('a', 'b{c,x}d', '{*}', '*a'), + [self.norm('a', 'bcd', 'efg', 'ha')]) + eq(self.glob('a', 'b{x,y}d', '{*}', '*a'), []) def test_glob_directory_with_trailing_slash(self): # We are verifying that when there is wildcard pattern which

This looks useful. Please post it as a feature request issue with patch on bugs.python.org. Also, if you could include updates to the fnmatch documentation to describe exactly what your code allows that would help. thanks, -Greg On Sat, Dec 6, 2008 at 8:13 PM, Erick Tryzelaar < idadesub@users.sourceforge.net> wrote:
My project needs to extend fnmatch to support zsh-style globbing, where you can use curly braces to designate subexpressions. Say you had a directory structure like this:
foo/ foo.ext1 foo.ext2 bar/ foo.ext1 foo.ext2
The subexpressions will let you do patterns like this:
glob.glob('foo/foo.{ext1,ext2}') ['foo/foo.ext1', 'foo/foo.ext2'] glob.glob('foo/foo.ext{1,2}') ['foo/foo.ext1', 'foo/foo.ext2'] glob.glob('{foo,bar}') ['bar', 'foo'] glob.glob('{foo,bar}/foo*') ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2'] glob.glob('{foo,bar}/foo.{ext*}') ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2'] glob.glob('{f?o,b?r}/foo.{ext*}') ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
Would this be interesting to anyone else? Unfortunately, it would break backward compatibility in fnmatch, since fnmatch currently treats { and } in a pattern as literal characters. It'd be easy to work around that by adding a flag or using a different function name. Anyway, here's the patch against the head of py3k.
-e
Index: Lib/glob.py =================================================================== --- Lib/glob.py (revision 67629) +++ Lib/glob.py (working copy) @@ -72,8 +72,8 @@ return []
-magic_check = re.compile('[*?[]') -magic_check_bytes = re.compile(b'[*?[]') +magic_check = re.compile('[*?[{]') +magic_check_bytes = re.compile(b'[*?[{]')
def has_magic(s): if isinstance(s, bytes): Index: Lib/fnmatch.py =================================================================== --- Lib/fnmatch.py (revision 67629) +++ Lib/fnmatch.py (working copy) @@ -22,10 +22,11 @@
Patterns are Unix shell style:
- * matches everything - ? matches any single character - [seq] matches any character in seq - [!seq] matches any char not in seq + * matches everything + ? matches any single character + [seq] matches any character in seq + [!seq] matches any char not in seq + {pat1,pat2} matches subpattern pat1 or subpattern pat2
An initial period in FILENAME is not special. Both FILENAME and PATTERN are first case-normalized @@ -84,10 +85,15 @@ There is no way to quote meta-characters. """
- i, n = 0, len(pat) + return _translate(0, pat, '')[2] + '$' + +def _translate(i, pat, end): res = '' + n = len(pat) while i < n: c = pat[i] + if c in end: + return i, c, res i = i+1 if c == '*': res = res + '.*' @@ -111,6 +117,27 @@ elif stuff[0] == '^': stuff = '\\' + stuff res = '%s[%s]' % (res, stuff) + elif c == '{': + i, sub = _translate_subexpression(i, pat) + res += sub else: res = res + re.escape(c) - return res + "$" + return i, '', res + +def _translate_subexpression(i, pat): + j = i + subexpressions = [] + while True: + j, c, res = _translate(j, pat, ',}') + subexpressions.append(res) + + if c == ',': + j += 1 + elif c == '}': + j += 1 + break + else: + # turns out we didn't have a subpattern + return j, '{' + ','.join(subexpressions) + + return j, '(' + '|'.join(subexpressions) + ')' Index: Lib/test/test_fnmatch.py =================================================================== --- Lib/test/test_fnmatch.py (revision 67629) +++ Lib/test/test_fnmatch.py (working copy) @@ -37,6 +37,12 @@ check('a', r'[!\]') check('\\', r'[!\]', 0)
+ check('abcdefghi', 'ab{cd,12*}ef{gh?,34}') + check('ab1234ef34', 'ab{cd,12*}ef{gh?,34}') + + check('abcdefgh', 'ab{cd,12*}ef{gh?,34}', 0) + check('ab1234ef345', 'ab{cd,12*}ef{gh?,34}', 0) + def test_mix_bytes_str(self): self.assertRaises(TypeError, fnmatch, 'test', b'*') self.assertRaises(TypeError, fnmatch, b'test', '*') Index: Lib/test/test_glob.py =================================================================== --- Lib/test/test_glob.py (revision 67629) +++ Lib/test/test_glob.py (working copy) @@ -69,6 +69,7 @@ eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab'])) eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab'])) eq(self.glob('*q'), []) + eq(self.glob('a{?a,?b}'), map(self.norm, ['aaa', 'aab']))
def test_glob_nested_directory(self): eq = self.assertSequencesEqual_noorder @@ -89,6 +90,9 @@ [self.norm('a', 'bcd', 'efg', 'ha')]) eq(self.glob('?a?', '*F'), map(self.norm, [os.path.join('aaa', 'zzzF'), os.path.join('aab', 'F')])) + eq(self.glob('a', 'b{c,x}d', '{*}', '*a'), + [self.norm('a', 'bcd', 'efg', 'ha')]) + eq(self.glob('a', 'b{x,y}d', '{*}', '*a'), [])
def test_glob_directory_with_trailing_slash(self): # We are verifying that when there is wildcard pattern which _______________________________________________ Python-ideas mailing list Python-ideas@python.org http://mail.python.org/mailman/listinfo/python-ideas

On Sat, Dec 6, 2008 at 8:46 PM, Gregory P. Smith <greg@krypto.org> wrote:
This looks useful.
Please post it as a feature request issue with patch on bugs.python.org. Also, if you could include updates to the fnmatch documentation to describe exactly what your code allows that would help.
Thanks Greg. I've made issue4573 to track this.

Backported to 2.7, as I think it is just as applicable there. On Sun, Dec 7, 2008 at 3:19 AM, Erick Tryzelaar <idadesub@users.sourceforge.net> wrote:
On Sat, Dec 6, 2008 at 8:46 PM, Gregory P. Smith <greg@krypto.org> wrote:
This looks useful.
Please post it as a feature request issue with patch on bugs.python.org. Also, if you could include updates to the fnmatch documentation to describe exactly what your code allows that would help.
Thanks Greg. I've made issue4573 to track this. _______________________________________________ Python-ideas mailing list Python-ideas@python.org http://mail.python.org/mailman/listinfo/python-ideas
-- Read my blog! I depend on your acceptance of my opinion! I am interesting! http://techblog.ironfroggy.com/ Follow me if you're into that sort of thing: http://www.twitter.com/ironfroggy
participants (3)
-
Calvin Spealman
-
Erick Tryzelaar
-
Gregory P. Smith