This looks useful. Please post it as a feature request issue with patch on bugs.python.org. Also, if you could include updates to the fnmatch documentation to describe exactly what your code allows, that would help. Thanks, -Greg On Sat, Dec 6, 2008 at 8:13 PM, Erick Tryzelaar <idadesub@users.sourceforge.net> wrote:
My project needs to extend fnmatch to support zsh-style globbing, where you can use curly brackets (braces) to designate subexpressions. Say you had a directory structure like this:
foo/ foo.ext1 foo.ext2 bar/ foo.ext1 foo.ext2
The subexpressions will let you do patterns like this:
glob.glob('foo/foo.{ext1,ext2}') ['foo/foo.ext1', 'foo/foo.ext2'] glob.glob('foo/foo.ext{1,2}') ['foo/foo.ext1', 'foo/foo.ext2'] glob.glob('{foo,bar}') ['bar', 'foo'] glob.glob('{foo,bar}/foo*') ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2'] glob.glob('{foo,bar}/foo.{ext*}') ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2'] glob.glob('{f?o,b?r}/foo.{ext*}') ['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
Would this be interesting to anyone else? Unfortunately, it would break backward compatibility in fnmatch, since fnmatch currently gives no special meaning to patterns with {} in them. It'd be easy to work around that by adding a flag or using a different function name. Anyway, here's the patch against the head of py3k.
-e
Index: Lib/glob.py =================================================================== --- Lib/glob.py (revision 67629) +++ Lib/glob.py (working copy) @@ -72,8 +72,8 @@ return []
-magic_check = re.compile('[*?[]') -magic_check_bytes = re.compile(b'[*?[]') +magic_check = re.compile('[*?[{]') +magic_check_bytes = re.compile(b'[*?[{]')
def has_magic(s): if isinstance(s, bytes): Index: Lib/fnmatch.py =================================================================== --- Lib/fnmatch.py (revision 67629) +++ Lib/fnmatch.py (working copy) @@ -22,10 +22,11 @@
Patterns are Unix shell style:
- * matches everything - ? matches any single character - [seq] matches any character in seq - [!seq] matches any char not in seq + * matches everything + ? matches any single character + [seq] matches any character in seq + [!seq] matches any char not in seq + {pat1,pat2} matches subpattern pat1 or subpattern pat2
An initial period in FILENAME is not special. Both FILENAME and PATTERN are first case-normalized @@ -84,10 +85,15 @@ There is no way to quote meta-characters. """
- i, n = 0, len(pat) + return _translate(0, pat, '')[2] + '$' + +def _translate(i, pat, end): res = '' + n = len(pat) while i < n: c = pat[i] + if c in end: + return i, c, res i = i+1 if c == '*': res = res + '.*' @@ -111,6 +117,27 @@ elif stuff[0] == '^': stuff = '\\' + stuff res = '%s[%s]' % (res, stuff) + elif c == '{': + i, sub = _translate_subexpression(i, pat) + res += sub else: res = res + re.escape(c) - return res + "$" + return i, '', res + +def _translate_subexpression(i, pat): + j = i + subexpressions = [] + while True: + j, c, res = _translate(j, pat, ',}') + subexpressions.append(res) + + if c == ',': + j += 1 + elif c == '}': + j += 1 + break + else: + # turns out we didn't have a subpattern + return j, '{' + ','.join(subexpressions) + + return j, '(' + '|'.join(subexpressions) + ')' Index: Lib/test/test_fnmatch.py =================================================================== --- Lib/test/test_fnmatch.py (revision 67629) +++ Lib/test/test_fnmatch.py (working copy) @@ -37,6 +37,12 @@ check('a', r'[!\]') check('\\', r'[!\]', 0)
+ check('abcdefghi', 'ab{cd,12*}ef{gh?,34}') + check('ab1234ef34', 'ab{cd,12*}ef{gh?,34}') + + check('abcdefgh', 'ab{cd,12*}ef{gh?,34}', 0) + check('ab1234ef345', 'ab{cd,12*}ef{gh?,34}', 0) + def test_mix_bytes_str(self): self.assertRaises(TypeError, fnmatch, 'test', b'*') self.assertRaises(TypeError, fnmatch, b'test', '*') Index: Lib/test/test_glob.py =================================================================== --- Lib/test/test_glob.py (revision 67629) +++ Lib/test/test_glob.py (working copy) @@ -69,6 +69,7 @@ eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab'])) eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab'])) eq(self.glob('*q'), []) + eq(self.glob('a{?a,?b}'), map(self.norm, ['aaa', 'aab']))
def test_glob_nested_directory(self): eq = self.assertSequencesEqual_noorder @@ -89,6 +90,9 @@ [self.norm('a', 'bcd', 'efg', 'ha')]) eq(self.glob('?a?', '*F'), map(self.norm, [os.path.join('aaa', 'zzzF'), os.path.join('aab', 'F')])) + eq(self.glob('a', 'b{c,x}d', '{*}', '*a'), + [self.norm('a', 'bcd', 'efg', 'ha')]) + eq(self.glob('a', 'b{x,y}d', '{*}', '*a'), [])
def test_glob_directory_with_trailing_slash(self): # We are verifying that when there is wildcard pattern which _______________________________________________ Python-ideas mailing list Python-ideas@python.org http://mail.python.org/mailman/listinfo/python-ideas