This looks useful.
Please post it as a feature request issue, with the patch attached, on bugs.python.org. Also, it would help if you could include updates to the fnmatch documentation describing exactly what your code allows.
thanks, -Greg
On Sat, Dec 6, 2008 at 8:13 PM, Erick Tryzelaar < idadesub@users.sourceforge.net> wrote:
My project needs to extend fnmatch to support zsh-style globbing, where you can use curly braces to designate subexpressions. Say you had a directory structure like this:
foo/
    foo.ext1
    foo.ext2
bar/
    foo.ext1
    foo.ext2
The subexpressions will let you do patterns like this:
glob.glob('foo/foo.{ext1,ext2}')
['foo/foo.ext1', 'foo/foo.ext2']
glob.glob('foo/foo.ext{1,2}')
['foo/foo.ext1', 'foo/foo.ext2']
glob.glob('{foo,bar}')
['bar', 'foo']
glob.glob('{foo,bar}/foo*')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
glob.glob('{foo,bar}/foo.{ext*}')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
glob.glob('{f?o,b?r}/foo.{ext*}')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
Would this be interesting to anyone else? Unfortunately it would change fnmatch's existing behavior, since fnmatch currently treats { and } in a pattern as literal characters. It'd be easy to work around that by adding a flag or using a different function name. Anyway, here's the patch against the head of py3k.
-e
Index: Lib/glob.py
--- Lib/glob.py (revision 67629) +++ Lib/glob.py (working copy) @@ -72,8 +72,8 @@ return []
-magic_check = re.compile('[*?[]') -magic_check_bytes = re.compile(b'[*?[]') +magic_check = re.compile('[*?[{]') +magic_check_bytes = re.compile(b'[*?[{]')
def has_magic(s): if isinstance(s, bytes): Index: Lib/fnmatch.py =================================================================== --- Lib/fnmatch.py (revision 67629) +++ Lib/fnmatch.py (working copy) @@ -22,10 +22,11 @@
Patterns are Unix shell style:
- * matches everything
- ? matches any single character
- [seq] matches any character in seq
- [!seq] matches any char not in seq
* matches everything
? matches any single character
[seq] matches any character in seq
[!seq] matches any char not in seq
{pat1,pat2} matches subpattern pat1 or subpattern pat2
An initial period in FILENAME is not special. Both FILENAME and PATTERN are first case-normalized
@@ -84,10 +85,15 @@ There is no way to quote meta-characters. """
- i, n = 0, len(pat)
- return _translate(0, pat, '')[2] + '$'
+def _translate(i, pat, end): res = ''
- n = len(pat) while i < n: c = pat[i]
if c in end:
return i, c, res i = i+1 if c == '*': res = res + '.*'
@@ -111,6 +117,27 @@ elif stuff[0] == '^': stuff = '\\' + stuff res = '%s[%s]' % (res, stuff)
elif c == '{':
i, sub = _translate_subexpression(i, pat)
res += sub else: res = res + re.escape(c)
- return res + "$"
- return i, '', res
+def _translate_subexpression(i, pat):
- j = i
- subexpressions = []
- while True:
j, c, res = _translate(j, pat, ',}')
subexpressions.append(res)
if c == ',':
j += 1
elif c == '}':
j += 1
break
else:
# turns out we didn't have a subpattern
return j, '{' + ','.join(subexpressions)
- return j, '(' + '|'.join(subexpressions) + ')'
Index: Lib/test/test_fnmatch.py
--- Lib/test/test_fnmatch.py (revision 67629) +++ Lib/test/test_fnmatch.py (working copy) @@ -37,6 +37,12 @@ check('a', r'[!\]') check('\\', r'[!\]', 0)
check('abcdefghi', 'ab{cd,12*}ef{gh?,34}')
check('ab1234ef34', 'ab{cd,12*}ef{gh?,34}')
check('abcdefgh', 'ab{cd,12*}ef{gh?,34}', 0)
check('ab1234ef345', 'ab{cd,12*}ef{gh?,34}', 0)
- def test_mix_bytes_str(self): self.assertRaises(TypeError, fnmatch, 'test', b'*') self.assertRaises(TypeError, fnmatch, b'test', '*')
Index: Lib/test/test_glob.py
--- Lib/test/test_glob.py (revision 67629) +++ Lib/test/test_glob.py (working copy) @@ -69,6 +69,7 @@ eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab'])) eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab'])) eq(self.glob('*q'), [])
eq(self.glob('a{?a,?b}'), map(self.norm, ['aaa', 'aab']))
def test_glob_nested_directory(self): eq = self.assertSequencesEqual_noorder
@@ -89,6 +90,9 @@ [self.norm('a', 'bcd', 'efg', 'ha')]) eq(self.glob('?a?', '*F'), map(self.norm, [os.path.join('aaa', 'zzzF'), os.path.join('aab', 'F')]))
eq(self.glob('a', 'b{c,x}d', '{*}', '*a'),
[self.norm('a', 'bcd', 'efg', 'ha')])
eq(self.glob('a', 'b{x,y}d', '{*}', '*a'), [])
def test_glob_directory_with_trailing_slash(self): # We are verifying that when there is wildcard pattern which
Python-ideas mailing list Python-ideas@python.org http://mail.python.org/mailman/listinfo/python-ideas