[Python-ideas] Anyone interested in zsh-style subpattern matching for fnmatch/glob?
Erick Tryzelaar
idadesub at users.sourceforge.net
Sun Dec 7 05:13:07 CET 2008
My project needs to extend fnmatch to support zsh-style globbing,
where you can use curly braces to designate alternative subexpressions.
Say you had a directory structure like this:
foo/
    foo.ext1
    foo.ext2
bar/
    foo.ext1
    foo.ext2
The subexpressions will let you do patterns like this:
>>> glob.glob('foo/foo.{ext1,ext2}')
['foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('foo/foo.ext{1,2}')
['foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('{foo,bar}')
['bar', 'foo']
>>> glob.glob('{foo,bar}/foo*')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('{foo,bar}/foo.{ext*}')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('{f?o,b?r}/foo.{ext*}')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
Would this be interesting to anyone else? It would unfortunately break
backward compatibility in fnmatch, since fnmatch currently gives no special
meaning to {} in a pattern and matches the braces literally. It'd be easy to
work around that by adding a flag or using a different function name.
Anyway, here's the patch against the head of py3k.
-e
Index: Lib/glob.py
===================================================================
--- Lib/glob.py (revision 67629)
+++ Lib/glob.py (working copy)
@@ -72,8 +72,8 @@
return []
-magic_check = re.compile('[*?[]')
-magic_check_bytes = re.compile(b'[*?[]')
+magic_check = re.compile('[*?[{]')
+magic_check_bytes = re.compile(b'[*?[{]')
def has_magic(s):
if isinstance(s, bytes):
Index: Lib/fnmatch.py
===================================================================
--- Lib/fnmatch.py (revision 67629)
+++ Lib/fnmatch.py (working copy)
@@ -22,10 +22,11 @@
Patterns are Unix shell style:
- * matches everything
- ? matches any single character
- [seq] matches any character in seq
- [!seq] matches any char not in seq
+ * matches everything
+ ? matches any single character
+ [seq] matches any character in seq
+ [!seq] matches any char not in seq
+ {pat1,pat2} matches subpattern pat1 or subpattern pat2
An initial period in FILENAME is not special.
Both FILENAME and PATTERN are first case-normalized
@@ -84,10 +85,15 @@
There is no way to quote meta-characters.
"""
- i, n = 0, len(pat)
+ return _translate(0, pat, '')[2] + '$'
+
+def _translate(i, pat, end):
res = ''
+ n = len(pat)
while i < n:
c = pat[i]
+ if c in end:
+ return i, c, res
i = i+1
if c == '*':
res = res + '.*'
@@ -111,6 +117,27 @@
elif stuff[0] == '^':
stuff = '\\' + stuff
res = '%s[%s]' % (res, stuff)
+ elif c == '{':
+ i, sub = _translate_subexpression(i, pat)
+ res += sub
else:
res = res + re.escape(c)
- return res + "$"
+ return i, '', res
+
+def _translate_subexpression(i, pat):
+ j = i
+ subexpressions = []
+ while True:
+ j, c, res = _translate(j, pat, ',}')
+ subexpressions.append(res)
+
+ if c == ',':
+ j += 1
+ elif c == '}':
+ j += 1
+ break
+ else:
+ # turns out we didn't have a subpattern
+ return j, '{' + ','.join(subexpressions)
+
+ return j, '(' + '|'.join(subexpressions) + ')'
Index: Lib/test/test_fnmatch.py
===================================================================
--- Lib/test/test_fnmatch.py (revision 67629)
+++ Lib/test/test_fnmatch.py (working copy)
@@ -37,6 +37,12 @@
check('a', r'[!\]')
check('\\', r'[!\]', 0)
+ check('abcdefghi', 'ab{cd,12*}ef{gh?,34}')
+ check('ab1234ef34', 'ab{cd,12*}ef{gh?,34}')
+
+ check('abcdefgh', 'ab{cd,12*}ef{gh?,34}', 0)
+ check('ab1234ef345', 'ab{cd,12*}ef{gh?,34}', 0)
+
def test_mix_bytes_str(self):
self.assertRaises(TypeError, fnmatch, 'test', b'*')
self.assertRaises(TypeError, fnmatch, b'test', '*')
Index: Lib/test/test_glob.py
===================================================================
--- Lib/test/test_glob.py (revision 67629)
+++ Lib/test/test_glob.py (working copy)
@@ -69,6 +69,7 @@
eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab']))
eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab']))
eq(self.glob('*q'), [])
+ eq(self.glob('a{?a,?b}'), map(self.norm, ['aaa', 'aab']))
def test_glob_nested_directory(self):
eq = self.assertSequencesEqual_noorder
@@ -89,6 +90,9 @@
[self.norm('a', 'bcd', 'efg', 'ha')])
eq(self.glob('?a?', '*F'), map(self.norm, [os.path.join('aaa', 'zzzF'),
os.path.join('aab', 'F')]))
+ eq(self.glob('a', 'b{c,x}d', '{*}', '*a'),
+ [self.norm('a', 'bcd', 'efg', 'ha')])
+ eq(self.glob('a', 'b{x,y}d', '{*}', '*a'), [])
def test_glob_directory_with_trailing_slash(self):
# We are verifying that when there is wildcard pattern which
More information about the Python-ideas
mailing list