[Python-ideas] Anyone interested in zsh-style subpattern matching for fnmatch/glob?

Erick Tryzelaar idadesub at users.sourceforge.net
Sun Dec 7 05:13:07 CET 2008


My project needs to extend fnmatch to support zsh-style globbing,
where you can use curly braces to designate alternative subpatterns.
Say you had a directory structure like this:

foo/
  foo.ext1
  foo.ext2
bar/
  foo.ext1
  foo.ext2

The subexpressions will let you do patterns like this:

>>> glob.glob('foo/foo.{ext1,ext2}')
['foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('foo/foo.ext{1,2}')
['foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('{foo,bar}')
['bar', 'foo']
>>> glob.glob('{foo,bar}/foo*')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('{foo,bar}/foo.{ext*}')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('{f?o,b?r}/foo.{ext*}')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']


Would this be interesting to anyone else? It would unfortunately break
backward compatibility for fnmatch, since fnmatch currently treats the
characters { and } in a pattern as literals. It'd be easy to work around
that by adding a flag or using a different function name.
Anyway, here's the patch against the head of py3k.

-e



Index: Lib/glob.py
===================================================================
--- Lib/glob.py	(revision 67629)
+++ Lib/glob.py	(working copy)
@@ -72,8 +72,8 @@
     return []


-magic_check = re.compile('[*?[]')
-magic_check_bytes = re.compile(b'[*?[]')
+magic_check = re.compile('[*?[{]')
+magic_check_bytes = re.compile(b'[*?[{]')

 def has_magic(s):
     if isinstance(s, bytes):
Index: Lib/fnmatch.py
===================================================================
--- Lib/fnmatch.py	(revision 67629)
+++ Lib/fnmatch.py	(working copy)
@@ -22,10 +22,11 @@

     Patterns are Unix shell style:

-    *       matches everything
-    ?       matches any single character
-    [seq]   matches any character in seq
-    [!seq]  matches any char not in seq
+    *           matches everything
+    ?           matches any single character
+    [seq]       matches any character in seq
+    [!seq]      matches any char not in seq
+    {pat1,pat2} matches subpattern pat1 or subpattern pat2

     An initial period in FILENAME is not special.
     Both FILENAME and PATTERN are first case-normalized
@@ -84,10 +85,15 @@
     There is no way to quote meta-characters.
     """

-    i, n = 0, len(pat)
+    return _translate(0, pat, '')[2] + '$'
+
+def _translate(i, pat, end):
     res = ''
+    n = len(pat)
     while i < n:
         c = pat[i]
+        if c in end:
+            return i, c, res
         i = i+1
         if c == '*':
             res = res + '.*'
@@ -111,6 +117,27 @@
                 elif stuff[0] == '^':
                     stuff = '\\' + stuff
                 res = '%s[%s]' % (res, stuff)
+        elif c == '{':
+            i, sub = _translate_subexpression(i, pat)
+            res += sub
         else:
             res = res + re.escape(c)
-    return res + "$"
+    return i, '', res
+
+def _translate_subexpression(i, pat):
+    j = i
+    subexpressions = []
+    while True:
+        j, c, res = _translate(j, pat, ',}')
+        subexpressions.append(res)
+
+        if c == ',':
+            j += 1
+        elif c == '}':
+            j += 1
+            break
+        else:
+            # turns out we didn't have a subpattern
+            return j, '{' + ','.join(subexpressions)
+
+    return j, '(' + '|'.join(subexpressions) + ')'
Index: Lib/test/test_fnmatch.py
===================================================================
--- Lib/test/test_fnmatch.py	(revision 67629)
+++ Lib/test/test_fnmatch.py	(working copy)
@@ -37,6 +37,12 @@
         check('a', r'[!\]')
         check('\\', r'[!\]', 0)

+        check('abcdefghi', 'ab{cd,12*}ef{gh?,34}')
+        check('ab1234ef34', 'ab{cd,12*}ef{gh?,34}')
+
+        check('abcdefgh', 'ab{cd,12*}ef{gh?,34}', 0)
+        check('ab1234ef345', 'ab{cd,12*}ef{gh?,34}', 0)
+
     def test_mix_bytes_str(self):
         self.assertRaises(TypeError, fnmatch, 'test', b'*')
         self.assertRaises(TypeError, fnmatch, b'test', '*')
Index: Lib/test/test_glob.py
===================================================================
--- Lib/test/test_glob.py	(revision 67629)
+++ Lib/test/test_glob.py	(working copy)
@@ -69,6 +69,7 @@
         eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab']))
         eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab']))
         eq(self.glob('*q'), [])
+        eq(self.glob('a{?a,?b}'), map(self.norm, ['aaa', 'aab']))

     def test_glob_nested_directory(self):
         eq = self.assertSequencesEqual_noorder
@@ -89,6 +90,9 @@
            [self.norm('a', 'bcd', 'efg', 'ha')])
         eq(self.glob('?a?', '*F'), map(self.norm, [os.path.join('aaa', 'zzzF'),
                                                    os.path.join('aab', 'F')]))
+        eq(self.glob('a', 'b{c,x}d', '{*}', '*a'),
+           [self.norm('a', 'bcd', 'efg', 'ha')])
+        eq(self.glob('a', 'b{x,y}d', '{*}', '*a'), [])

     def test_glob_directory_with_trailing_slash(self):
         # We are verifying that when there is wildcard pattern which



More information about the Python-ideas mailing list