This looks useful.

Please post it as a feature request issue with patch on bugs.python.org. Also, it would help if you could include updates to the fnmatch documentation describing exactly what your code allows.

thanks,
-Greg

On Sat, Dec 6, 2008 at 8:13 PM, Erick Tryzelaar <idadesub@users.sourceforge.net> wrote:
My project needs to extend fnmatch to support zsh-style globbing,
where you can use braces to designate subexpressions. Say you had a
directory structure like this:

foo/
 foo.ext1
 foo.ext2
bar/
 foo.ext1
 foo.ext2

The subexpressions will let you do patterns like this:

>>> glob.glob('foo/foo.{ext1,ext2}')
['foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('foo/foo.ext{1,2}')
['foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('{foo,bar}')
['bar', 'foo']
>>> glob.glob('{foo,bar}/foo*')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('{foo,bar}/foo.{ext*}')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']
>>> glob.glob('{f?o,b?r}/foo.{ext*}')
['bar/foo.ext1', 'bar/foo.ext2', 'foo/foo.ext1', 'foo/foo.ext2']


Would this be interesting to anyone else? Unfortunately, it would break
backward compatibility in fnmatch, since fnmatch currently gives no
special meaning to {} in a pattern. It'd be easy to work around that by
adding a flag or using a different function name.
Anyway, here's the patch against the head of py3k.

-e



Index: Lib/glob.py
===================================================================
--- Lib/glob.py (revision 67629)
+++ Lib/glob.py (working copy)
@@ -72,8 +72,8 @@
    return []


-magic_check = re.compile('[*?[]')
-magic_check_bytes = re.compile(b'[*?[]')
+magic_check = re.compile('[*?[{]')
+magic_check_bytes = re.compile(b'[*?[{]')

 def has_magic(s):
    if isinstance(s, bytes):
Index: Lib/fnmatch.py
===================================================================
--- Lib/fnmatch.py      (revision 67629)
+++ Lib/fnmatch.py      (working copy)
@@ -22,10 +22,11 @@

    Patterns are Unix shell style:

-    *       matches everything
-    ?       matches any single character
-    [seq]   matches any character in seq
-    [!seq]  matches any char not in seq
+    *           matches everything
+    ?           matches any single character
+    [seq]       matches any character in seq
+    [!seq]      matches any char not in seq
+    {pat1,pat2} matches subpattern pat1 or subpattern pat2

    An initial period in FILENAME is not special.
    Both FILENAME and PATTERN are first case-normalized
@@ -84,10 +85,15 @@
    There is no way to quote meta-characters.
    """

-    i, n = 0, len(pat)
+    return _translate(0, pat, '')[2] + '$'
+
+def _translate(i, pat, end):
    res = ''
+    n = len(pat)
    while i < n:
        c = pat[i]
+        if c in end:
+            return i, c, res
        i = i+1
        if c == '*':
            res = res + '.*'
@@ -111,6 +117,27 @@
                elif stuff[0] == '^':
                    stuff = '\\' + stuff
                res = '%s[%s]' % (res, stuff)
+        elif c == '{':
+            i, sub = _translate_subexpression(i, pat)
+            res += sub
        else:
            res = res + re.escape(c)
-    return res + "$"
+    return i, '', res
+
+def _translate_subexpression(i, pat):
+    j = i
+    subexpressions = []
+    while True:
+        j, c, res = _translate(j, pat, ',}')
+        subexpressions.append(res)
+
+        if c == ',':
+            j += 1
+        elif c == '}':
+            j += 1
+            break
+        else:
+            # turns out we didn't have a subpattern
+            return j, '{' + ','.join(subexpressions)
+
+    return j, '(' + '|'.join(subexpressions) + ')'
Index: Lib/test/test_fnmatch.py
===================================================================
--- Lib/test/test_fnmatch.py    (revision 67629)
+++ Lib/test/test_fnmatch.py    (working copy)
@@ -37,6 +37,12 @@
        check('a', r'[!\]')
        check('\\', r'[!\]', 0)

+        check('abcdefghi', 'ab{cd,12*}ef{gh?,34}')
+        check('ab1234ef34', 'ab{cd,12*}ef{gh?,34}')
+
+        check('abcdefgh', 'ab{cd,12*}ef{gh?,34}', 0)
+        check('ab1234ef345', 'ab{cd,12*}ef{gh?,34}', 0)
+
    def test_mix_bytes_str(self):
        self.assertRaises(TypeError, fnmatch, 'test', b'*')
        self.assertRaises(TypeError, fnmatch, b'test', '*')
Index: Lib/test/test_glob.py
===================================================================
--- Lib/test/test_glob.py       (revision 67629)
+++ Lib/test/test_glob.py       (working copy)
@@ -69,6 +69,7 @@
        eq(self.glob('aa?'), map(self.norm, ['aaa', 'aab']))
        eq(self.glob('aa[ab]'), map(self.norm, ['aaa', 'aab']))
        eq(self.glob('*q'), [])
+        eq(self.glob('a{?a,?b}'), map(self.norm, ['aaa', 'aab']))

    def test_glob_nested_directory(self):
        eq = self.assertSequencesEqual_noorder
@@ -89,6 +90,9 @@
           [self.norm('a', 'bcd', 'efg', 'ha')])
        eq(self.glob('?a?', '*F'), map(self.norm, [os.path.join('aaa', 'zzzF'),
                                                   os.path.join('aab', 'F')]))
+        eq(self.glob('a', 'b{c,x}d', '{*}', '*a'),
+           [self.norm('a', 'bcd', 'efg', 'ha')])
+        eq(self.glob('a', 'b{x,y}d', '{*}', '*a'), [])

    def test_glob_directory_with_trailing_slash(self):
        # We are verifying that when there is wildcard pattern which
_______________________________________________
Python-ideas mailing list
Python-ideas@python.org
http://mail.python.org/mailman/listinfo/python-ideas