[Python-checkins] cpython: issue9584: Add {} list expansion to glob. Original patch by Mathieu Bridon

tim.golden python-checkins at python.org
Tue Nov 6 14:56:02 CET 2012


http://hg.python.org/cpython/rev/dafca4714298
changeset:   80273:dafca4714298
user:        Tim Golden <mail at timgolden.me.uk>
date:        Tue Nov 06 13:50:42 2012 +0000
summary:
  issue9584: Add {} list expansion to glob. Original patch by Mathieu Bridon

files:
  Doc/library/glob.rst  |  11 ++--
  Lib/glob.py           |  65 ++++++++++++++++++++++--------
  Lib/test/test_glob.py |  64 +++++++++++++++++++++++++++++-
  Misc/NEWS             |   3 +
  4 files changed, 118 insertions(+), 25 deletions(-)


diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst
--- a/Doc/library/glob.rst
+++ b/Doc/library/glob.rst
@@ -13,10 +13,10 @@
 
 The :mod:`glob` module finds all the pathnames matching a specified pattern
 according to the rules used by the Unix shell.  No tilde expansion is done, but
-``*``, ``?``, and character ranges expressed with ``[]`` will be correctly
-matched.  This is done by using the :func:`os.listdir` and
-:func:`fnmatch.fnmatch` functions in concert, and not by actually invoking a
-subshell.  (For tilde and shell variable expansion, use
+``*``, ``?``, character ranges expressed with ``[]`` and lists of options
+expressed with ``{}`` will be correctly matched.  This is done by using the
+:func:`os.listdir` and :func:`fnmatch.fnmatch` functions in concert, and not by
+actually invoking a subshell.  (For tilde and shell variable expansion, use
 :func:`os.path.expanduser` and :func:`os.path.expandvars`.)
 
 
@@ -47,7 +47,8 @@
    ['1.gif', 'card.gif']
    >>> glob.glob('?.gif')
    ['1.gif']
-
+   >>> glob.glob('?.{gif,txt}')
+   ['1.gif', '2.txt']
 
 .. seealso::
 
diff --git a/Lib/glob.py b/Lib/glob.py
--- a/Lib/glob.py
+++ b/Lib/glob.py
@@ -14,6 +14,7 @@
     """
     return list(iglob(pathname))
 
+
 def iglob(pathname):
     """Return an iterator which yields the paths matching a pathname pattern.
 
@@ -24,21 +25,24 @@
         if os.path.lexists(pathname):
             yield pathname
         return
-    dirname, basename = os.path.split(pathname)
-    if not dirname:
-        yield from glob1(None, basename)
-        return
-    if has_magic(dirname):
-        dirs = iglob(dirname)
-    else:
-        dirs = [dirname]
-    if has_magic(basename):
-        glob_in_dir = glob1
-    else:
-        glob_in_dir = glob0
-    for dirname in dirs:
-        for name in glob_in_dir(dirname, basename):
-            yield os.path.join(dirname, name)
+    pathnames = expand_braces(pathname)
+    for pathname in pathnames:
+      dirname, basename = os.path.split(pathname)
+      if not dirname:
+          yield from glob1(None, basename)
+          return
+
+      if has_magic(dirname):
+          dirs = iglob(dirname)
+      else:
+          dirs = [dirname]
+      if has_magic(basename):
+          glob_in_dir = glob1
+      else:
+          glob_in_dir = glob0
+      for dirname in dirs:
+          for name in glob_in_dir(dirname, basename):
+              yield os.path.join(dirname, name)
 
 # These 2 helper functions non-recursively glob inside a literal directory.
 # They return a list of basenames. `glob1` accepts a pattern while `glob0`
@@ -70,12 +74,37 @@
     return []
 
 
-magic_check = re.compile('[*?[]')
-magic_check_bytes = re.compile(b'[*?[]')
-
+magic_check = re.compile('[*?[{]')
+magic_check_bytes = re.compile(b'[*?[{]')
 def has_magic(s):
     if isinstance(s, bytes):
         match = magic_check_bytes.search(s)
     else:
         match = magic_check.search(s)
     return match is not None
+
+brace_matcher = re.compile(r'.*(\{.+?[^\\]\})')
+def expand_braces(text):
+    """Find the rightmost, innermost set of braces and, if it contains a
+    comma-separated list, expand its contents recursively (any of its items
+    may itself be a list enclosed in braces).
+
+    Return the full set of expanded strings.
+    """
+    res = set()
+
+    match = brace_matcher.search(text)
+    if match is not None:
+        sub = match.group(1)
+        open_brace, close_brace = match.span(1)
+        if "," in sub:
+            for pat in sub.strip('{}').split(','):
+                res.update(expand_braces(text[:open_brace] + pat + text[close_brace:]))
+
+        else:
+            res.update(expand_braces(text[:open_brace] + sub.replace('}', '\\}') + text[close_brace:]))
+
+    else:
+        res.add(text.replace('\\}', '}'))
+
+    return res
diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py
--- a/Lib/test/test_glob.py
+++ b/Lib/test/test_glob.py
@@ -5,7 +5,7 @@
 import os
 import shutil
 
-class GlobTests(unittest.TestCase):
+class GlobTestsBase(unittest.TestCase):
 
     def norm(self, *parts):
         return os.path.normpath(os.path.join(self.tempdir, *parts))
@@ -45,6 +45,8 @@
     def assertSequencesEqual_noorder(self, l1, l2):
         self.assertEqual(set(l1), set(l2))
 
+class GlobTests(GlobTestsBase):
+
     def test_glob_literal(self):
         eq = self.assertSequencesEqual_noorder
         eq(self.glob('a'), [self.norm('a')])
@@ -105,9 +107,67 @@
         eq(self.glob('sym1'), [self.norm('sym1')])
         eq(self.glob('sym2'), [self.norm('sym2')])
 
+class GlobBracesTests(GlobTestsBase):
+
+    def setUp(self):
+        super(GlobBracesTests, self).setUp()
+        self.mktemp('c{}d')
+        self.mktemp('c{deg')
+        self.mktemp('c{dfg')
+        self.mktemp('cd{f}g')
+        self.mktemp('ce{f}g')
+        self.mktemp('cdf}g')
+        self.mktemp('cef}g')
+
+    def match_pattern_with_results(self, patterns, paths):
+        expected = [self.norm(path) for path in [os.path.join(*parts) for parts in paths]]
+        actual = [os.path.normpath(g) for g in self.glob(*patterns)]
+        self.assertSequencesEqual_noorder(actual, expected)
+
+    def test_two_terms(self):
+        self.match_pattern_with_results(['a{aa,ab}'], [["aaa"], ["aab"]])
+
+    def test_missing_first_plus_nested(self):
+        self.match_pattern_with_results(['a{,a{a,b}}'], [['a'], ['aaa'], ['aab']])
+
+    def test_one_subpath_with_two_file_terms(self):
+        self.match_pattern_with_results(['a', '{D,bcd}'], [['a', 'D'], ['a', 'bcd']])
+
+    def test_two_subpath_terms_with_two_file_terms(self):
+        self.match_pattern_with_results(['{aaa,aab}', '{F,zzzF}'], [('aaa', 'zzzF'), ('aab', 'F')])
+
+    def test_two_subpath_terms_with_wildcard_file_term(self):
+        self.match_pattern_with_results(['aa{a,b}', '*F'], [('aaa', 'zzzF'), ('aab', 'F')])
+
+    def test_wildcard_subpath_with_file_missing_first_term(self):
+        self.match_pattern_with_results(['aa?', '{,zzz}F'], [('aaa', 'zzzF'), ('aab', 'F')])
+
+    #
+    # Edge cases where braces should not be expanded
+    #
+    def test_empty_braces(self):
+        self.assertSequencesEqual_noorder(self.glob('c{}d'), [self.norm('c{}d')])
+
+    def test_missing_end_brace(self):
+        self.assertSequencesEqual_noorder(self.glob('c{d{e,f}g'), map(self.norm, ['c{deg', 'c{dfg']))
+
+    def test_second_brace_one_term(self):
+        self.assertSequencesEqual_noorder(self.glob('c{d,e}{f}g'), map(self.norm, ['cd{f}g', 'ce{f}g']))
+
+    def test_outer_term_missing_first_brace(self):
+        self.assertSequencesEqual_noorder(self.glob('c{d,e}f}g'), map(self.norm, ['cdf}g', 'cef}g']))
+
+    #
+    # Braces containing folder separators
+    #
+    def test_embedded_separator1(self):
+        self.match_pattern_with_results(['a/{D,bcd/{EF,efg}}'], [('a', 'D'), ('a', 'bcd', 'EF'), ('a', 'bcd', 'efg')])
+
+    def test_embedded_separator2(self):
+        self.match_pattern_with_results(['aa{a/zzz,b/}F'], [('aaa', 'zzzF'), ('aab', 'F')])
 
 def test_main():
-    run_unittest(GlobTests)
+    run_unittest(GlobTests, GlobBracesTests)
 
 
 if __name__ == "__main__":
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #9584: glob.glob now expands braces to a list of strings.
+  Original patch by Mathieu Bridon.
+
 - Issue #8271: the utf-8 decoder now outputs the correct number of U+FFFD
   characters when used with the 'replace' error handler on invalid utf-8
   sequences.  Patch by Serhiy Storchaka, tests by Ezio Melotti.

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list