[Python-checkins] bpo-32775: Fix regular expression warnings in fnmatch. (#5583)

Serhiy Storchaka webhook-mailer at python.org
Fri Feb 9 06:30:23 EST 2018


https://github.com/python/cpython/commit/23cdbfa744f0ec0e9e7575d378df4cb758691cd3
commit: 23cdbfa744f0ec0e9e7575d378df4cb758691cd3
branch: master
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2018-02-09T13:30:19+02:00
summary:

bpo-32775: Fix regular expression warnings in fnmatch. (#5583)

fnmatch.translate() no longer produces patterns which contain set
operations.

Sets starting with '[' or containing '--', '&&', '~~' or '||' will
be interpreted differently in regular expressions in future versions.
Currently, compiling them emits a FutureWarning. fnmatch.translate() now
avoids producing patterns containing such sets by accident.

files:
A Misc/NEWS.d/next/Library/2018-02-07-19-12-10.bpo-32775.-T77_c.rst
M Lib/fnmatch.py
M Lib/test/test_fnmatch.py

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index fd3b5142e348..b98e6413295e 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -97,11 +97,30 @@ def translate(pat):
             if j >= n:
                 res = res + '\\['
             else:
-                stuff = pat[i:j].replace('\\','\\\\')
+                stuff = pat[i:j]
+                if '--' not in stuff:
+                    stuff = stuff.replace('\\', r'\\')
+                else:
+                    chunks = []
+                    k = i+2 if pat[i] == '!' else i+1
+                    while True:
+                        k = pat.find('-', k, j)
+                        if k < 0:
+                            break
+                        chunks.append(pat[i:k])
+                        i = k+1
+                        k = k+3
+                    chunks.append(pat[i:j])
+                    # Escape backslashes and hyphens for set difference (--).
+                    # Hyphens that create ranges shouldn't be escaped.
+                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
+                                     for s in chunks)
+                # Escape set operations (&&, ~~ and ||).
+                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
                 i = j+1
                 if stuff[0] == '!':
                     stuff = '^' + stuff[1:]
-                elif stuff[0] == '^':
+                elif stuff[0] in ('^', '['):
                     stuff = '\\' + stuff
                 res = '%s[%s]' % (res, stuff)
         else:
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 78245c3ab1a1..55f9f0d3a542 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -2,6 +2,7 @@
 
 import unittest
 import os
+import warnings
 
 from fnmatch import fnmatch, fnmatchcase, translate, filter
 
@@ -83,6 +84,17 @@ def test_sep(self):
         check('usr/bin', 'usr\\bin', normsep)
         check('usr\\bin', 'usr\\bin')
 
+    def test_warnings(self):
+        with warnings.catch_warnings():
+            warnings.simplefilter('error', Warning)
+            check = self.check_match
+            check('[', '[[]')
+            check('&', '[a&&b]')
+            check('|', '[a||b]')
+            check('~', '[a~~b]')
+            check(',', '[a-z+--A-Z]')
+            check('.', '[a-z--/A-Z]')
+
 
 class TranslateTestCase(unittest.TestCase):
 
diff --git a/Misc/NEWS.d/next/Library/2018-02-07-19-12-10.bpo-32775.-T77_c.rst b/Misc/NEWS.d/next/Library/2018-02-07-19-12-10.bpo-32775.-T77_c.rst
new file mode 100644
index 000000000000..ed563c17fdc8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-02-07-19-12-10.bpo-32775.-T77_c.rst
@@ -0,0 +1,5 @@
+:func:`fnmatch.translate()` no longer produces patterns which contain set
+operations. Sets starting with '[' or containing '--', '&&', '~~' or '||'
+will be interpreted differently in regular expressions in future versions.
+Currently they emit warnings. fnmatch.translate() now avoids producing
+patterns containing such sets by accident.



More information about the Python-checkins mailing list