[Python-checkins] cpython (3.4): Issue #22410: Module level functions in the re module now cache compiled locale-dependent regular expressions taking into account the locale.

serhiy.storchaka python-checkins at python.org
Fri Oct 31 00:03:57 CET 2014


https://hg.python.org/cpython/rev/cbdc658b7797
changeset:   93292:cbdc658b7797
branch:      3.4
parent:      93287:77e5487c4de1
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Fri Oct 31 00:53:49 2014 +0200
summary:
  Issue #22410: Module level functions in the re module now cache compiled
locale-dependent regular expressions taking into account the locale.

files:
  Lib/re.py           |  11 ++++++++-
  Lib/test/test_re.py |  37 +++++++++++++++++++++++++++++++++
  Misc/NEWS           |   3 ++
  3 files changed, 49 insertions(+), 2 deletions(-)


diff --git a/Lib/re.py b/Lib/re.py
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -122,6 +122,7 @@
 import sys
 import sre_compile
 import sre_parse
+import _locale
 
 # public symbols
 __all__ = [ "match", "fullmatch", "search", "sub", "subn", "split", "findall",
@@ -275,7 +276,9 @@
     bypass_cache = flags & DEBUG
     if not bypass_cache:
         try:
-            return _cache[type(pattern), pattern, flags]
+            p, loc = _cache[type(pattern), pattern, flags]
+            if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
+                return p
         except KeyError:
             pass
     if isinstance(pattern, _pattern_type):
@@ -289,7 +292,11 @@
     if not bypass_cache:
         if len(_cache) >= _MAXCACHE:
             _cache.clear()
-        _cache[type(pattern), pattern, flags] = p
+        if p.flags & LOCALE:
+            loc = _locale.setlocale(_locale.LC_CTYPE)
+        else:
+            loc = None
+        _cache[type(pattern), pattern, flags] = p, loc
     return p
 
 def _compile_repl(repl, pattern):
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,6 +1,7 @@
 from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
         cpython_only, captured_stdout
 import io
+import locale
 import re
 from re import Scanner
 import sre_compile
@@ -1254,6 +1255,42 @@
         # with ignore case.
         self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
 
+    def test_locale_caching(self):
+        # Issue #22410
+        oldlocale = locale.setlocale(locale.LC_CTYPE)
+        self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
+        for loc in 'en_US.iso88591', 'en_US.utf8':
+            try:
+                locale.setlocale(locale.LC_CTYPE, loc)
+            except locale.Error:
+                # Unsupported locale on this system
+                self.skipTest('test needs %s locale' % loc)
+
+        re.purge()
+        self.check_en_US_iso88591()
+        self.check_en_US_utf8()
+        re.purge()
+        self.check_en_US_utf8()
+        self.check_en_US_iso88591()
+
+    def check_en_US_iso88591(self):
+        locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
+        self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
+        self.assertTrue(re.match(b'\xc5', b'\xe5', re.L|re.I))
+        self.assertTrue(re.match(b'\xe5', b'\xc5', re.L|re.I))
+        self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
+        self.assertTrue(re.match(b'(?Li)\xc5', b'\xe5'))
+        self.assertTrue(re.match(b'(?Li)\xe5', b'\xc5'))
+
+    def check_en_US_utf8(self):
+        locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
+        self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', re.L|re.I))
+        self.assertIsNone(re.match(b'\xc5', b'\xe5', re.L|re.I))
+        self.assertIsNone(re.match(b'\xe5', b'\xc5', re.L|re.I))
+        self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
+        self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
+        self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
+
 
 class PatternReprTests(unittest.TestCase):
     def check(self, pattern, expected):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -33,6 +33,9 @@
 Library
 -------
 
+- Issue #22410: Module level functions in the re module now cache compiled
+  locale-dependent regular expressions taking into account the locale.
+
 - Issue #8876: distutils now falls back to copying files when hard linking
   doesn't work.  This allows use with special filesystems such as VirtualBox
   shared folders.

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list