[Python-3000-checkins] r66366 - in python/branches/py3k: Lib/re.py Lib/test/test_re.py Misc/NEWS

guido.van.rossum python-3000-checkins at python.org
Wed Sep 10 19:44:36 CEST 2008


Author: guido.van.rossum
Date: Wed Sep 10 19:44:35 2008
New Revision: 66366

Log:
Issue #3756: make re.escape() handle bytes as well as str.
Patch by Andrew McNamara, reviewed and tweaked by me.


Modified:
   python/branches/py3k/Lib/re.py
   python/branches/py3k/Lib/test/test_re.py
   python/branches/py3k/Misc/NEWS

Modified: python/branches/py3k/Lib/re.py
==============================================================================
--- python/branches/py3k/Lib/re.py	(original)
+++ python/branches/py3k/Lib/re.py	Wed Sep 10 19:44:35 2008
@@ -211,23 +211,38 @@
     "Compile a template pattern, returning a pattern object"
     return _compile(pattern, flags|T)
 
-_alphanum = {}
-for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890':
-    _alphanum[c] = 1
-del c
+_alphanum_str = frozenset(
+    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
+_alphanum_bytes = frozenset(
+    b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
 
 def escape(pattern):
     "Escape all non-alphanumeric characters in pattern."
-    s = list(pattern)
-    alphanum = _alphanum
-    for i in range(len(pattern)):
-        c = pattern[i]
-        if c not in alphanum:
-            if c == "\000":
-                s[i] = "\\000"
+    if isinstance(pattern, str):
+        alphanum = _alphanum_str
+        s = list(pattern)
+        for i in range(len(pattern)):
+            c = pattern[i]
+            if c not in alphanum:
+                if c == "\000":
+                    s[i] = "\\000"
+                else:
+                    s[i] = "\\" + c
+        return "".join(s)
+    else:
+        alphanum = _alphanum_bytes
+        s = []
+        esc = ord(b"\\")
+        for c in pattern:
+            if c in alphanum:
+                s.append(c)
             else:
-                s[i] = "\\" + c
-    return pattern[:0].join(s)
+                if c == 0:
+                    s.extend(b"\\000")
+                else:
+                    s.append(esc)
+                    s.append(c)
+        return bytes(s)
 
 # --------------------------------------------------------------------
 # internals
@@ -248,7 +263,8 @@
     pattern, flags = key
     if isinstance(pattern, _pattern_type):
         if flags:
-            raise ValueError('Cannot process flags argument with a compiled pattern')
+            raise ValueError(
+                "Cannot process flags argument with a compiled pattern")
         return pattern
     if not sre_compile.isstring(pattern):
         raise TypeError("first argument must be string or compiled pattern")
@@ -325,7 +341,7 @@
             if i == j:
                 break
             action = self.lexicon[m.lastindex-1][1]
-            if hasattr(action, '__call__'):
+            if hasattr(action, "__call__"):
                 self.match = m
                 action = action(self, m.group())
             if action is not None:

Modified: python/branches/py3k/Lib/test/test_re.py
==============================================================================
--- python/branches/py3k/Lib/test/test_re.py	(original)
+++ python/branches/py3k/Lib/test/test_re.py	Wed Sep 10 19:44:35 2008
@@ -416,6 +416,7 @@
 
     def test_re_escape(self):
         p=""
+        self.assertEqual(re.escape(p), p)
         for i in range(0, 256):
             p = p + chr(i)
             self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
@@ -426,6 +427,19 @@
         self.assertEqual(pat.match(p) is not None, True)
         self.assertEqual(pat.match(p).span(), (0,256))
 
+    def test_re_escape_byte(self):
+        p=b""
+        self.assertEqual(re.escape(p), p)
+        for i in range(0, 256):
+            b = bytes([i])
+            p += b
+            self.assertEqual(re.match(re.escape(b), b) is not None, True)
+            self.assertEqual(re.match(re.escape(b), b).span(), (0,1))
+
+        pat=re.compile(re.escape(p))
+        self.assertEqual(pat.match(p) is not None, True)
+        self.assertEqual(pat.match(p).span(), (0,256))
+
     def pickle_test(self, pickle):
         oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
         s = pickle.dumps(oldpat)

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Wed Sep 10 19:44:35 2008
@@ -96,6 +96,8 @@
 Library
 -------
 
+- Issue #3756: make re.escape() handle bytes as well as str.
+
 - Issue #3800: fix filter() related bug in formatter.py.
 
 - Issue #874900: fix behaviour of threading module after a fork.


More information about the Python-3000-checkins mailing list