[Python-checkins] cpython (merge 3.2 -> default): #2650: Merge with 3.2.

ezio.melotti python-checkins at python.org
Fri Mar 25 13:27:22 CET 2011


http://hg.python.org/cpython/rev/9da300ad8255
changeset:   68926:9da300ad8255
parent:      68921:11dc3f270594
parent:      68925:42ab3ebb8c2c
user:        Ezio Melotti <ezio.melotti at gmail.com>
date:        Fri Mar 25 14:26:56 2011 +0200
summary:
  #2650: Merge with 3.2.

files:
  Lib/re.py           |   3 +-
  Lib/test/test_re.py |  75 +++++++++++++++++++++++---------
  2 files changed, 55 insertions(+), 23 deletions(-)


diff --git a/Lib/re.py b/Lib/re.py
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -224,8 +224,7 @@
     if isinstance(pattern, str):
         alphanum = _alphanum_str
         s = list(pattern)
-        for i in range(len(pattern)):
-            c = pattern[i]
+        for i, c in enumerate(pattern):
             if c not in alphanum:
                 if c == "\000":
                     s[i] = "\\000"
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,7 +1,9 @@
 from test.support import verbose, run_unittest
 import re
 from re import Scanner
-import sys, traceback
+import sys
+import string
+import traceback
 from weakref import proxy
 
 # Misc tests from Tim Peters' re.doc
@@ -411,31 +413,62 @@
         self.assertEqual(re.search("\s(b)", " b").group(1), "b")
         self.assertEqual(re.search("a\s", "a ").group(0), "a ")
 
+    def assertMatch(self, pattern, text, match=None, span=None,
+                    matcher=re.match):
+        if match is None and span is None:
+            # the pattern matches the whole text
+            match = text
+            span = (0, len(text))
+        elif match is None or span is None:
+            raise ValueError('If match is not None, span should be specified '
+                             '(and vice versa).')
+        m = matcher(pattern, text)
+        self.assertTrue(m)
+        self.assertEqual(m.group(), match)
+        self.assertEqual(m.span(), span)
+
     def test_re_escape(self):
-        p=""
-        self.assertEqual(re.escape(p), p)
-        for i in range(0, 256):
-            p = p + chr(i)
-            self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
-                             True)
-            self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
-
-        pat=re.compile(re.escape(p))
-        self.assertEqual(pat.match(p) is not None, True)
-        self.assertEqual(pat.match(p).span(), (0,256))
+        alnum_chars = string.ascii_letters + string.digits
+        p = ''.join(chr(i) for i in range(256))
+        for c in p:
+            if c in alnum_chars:
+                self.assertEqual(re.escape(c), c)
+            elif c == '\x00':
+                self.assertEqual(re.escape(c), '\\000')
+            else:
+                self.assertEqual(re.escape(c), '\\' + c)
+            self.assertMatch(re.escape(c), c)
+        self.assertMatch(re.escape(p), p)
 
     def test_re_escape_byte(self):
-        p=b""
-        self.assertEqual(re.escape(p), p)
-        for i in range(0, 256):
+        alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
+        p = bytes(range(256))
+        for i in p:
             b = bytes([i])
-            p += b
-            self.assertEqual(re.match(re.escape(b), b) is not None, True)
-            self.assertEqual(re.match(re.escape(b), b).span(), (0,1))
+            if b in alnum_chars:
+                self.assertEqual(re.escape(b), b)
+            elif i == 0:
+                self.assertEqual(re.escape(b), b'\\000')
+            else:
+                self.assertEqual(re.escape(b), b'\\' + b)
+            self.assertMatch(re.escape(b), b)
+        self.assertMatch(re.escape(p), p)
 
-        pat=re.compile(re.escape(p))
-        self.assertEqual(pat.match(p) is not None, True)
-        self.assertEqual(pat.match(p).span(), (0,256))
+    def test_re_escape_non_ascii(self):
+        s = 'xxx\u2620\u2620\u2620xxx'
+        s_escaped = re.escape(s)
+        self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
+        self.assertMatch(s_escaped, s)
+        self.assertMatch('.%s+.' % re.escape('\u2620'), s,
+                         'x\u2620\u2620\u2620x', (2, 7), re.search)
+
+    def test_re_escape_non_ascii_bytes(self):
+        b = 'y\u2620y\u2620y'.encode('utf-8')
+        b_escaped = re.escape(b)
+        self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
+        self.assertMatch(b_escaped, b)
+        res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
+        self.assertEqual(len(res), 2)
 
     def pickle_test(self, pickle):
         oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list