[Python-checkins] cpython: Issue #17615: Add tests comparing Unicode strings of different kinds
victor.stinner
python-checkins at python.org
Mon Apr 8 22:35:03 CEST 2013
http://hg.python.org/cpython/rev/db4a1a3d1f90
changeset: 83203:db4a1a3d1f90
user: Victor Stinner <victor.stinner at gmail.com>
date: Mon Apr 08 22:34:43 2013 +0200
summary:
Issue #17615: Add tests comparing Unicode strings of different kinds
Kinds: ascii, latin, bmp, astral.
files:
Lib/test/test_unicode.py | 85 ++++++++++++++++++++++++++++
1 files changed, 85 insertions(+), 0 deletions(-)
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -7,6 +7,7 @@
"""#"
import _string
import codecs
+import itertools
import struct
import sys
import unittest
@@ -31,6 +32,16 @@
return None
codecs.register(search_function)
+def duplicate_string(text):
+    """
+    Try to get a fresh clone of the specified text:
+    new object with a reference count of 1.
+
+    This is best-effort: latin1 single letters and the empty
+    string ('') are singletons and cannot be cloned.
+
+    The clone is produced by a UTF-8 encode/decode round trip.  The
+    'surrogatepass' error handler is used in both directions so that
+    strings containing lone surrogates can be cloned too, instead of
+    raising UnicodeEncodeError.
+    """
+    data = text.encode('utf-8', 'surrogatepass')
+    return data.decode('utf-8', 'surrogatepass')
+
class UnicodeTest(string_tests.CommonTest,
string_tests.MixinStrUnicodeUserStringTest,
string_tests.MixinStrUnicodeTest,
@@ -2208,6 +2219,80 @@
self.assertNotEqual(abc, abcdef)
self.assertEqual(abcdef.decode('unicode_internal'), text)
+    def test_compare(self):
+        """Compare strings of the ascii, latin, bmp and astral kinds.
+
+        Issue #17615: check ==, !=, <, <= and >= between strings of the
+        same kind and between strings of different kinds.
+        """
+        N = 10
+        ascii = 'a' * N
+        ascii2 = 'z' * N
+        latin = '\x80' * N
+        latin2 = '\xff' * N
+        bmp = '\u0100' * N
+        bmp2 = '\uffff' * N
+        astral = '\U00100000' * N
+        astral2 = '\U0010ffff' * N
+        strings = (
+            ascii, ascii2,
+            latin, latin2,
+            bmp, bmp2,
+            astral, astral2)
+        # combinations_with_replacement() also yields each string paired
+        # with itself; plain combinations() never does, which would leave
+        # the "equal" branch below unreachable.
+        pairs = itertools.combinations_with_replacement(strings, 2)
+        for text1, text2 in pairs:
+            equal = (text1 is text2)
+            self.assertEqual(text1 == text2, equal)
+            self.assertEqual(text1 != text2, not equal)
+
+            if equal:
+                self.assertTrue(text1 <= text2)
+                self.assertTrue(text1 >= text2)
+
+                # text1 is text2: duplicate strings to skip the "str1 == str2"
+                # optimization in unicode_compare_eq() and really compare
+                # character per character
+                copy1 = duplicate_string(text1)
+                copy2 = duplicate_string(text2)
+                self.assertIsNot(copy1, copy2)
+
+                self.assertTrue(copy1 == copy2)
+                self.assertFalse(copy1 != copy2)
+
+                self.assertTrue(copy1 <= copy2)
+                # Compare the two distinct copies (not copy2 with itself).
+                self.assertTrue(copy1 >= copy2)
+
+        # Ordering follows code points, so every string of a "smaller" kind
+        # sorts before every string of a "larger" kind used here.
+        self.assertTrue(ascii < ascii2)
+        self.assertTrue(ascii < latin)
+        self.assertTrue(ascii < bmp)
+        self.assertTrue(ascii < astral)
+        self.assertFalse(ascii >= ascii2)
+        self.assertFalse(ascii >= latin)
+        self.assertFalse(ascii >= bmp)
+        self.assertFalse(ascii >= astral)
+
+        self.assertFalse(latin < ascii)
+        self.assertTrue(latin < latin2)
+        self.assertTrue(latin < bmp)
+        self.assertTrue(latin < astral)
+        self.assertTrue(latin >= ascii)
+        self.assertFalse(latin >= latin2)
+        self.assertFalse(latin >= bmp)
+        self.assertFalse(latin >= astral)
+
+        self.assertFalse(bmp < ascii)
+        self.assertFalse(bmp < latin)
+        self.assertTrue(bmp < bmp2)
+        self.assertTrue(bmp < astral)
+        self.assertTrue(bmp >= ascii)
+        self.assertTrue(bmp >= latin)
+        self.assertFalse(bmp >= bmp2)
+        self.assertFalse(bmp >= astral)
+
+        # The astral string is compared against bmp2 (U+FFFF), the largest
+        # BMP code point used in this test.
+        self.assertFalse(astral < ascii)
+        self.assertFalse(astral < latin)
+        self.assertFalse(astral < bmp2)
+        self.assertTrue(astral < astral2)
+        self.assertTrue(astral >= ascii)
+        self.assertTrue(astral >= latin)
+        self.assertTrue(astral >= bmp2)
+        self.assertFalse(astral >= astral2)
+
class StringModuleTest(unittest.TestCase):
def test_formatter_parser(self):
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list