[Jython-checkins] jython: Mixed comparison (unicode, str) respects default encoding (fixes #2638).
jeff.allen
jython-checkins at python.org
Tue Nov 21 17:39:10 EST 2017
https://hg.python.org/jython/rev/f71e0b2cfaf7
changeset: 8139:f71e0b2cfaf7
user: Jeff Allen <ja.py at farowl.co.uk>
date: Sun Nov 19 19:56:33 2017 +0000
summary:
Mixed comparison (unicode, str) respects default encoding (fixes #2638).
PyUnicode is given its own implementation of the rich comparison
operators rather than inheriting from PyString (which is to treat
encoded bytes as latin-1 characters). Corresponding tests are added to
test_unicode_jy, where coverage of other byte types is also improved.
files:
Lib/test/test_unicode_jy.py | 466 +++++++++++-----
src/org/python/core/PyUnicode.java | 73 ++-
2 files changed, 383 insertions(+), 156 deletions(-)
diff --git a/Lib/test/test_unicode_jy.py b/Lib/test/test_unicode_jy.py
--- a/Lib/test/test_unicode_jy.py
+++ b/Lib/test/test_unicode_jy.py
@@ -122,8 +122,8 @@
self.assertRaises(UnicodeDecodeError, '毛泽东'.join, [u'foo', u'bar'])
def test_file_encoding(self):
- '''Ensure file writing doesn't attempt to encode things by default and reading doesn't
- decode things by default. This was jython's behavior prior to 2.2.1'''
+ # Ensure file writing doesn't attempt to encode things by default and reading doesn't
+ # decode things by default. This was jython's behavior prior to 2.2.1.
EURO_SIGN = u"\u20ac"
try:
EURO_SIGN.encode()
@@ -852,6 +852,7 @@
self.assertRaises(ValueError, fmt.format, u"{0}", 10, 20, i=100)
self.assertRaises(ValueError, fmt.format, u"{i}", 10, 20, i=100)
+
class UnicodeSpaceTest(unittest.TestCase):
# Test classification of characters as whitespace (some Jython divergence)
@@ -900,6 +901,7 @@
self.assertEqual(2, len(s.split()), "no split made in " + repr(s))
self.assertEqual(2, len(s.rsplit()), "no rsplit made in " + repr(s))
+
class EncodingContext(object):
"""Context manager to save and restore the encoding.
@@ -923,70 +925,196 @@
class DefaultDecodingTestCase(unittest.TestCase):
- # Test use of default encoding to coerce str to unicode
+ # Test use of default encoding to coerce byte-like data to unicode
+
+ BYTE_TYPES = (str, buffer, bytearray, memoryview)
+ BYTE_TYPES_COMPARE = (str, buffer) # Restricted as for CPython __eq__ etc.
+
+ if not test_support.is_jython:
+ # CPython restricts the acceptable byte-like types by context
+ BYTE_TYPES = (str, buffer)
+ BYTE_TYPES_COMPARE = (str, buffer)
+
+ # Operators
def test_add(self):
+ cs = self.encoding
ref = u'café crème'
- s1 = ref[:4].encode(self.encoding)
- s2 = ref[4:].encode(self.encoding)
- with EncodingContext(self.encoding):
+ s1 = ref[:4].encode(cs)
+ s2 = ref[4:].encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ b2 = B(s2)
+ self.assertEqual( ref[:4] + b2, ref)
+ # Really we're testing that str promotes. Other Bs may not.
self.assertEqual( s1 + ref[4:], ref)
- self.assertEqual( ref[:4] + s2, ref)
def test_in(self):
+ cs = self.encoding
ref = u'café crème'
- with EncodingContext(self.encoding):
- self.assertTrue(u'é'.encode(self.encoding) in ref)
- self.assertTrue(u'fé'.encode(self.encoding) in ref)
- # Fails if the string is interpreted as code points.
- if self.encoding != 'latin-1':
- self.assertFalse('\xc3\xa9' in u'caf\xc3\xa9')
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertTrue(B(u'é'.encode(cs)) in ref)
+ self.assertTrue(B(u'fé'.encode(cs)) in ref)
+ # Fails if the string is interpreted as code points.
+ if cs != 'latin-1':
+ self.assertFalse(B('\xc3\xa9') in u'caf\xc3\xa9')
def test_eq(self):
- ref = u'café crème'
- b = ref.encode(self.encoding)
- with EncodingContext(self.encoding):
- self.assertTrue(ref == b)
- self.assertTrue(b == ref)
+ cs = self.encoding
+ u = u"Un caf\xe9 cr\xe8me."
+ # Derive a string u1 such that u1 != u, and the encoded versions s, s1
+ u1 = u.replace('cr', 'm')
+ s, s1 = u.encode(cs), u1.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES_COMPARE:
+ #print B,
+ b, b1 = B(s), B(s1)
+ self.assertTrue (u == b )
+ self.assertTrue (b == u )
+ self.assertFalse(u == b1)
+ self.assertFalse(b1== u )
+ # Check not implicitly comparing as latin-1.
+ if cs != 'latin-1':
+ b = B('caf\xc3\xa9')
+ self.assertFalse(u'caf\xc3\xa9'== b)
+ self.assertFalse(b == u'caf\xc3\xa9')
def test_ne(self):
- with EncodingContext(self.encoding):
- # Fails if the string is interpreted as code points.
- if self.encoding != 'latin-1':
- self.assertFalse(u'caf\xc3\xa9'== 'caf\xc3\xa9')
- self.assertFalse('caf\xc3\xa9' == u'caf\xc3\xa9')
+ cs = self.encoding
+ u = u"Un caf\xe9 cr\xe8me."
+ # Derive a string u1 such that u1 != u, and the encoded versions s, s1
+ u1 = u.replace('cr', 'm')
+ s, s1 = u.encode(cs), u1.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES_COMPARE:
+ #print B,
+ b, b1 = B(s), B(s1)
+ self.assertTrue (u != b1)
+ self.assertTrue (b != u1)
+ self.assertFalse(u != b )
+ self.assertFalse(b != u )
+ # Check not implicitly comparing as latin-1.
+ if cs != 'latin-1':
+ b = B('caf\xc3\xa9')
+ self.assertTrue(u'caf\xc3\xa9'!= b)
+ self.assertTrue(b != u'caf\xc3\xa9')
+
+ def test_lt(self):
+ cs = self.encoding
+ u = u"Un caf\xe9 cr\xe8me."
+ # Derive strings such that u0 < u < u1 and their encodings
+ u0 = u.replace('cr', 'Cr')
+ u1 = u.replace('.', '?')
+ s0, s, s1 = u0.encode(cs), u.encode(cs), u1.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES_COMPARE:
+ #print B,
+ b0, b, b1 = B(s0), B(s), B(s1)
+ self.assertTrue (b0 < u )
+ self.assertFalse(b < u )
+ self.assertFalse(b1 < u )
+ self.assertFalse(u < b0)
+ self.assertFalse(u < b )
+ self.assertTrue (u < b1)
+
+ def test_le(self):
+ cs = self.encoding
+ u = u"Un caf\xe9 cr\xe8me."
+ # Derive strings such that u0 < u < u1 and their encodings
+ u0 = u.replace('cr', 'Cr')
+ u1 = u.replace('.', '?')
+ s0, s, s1 = u0.encode(cs), u.encode(cs), u1.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES_COMPARE:
+ #print B,
+ b0, b, b1 = B(s0), B(s), B(s1)
+ self.assertTrue (b0 <= u )
+ self.assertTrue (b <= u )
+ self.assertFalse(b1 <= u )
+ self.assertFalse(u <= b0)
+ self.assertTrue (u <= b )
+ self.assertTrue (u <= b1)
+
+ def test_gt(self):
+ cs = self.encoding
+ u = u"Un caf\xe9 cr\xe8me."
+ # Derive strings such that u0 < u < u1 and their encodings
+ u0 = u.replace('cr', 'Cr')
+ u1 = u.replace('.', '?')
+ s0, s, s1 = u0.encode(cs), u.encode(cs), u1.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES_COMPARE:
+ #print B,
+ b0, b, b1 = B(s0), B(s), B(s1)
+ self.assertTrue (b1 > u )
+ self.assertFalse(b > u )
+ self.assertFalse(b0 > u )
+ self.assertFalse(u > b1)
+ self.assertFalse(u > b )
+ self.assertTrue (u > b0)
+
+ def test_ge(self):
+ cs = self.encoding
+ u = u"Un caf\xe9 cr\xe8me."
+ # Derive strings such that u0 < u < u1 and their encodings
+ u0 = u.replace('cr', 'Cr')
+ u1 = u.replace('.', '?')
+ s0, s, s1 = u0.encode(cs), u.encode(cs), u1.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES_COMPARE:
+ #print B,
+ b0, b, b1 = B(s0), B(s), B(s1)
+ self.assertTrue (b1 >= u )
+ self.assertTrue (b >= u )
+ self.assertFalse(b0 >= u )
+ self.assertFalse(u >= b1)
+ self.assertTrue (u >= b )
+ self.assertTrue (u >= b0)
+
+
+ # Methods
def test_count(self):
+ cs = self.encoding
ref = u'Le café des fées égarées'
- with EncodingContext(self.encoding):
- self.assertEqual(ref.count(u'é'.encode(self.encoding)), 4)
- self.assertEqual(ref.count(u'fé'.encode(self.encoding)), 2)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertEqual(ref.count(B(u'é'.encode(cs))), 4)
+ self.assertEqual(ref.count(B(u'fé'.encode(cs))), 2)
def test_endswith(self):
+ cs = self.encoding
# Set up the test using unicode values and indices
ref = u'café crème'
s, u, v = ref[-4:], u'èm£', u'èµe'
# Encode all this
- enc = ref.encode(self.encoding)
- u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
- s1 = s.encode(self.encoding)
+ enc = ref.encode(cs)
+ s1, u1, v1 = s.encode(cs), u.encode(cs), v.encode(cs)
- with EncodingContext(self.encoding):
- # Test with single argument
- self.assertFalse(ref.endswith(v1))
- self.assertTrue(ref.endswith(s1))
- # Test with a mixed tuple as the argument
- self.assertFalse(ref.endswith((u1, u, v1, v)))
- self.assertTrue(ref.endswith((u1, s1, v1)))
- self.assertTrue(ref.endswith((u1, u, s1, v1, v)))
- self.assertFalse(enc.endswith((u1, v1, u, v)))
- self.assertTrue(enc.endswith((u, s, v)))
- self.assertTrue(enc.endswith((u1, u, s, v1, v)))
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ sb, ub, vb = B(s1), B(u1), B(v1)
+ # Test with single argument
+ self.assertFalse(ref.endswith(vb))
+ self.assertTrue(ref.endswith(sb))
+ # Test with a mixed tuple as the argument
+ self.assertFalse(ref.endswith((ub, u, vb, v)))
+ self.assertTrue(ref.endswith((ub, sb, vb)))
+ self.assertTrue(ref.endswith((ub, u, sb, vb, v)))
+ self.assertFalse(enc.endswith((ub, vb, u, v)))
+ self.assertTrue(enc.endswith((u, s, v)))
+ self.assertTrue(enc.endswith((ub, u, s, vb, v)))
def test_endswith_slice(self):
+ cs = self.encoding
# Set up the test using unicode values and indices
ref = u'«Un café crème?»'
- if len(u'«»'.encode(self.encoding))!=2 and not test_support.is_jython:
+ if len(u'«»'.encode(cs))!=2 and not test_support.is_jython:
# CPython fails on str.startswith(unicode, int, int) as it passes
# byte indices to unicode.startswith(unicode, int, int) unchanged.
# It only works if « and » encode to single bytes. Easier test:
@@ -994,133 +1122,170 @@
a, b = 4, -2
s, u, v = ref[b-4:b], u'èm£', u'èµe'
# Encode all this, including the indices
- enc = ref.encode(self.encoding)
- u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
- a1 = len(ref[:a].encode(self.encoding))
- b1 = - len(ref[b:].encode(self.encoding))
- s1 = s.encode(self.encoding)
+ enc = ref.encode(cs)
+ u1, v1 = u.encode(cs), v.encode(cs)
+ a1 = len(ref[:a].encode(cs))
+ b1 = - len(ref[b:].encode(cs))
+ s1 = s.encode(cs)
- with EncodingContext(self.encoding):
- # Test the assumption on which the test is based
- self.assertEqual(ref[a:b], enc[a1:b1])
- # Test slice with single argument
- self.assertFalse(ref.endswith(v1, a, b))
- self.assertTrue(ref.endswith(s1, a, b))
- self.assertFalse(enc.endswith(v1, a1, b1))
- self.assertTrue(enc.endswith(s, a1, b1))
- # CPython would pass:
- #self.assertTrue(enc.endswith(s, a, b))
- # Test slice with a mixed tuple as the argument
- self.assertFalse(ref.endswith((u1, u, v1, v), a, b))
- self.assertTrue(ref.endswith((u1, s1, v1), a, b))
- self.assertTrue(ref.endswith((u1, u, s1, v1, v), a, b))
- self.assertFalse(enc.endswith((u1, v1, u, v), a1, b1))
- self.assertTrue(enc.endswith((u, s, v), a1, b1))
- self.assertTrue(enc.endswith((u1, u, s, v1, v), a1, b1))
- # CPython would pass:
- #self.assertTrue(enc.endswith((u, s, v), a, b))
- #self.assertTrue(enc.endswith((u1, u, s, v1, v), a, b))
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ sb, ub, vb = B(s1), B(u1), B(v1)
+ # Test the assumption on which the test is based
+ self.assertEqual(ref[a:b], enc[a1:b1])
+ # Test slice with single argument
+ self.assertFalse(ref.endswith(vb, a, b))
+ self.assertTrue(ref.endswith(sb, a, b))
+ self.assertFalse(enc.endswith(vb, a1, b1))
+ self.assertTrue(enc.endswith(s, a1, b1))
+ # CPython would pass:
+ #self.assertTrue(enc.endswith(s, a, b))
+ # Test slice with a mixed tuple as the argument
+ self.assertFalse(ref.endswith((ub, u, vb, v), a, b))
+ self.assertTrue(ref.endswith((ub, sb, vb), a, b))
+ self.assertTrue(ref.endswith((ub, u, sb, vb, v), a, b))
+ self.assertFalse(enc.endswith((ub, vb, u, v), a1, b1))
+ self.assertTrue(enc.endswith((u, s, v), a1, b1))
+ self.assertTrue(enc.endswith((ub, u, s, vb, v), a1, b1))
+ # CPython would pass:
+ #self.assertTrue(enc.endswith((u, s, v), a, b))
+ #self.assertTrue(enc.endswith((ub, u, s, vb, v), a, b))
def test_find(self):
+ cs = self.encoding
ref = u'café crème'
- sub = u'è'.encode(self.encoding)
- with EncodingContext(self.encoding):
- self.assertEqual(ref.find(sub), 7)
+ sub = u'è'.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertEqual(ref.find(B(sub)), 7)
def test_index(self):
+ cs = self.encoding
ref = u'café crème'
- sub = u'è'.encode(self.encoding)
- with EncodingContext(self.encoding):
- self.assertEqual(ref.index(sub), 7)
+ sub = u'è'.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertEqual(ref.index(B(sub)), 7)
def test_lstrip(self):
+ cs = self.encoding
ref = u"¤£¥¥£¤du blé £"
- sep = u'¥£¤'.encode(self.encoding)
- with EncodingContext(self.encoding):
+ sep = u'¥£¤'.encode(cs)
+ with EncodingContext(cs):
self.assertEqual(ref.lstrip(sep), u"du blé £")
def test_partition(self):
+ cs = self.encoding
ref = u"Des fées hébétées."
- sep1 = u'é'.encode(self.encoding)
- sep2 = u'ées'.encode(self.encoding)
- with EncodingContext(self.encoding):
- self.assertEqual(ref.partition(sep1), (u"Des f", u"é", u"es hébétées."))
- self.assertEqual(ref.partition(sep2), (u"Des f", u"ées", u" hébétées."))
+ sep1 = u'é'.encode(cs)
+ sep2 = u'ées'.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertEqual(ref.partition(B(sep1)), (u"Des f", u"é", u"es hébétées."))
+ self.assertEqual(ref.partition(B(sep2)), (u"Des f", u"ées", u" hébétées."))
def test_replace(self):
+ cs = self.encoding
ref = u"Été."
- a = u'É'.encode(self.encoding)
- b = u'é'.encode(self.encoding)
- with EncodingContext(self.encoding):
- self.assertEqual(ref.replace(a, b), u"été.")
- self.assertEqual(ref.replace(b, a), u"ÉtÉ.")
+ a = u'É'.encode(cs)
+ b = u'é'.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertEqual(ref.replace(B(a), B(b)), u"été.")
+ self.assertEqual(ref.replace(B(b), B(a)), u"ÉtÉ.")
def test_rfind(self):
+ cs = self.encoding
ref = u'café crème'
- sub = u'é'.encode(self.encoding)
- with EncodingContext(self.encoding):
- self.assertEqual(ref.rfind(sub), 3)
+ sub = u'é'.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertEqual(ref.rfind(B(sub)), 3)
def test_rindex(self):
+ cs = self.encoding
ref = u'café crème'
- sub = u'é'.encode(self.encoding)
- with EncodingContext(self.encoding):
- self.assertEqual(ref.index(sub), 3)
+ sub = u'é'.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertEqual(ref.index(B(sub)), 3)
def test_rpartition(self):
+ cs = self.encoding
ref = u"Des fées hébétées."
- sep1 = u'é'.encode(self.encoding)
- sep2 = u'ées'.encode(self.encoding)
- with EncodingContext(self.encoding):
- self.assertEqual(ref.rpartition(sep1), (u"Des fées hébét", u"é", u"es."))
- self.assertEqual(ref.rpartition(sep2), (u"Des fées hébét", u"ées", u"."))
+ sep1 = u'é'.encode(cs)
+ sep2 = u'ées'.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertEqual(ref.rpartition(B(sep1)), (u"Des fées hébét", u"é", u"es."))
+ self.assertEqual(ref.rpartition(B(sep2)), (u"Des fées hébét", u"ées", u"."))
def test_rsplit(self):
+ cs = self.encoding
ref = u"Des fées hébétées."
- sep1 = u'é'.encode(self.encoding)
- sep2 = u'ées'.encode(self.encoding)
- with EncodingContext(self.encoding):
- self.assertEqual(ref.rsplit(sep1, 3), [u"Des fées h", u"b", u"t", u"es."])
- self.assertEqual(ref.rsplit(sep2), [u"Des f", u" hébét", u"."])
+ sep1 = u'é'.encode(cs)
+ sep2 = u'ées'.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertEqual(ref.rsplit(B(sep1), 3), [u"Des fées h", u"b", u"t", u"es."])
+ self.assertEqual(ref.rsplit(B(sep2)), [u"Des f", u" hébét", u"."])
def test_rstrip(self):
+ cs = self.encoding
ref = u"£ du bl饣¤¤£¥"
- sep = u'¥£¤'.encode(self.encoding)
- with EncodingContext(self.encoding):
+ sep = u'¥£¤'.encode(cs)
+ with EncodingContext(cs):
self.assertEqual(ref.rstrip(sep), u"£ du blé")
def test_split(self):
+ cs = self.encoding
ref = u"Des fées hébétées."
- sep1 = u'é'.encode(self.encoding)
- sep2 = u'ées'.encode(self.encoding)
- with EncodingContext(self.encoding):
- self.assertEqual(ref.split(sep1, 3), [u"Des f", u"es h", u"b", u"tées."])
- self.assertEqual(ref.split(sep2), [u"Des f", u" hébét", u"."])
+ sep1 = u'é'.encode(cs)
+ sep2 = u'ées'.encode(cs)
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ self.assertEqual(ref.split(B(sep1), 3), [u"Des f", u"es h", u"b", u"tées."])
+ self.assertEqual(ref.split(B(sep2)), [u"Des f", u" hébét", u"."])
def test_startsswith(self):
+ cs = self.encoding
# Set up the test using unicode values and indices
ref = u'café crème'
s, u, v = ref[:4], u'©af', u'caf£'
# Encode all this
- enc = ref.encode(self.encoding)
- u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
- s1 = s.encode(self.encoding)
+ enc = ref.encode(cs)
+ u1, v1 = u.encode(cs), v.encode(cs)
+ s1 = s.encode(cs)
- with EncodingContext(self.encoding):
- self.assertFalse(ref.startswith(v1))
- self.assertTrue(ref.startswith(enc[:5]))
- # Test with a mixed tuple as the argument
- self.assertFalse(ref.startswith((u1, u, v1, v)))
- self.assertTrue(ref.startswith((u1, enc[:5], v1)))
- self.assertTrue(ref.startswith((u1, u, enc[:5], v1, v)))
- self.assertFalse(enc.startswith((u1, v1, u, v)))
- self.assertTrue(enc.startswith((u, ref[:4], v)))
- self.assertTrue(enc.startswith((u1, u, ref[:4], v1, v)))
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ sb, ub, vb, b5 = B(s1), B(u1), B(v1), B(enc[:5])
+ self.assertFalse(ref.startswith(vb))
+ self.assertTrue(ref.startswith(b5))
+ # Test with a mixed tuple as the argument
+ self.assertFalse(ref.startswith((ub, u, vb, v)))
+ self.assertTrue(ref.startswith((ub, b5, vb)))
+ self.assertTrue(ref.startswith((ub, u, b5, vb, v)))
+ self.assertFalse(enc.startswith((ub, vb, u, v)))
+ self.assertTrue(enc.startswith((u, ref[:4], v)))
+ self.assertTrue(enc.startswith((ub, u, ref[:4], vb, v)))
def test_startsswith_slice(self):
+ cs = self.encoding
# Set up the test using unicode values and indices
ref = u'«Un café crème?»'
- if len(u'«»'.encode(self.encoding))!=2 and not test_support.is_jython:
+ if len(u'«»'.encode(cs))!=2 and not test_support.is_jython:
# CPython fails on str.startswith(unicode, int, int) as it passes
# byte indices to unicode.startswith(unicode, int, int) unchanged.
# It only works if « and » encode to single bytes. Easier test:
@@ -1128,40 +1293,43 @@
a, b = 4, -2
s, u, v = ref[a:a+4], u'©af', u'caf£'
# Encode all this, including the indices
- enc = ref.encode(self.encoding)
- u1, v1 = u.encode(self.encoding), v.encode(self.encoding)
- a1 = len(ref[:a].encode(self.encoding))
- b1 = - len(ref[b:].encode(self.encoding))
- s1 = s.encode(self.encoding)
+ enc = ref.encode(cs)
+ u1, v1 = u.encode(cs), v.encode(cs)
+ a1 = len(ref[:a].encode(cs))
+ b1 = - len(ref[b:].encode(cs))
+ s1 = s.encode(cs)
- with EncodingContext(self.encoding):
- # Test the assumption on which the test is based
- self.assertEqual(ref[a:b], enc[a1:b1])
- # Test slice with single argument
- self.assertFalse(ref.startswith(v, a, b))
- self.assertTrue(ref.startswith(s1, a, b))
- self.assertFalse(enc.startswith(v1, a1, b1))
- self.assertTrue(enc.startswith(s, a1, b1))
- # CPython would pass:
- #self.assertTrue(enc.startswith(s, a, b))
- # Test slice with a mixed tuple as the argument
- self.assertFalse(ref.startswith((u1, u, v1, v), a, b))
- self.assertTrue(ref.startswith((u1, s1, v1), a, b))
- self.assertTrue(ref.startswith((u1, u, s1, v1, v), a, b))
- self.assertFalse(enc.startswith((u1, v1, u, v), a1, b1))
- self.assertTrue(enc.startswith((u, s, v), a1, b1))
- self.assertTrue(enc.startswith((u1, u, s, v1, v), a1, b1))
- # CPython would pass:
- #self.assertTrue(enc.startswith((u, s, v), a, b))
- #self.assertTrue(enc.startswith((u1, u, s, v1, v), a, b))
+ with EncodingContext(cs):
+ for B in self.BYTE_TYPES:
+ #print B,
+ sb, ub, vb = B(s1), B(u1), B(v1)
+ # Test the assumption on which the test is based
+ self.assertEqual(ref[a:b], enc[a1:b1])
+ # Test slice with single argument
+ self.assertFalse(ref.startswith(v, a, b))
+ self.assertTrue(ref.startswith(sb, a, b))
+ self.assertFalse(enc.startswith(vb, a1, b1))
+ self.assertTrue(enc.startswith(s, a1, b1))
+ # CPython would pass:
+ #self.assertTrue(enc.startswith(s, a, b))
+ # Test slice with a mixed tuple as the argument
+ self.assertFalse(ref.startswith((ub, u, vb, v), a, b))
+ self.assertTrue(ref.startswith((ub, sb, vb), a, b))
+ self.assertTrue(ref.startswith((ub, u, sb, vb, v), a, b))
+ self.assertFalse(enc.startswith((ub, vb, u, v), a1, b1))
+ self.assertTrue(enc.startswith((u, s, v), a1, b1))
+ self.assertTrue(enc.startswith((ub, u, s, vb, v), a1, b1))
+ # CPython would pass:
+ #self.assertTrue(enc.startswith((u, s, v), a, b))
+ #self.assertTrue(enc.startswith((ub, u, s, vb, v), a, b))
def test_strip(self):
+ cs = self.encoding
ref = u"¤£¥¥£¤du bl饣¤¤£¥"
- sep = u'¥£¤'.encode(self.encoding)
- with EncodingContext(self.encoding):
+ sep = u'¥£¤'.encode(cs)
+ with EncodingContext(cs):
self.assertEqual(ref.strip(sep), u"du blé")
-
class DefaultDecodingLatin1(DefaultDecodingTestCase):
encoding = "latin-1"
diff --git a/src/org/python/core/PyUnicode.java b/src/org/python/core/PyUnicode.java
--- a/src/org/python/core/PyUnicode.java
+++ b/src/org/python/core/PyUnicode.java
@@ -759,6 +759,62 @@
}
}
+ @Override
+ public PyObject __lt__(PyObject other) {
+ return unicode___lt__(other);
+ }
+
+ @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___lt___doc)
+ final PyObject unicode___lt__(PyObject other) {
+ String s = coerceForComparison(other);
+ if (s == null) {
+ return null;
+ }
+ return getString().compareTo(s) < 0 ? Py.True : Py.False;
+ }
+
+ @Override
+ public PyObject __le__(PyObject other) {
+ return unicode___le__(other);
+ }
+
+ @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___le___doc)
+ final PyObject unicode___le__(PyObject other) {
+ String s = coerceForComparison(other);
+ if (s == null) {
+ return null;
+ }
+ return getString().compareTo(s) <= 0 ? Py.True : Py.False;
+ }
+
+ @Override
+ public PyObject __gt__(PyObject other) {
+ return unicode___gt__(other);
+ }
+
+ @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___gt___doc)
+ final PyObject unicode___gt__(PyObject other) {
+ String s = coerceForComparison(other);
+ if (s == null) {
+ return null;
+ }
+ return getString().compareTo(s) > 0 ? Py.True : Py.False;
+ }
+
+ @Override
+ public PyObject __ge__(PyObject other) {
+ return unicode___ge__(other);
+ }
+
+ @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.unicode___ge___doc)
+ final PyObject unicode___ge__(PyObject other) {
+ String s = coerceForComparison(other);
+ if (s == null) {
+ return null;
+ }
+ return getString().compareTo(s) >= 0 ? Py.True : Py.False;
+ }
+
@ExposedMethod(doc = BuiltinDocs.unicode___hash___doc)
final int unicode___hash__() {
return str___hash__();
@@ -960,8 +1016,10 @@
* Interpret the object as a Java <code>String</code> for use in comparison. The return
* represents characters as UTF-16. From a <code>PyUnicode</code> we return its internal string.
* A <code>str</code> and <code>buffer</code> argument is decoded with the default encoding.
- * Equivalent to {@link #coerceToStringOrNull(PyObject)} allowing only the types supported in
- * (C)Python <code>unicode.__eq__</code>.
+ * <p>
+ * This method could be replaced by {@link #coerceToStringOrNull(PyObject)} if we were content
+ * to allow a wider range of types to be supported in comparison operations than (C)Python
+ * <code>unicode.__eq__</code>.
*
* @param o the object to coerce
* @return an equivalent <code>String</code>
@@ -1212,9 +1270,10 @@
* {@link #coerceToUnicode(PyObject, boolean)}.
*
* @param o the object to coerce
+ * @param name of method
* @return an equivalent <code>PyUnicode</code> (or o itself, or <code>null</code>)
*/
- private static PyUnicode coerceStripSepToUnicode(PyObject o) {
+ private static PyUnicode coerceStripSepToUnicode(PyObject o, String name) {
if (o == null) {
return null;
} else if (o instanceof PyUnicode) {
@@ -1225,14 +1284,14 @@
} else if (o == Py.None) {
return null;
} else {
- throw Py.TypeError("strip arg must be None, unicode or str");
+ throw Py.TypeError(name + " arg must be None, unicode or str");
}
}
@ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_strip_doc)
final PyObject unicode_strip(PyObject sepObj) {
- PyUnicode sep = coerceStripSepToUnicode(sepObj);
+ PyUnicode sep = coerceStripSepToUnicode(sepObj, "strip");
if (isBasicPlane()) {
// this contains only basic plane characters
@@ -1253,7 +1312,7 @@
@ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_lstrip_doc)
final PyObject unicode_lstrip(PyObject sepObj) {
- PyUnicode sep = coerceStripSepToUnicode(sepObj);
+ PyUnicode sep = coerceStripSepToUnicode(sepObj, "lstrip");
if (isBasicPlane()) {
// this contains only basic plane characters
@@ -1273,7 +1332,7 @@
@ExposedMethod(defaults = "null", doc = BuiltinDocs.unicode_rstrip_doc)
final PyObject unicode_rstrip(PyObject sepObj) {
- PyUnicode sep = coerceStripSepToUnicode(sepObj);
+ PyUnicode sep = coerceStripSepToUnicode(sepObj, "rstrip");
if (isBasicPlane()) {
// this contains only basic plane characters
--
Repository URL: https://hg.python.org/jython
More information about the Jython-checkins
mailing list