[pypy-commit] pypy py3.5: Fix str.startswith/str.endswith corner case (CPython issue #24284)

Thu Oct 27 21:16:39 EDT 2016

Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch: py3.5
Changeset: r87973:c33749e916de
Date: 2016-10-28 02:16 +0100
http://bitbucket.org/pypy/pypy/changeset/c33749e916de/

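Log:	Fix str.startswith/str.endswith corner case (CPython issue #24284): with a start index past the end of the string, an empty prefix/suffix now gives False instead of True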

diff --git a/pypy/objspace/std/stringmethods.py b/pypy/objspace/std/stringmethods.py
--- a/pypy/objspace/std/stringmethods.py
+++ b/pypy/objspace/std/stringmethods.py
@@ -612,14 +612,8 @@
 
     def _startswith(self, space, value, w_prefix, start, end):
         prefix = self._op_val(space, w_prefix)
-        if start > len(value):
-            return self._starts_ends_overflow(prefix)
         return startswith(value, prefix, start, end)
 
-    def _starts_ends_overflow(self, prefix):
-        return False     # bug-to-bug compat: this is for strings and
-                         # bytearrays, but overridden for unicodes
-
     def descr_endswith(self, space, w_suffix, w_start=None, w_end=None):
         (value, start, end) = self._convert_idx_params(space, w_start, w_end)
         if space.isinstance_w(w_suffix, space.w_tuple):
@@ -643,8 +637,6 @@
 
     def _endswith(self, space, value, w_prefix, start, end):
         prefix = self._op_val(space, w_prefix)
-        if start > len(value):
-            return self._starts_ends_overflow(prefix)
         return endswith(value, prefix, start, end)
 
     def _strip(self, space, w_chars, left, right):
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -39,7 +39,7 @@
         assert not ('a' == 5)
         assert 'a' != 5
         raises(TypeError, "'a' < 5")
-        
+
 
 class AppTestUnicodeString:
     spaceconfig = dict(usemodules=('unicodedata',))
@@ -54,7 +54,7 @@
             assert a == b
             assert type(a) == type(b)
         check(', '.join(['a']), 'a')
-        raises(TypeError, ','.join, [b'a']) 
+        raises(TypeError, ','.join, [b'a'])
         exc = raises(TypeError, ''.join, ['a', 2, 3])
         assert 'sequence item 1' in str(exc.value)
 
@@ -211,7 +211,7 @@
         assert "_var".isidentifier() is True
         assert "_!var".isidentifier() is False
         assert "3abc".isidentifier() is False
-        
+
     def test_capitalize(self):
         assert "brown fox".capitalize() == "Brown fox"
         assert ' hello '.capitalize() == ' hello '
@@ -395,10 +395,10 @@
     def test_startswith_too_large(self):
         assert u'ab'.startswith(u'b', 1) is True
         assert u'ab'.startswith(u'', 2) is True
-        assert u'ab'.startswith(u'', 3) is True   # not False
+        assert u'ab'.startswith(u'', 3) is False
         assert u'ab'.endswith(u'b', 1) is True
         assert u'ab'.endswith(u'', 2) is True
-        assert u'ab'.endswith(u'', 3) is True   # not False
+        assert u'ab'.endswith(u'', 3) is False
 
     def test_startswith_tuples(self):
         assert 'hello'.startswith(('he', 'ha'))
@@ -581,7 +581,7 @@
             '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
             '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
             '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
-            ' Nunstuck git und'.encode('utf-8') == 
+            ' Nunstuck git und'.encode('utf-8') ==
             b'\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
             b'\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
             b'\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
@@ -595,9 +595,9 @@
         )
 
         # UTF-8 specific decoding tests
-        assert str(b'\xf0\xa3\x91\x96', 'utf-8') == '\U00023456' 
-        assert str(b'\xf0\x90\x80\x82', 'utf-8') == '\U00010002' 
-        assert str(b'\xe2\x82\xac', 'utf-8') == '\u20ac' 
+        assert str(b'\xf0\xa3\x91\x96', 'utf-8') == '\U00023456'
+        assert str(b'\xf0\x90\x80\x82', 'utf-8') == '\U00010002'
+        assert str(b'\xe2\x82\xac', 'utf-8') == '\u20ac'
         # Invalid Continuation Bytes, EOF
         raises(UnicodeDecodeError, b'\xc4\x00'.decode, 'utf-8')
         raises(UnicodeDecodeError, b'\xe2\x82'.decode, 'utf-8')
@@ -609,7 +609,7 @@
         raises(UnicodeDecodeError, b'\xf5\x80\x81\x82'.decode, 'utf-8')
         raises(UnicodeDecodeError, b'\xf4\x90\x80\x80'.decode, 'utf-8')
         # CESU-8
-        raises(UnicodeDecodeError, b'\xed\xa0\xbc\xed\xb2\xb1'.decode, 'utf-8') 
+        raises(UnicodeDecodeError, b'\xed\xa0\xbc\xed\xb2\xb1'.decode, 'utf-8')
 
     def test_codecs_errors(self):
         # Error handling (encoding)
@@ -641,7 +641,7 @@
         x = '\U00090418\u027d\U000582b9\u54c3\U000fcb6e'
         y = "'\\U00090418\u027d\\U000582b9\u54c3\\U000fcb6e'"
         assert (repr(x) == y)
-        assert (repr('\n') == 
+        assert (repr('\n') ==
                 "'\\n'")
 
 
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -520,9 +520,6 @@
     descr_ljust = _fix_fillchar(StringMethods.descr_ljust)
     descr_rjust = _fix_fillchar(StringMethods.descr_rjust)
 
-    def _starts_ends_overflow(self, prefix):
-        return len(prefix) == 0
-
     @staticmethod
     def _iter_getitem_result(self, space, index):
         assert isinstance(self, W_UnicodeObject)