[pypy-commit] pypy py3k: Remove mixed operations between str and unicode
amauryfa
noreply at buildbot.pypy.org
Wed Oct 12 22:23:40 CEST 2011
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3k
Changeset: r47998:2693bfc6f0ac
Date: 2011-10-12 22:19 +0200
http://bitbucket.org/pypy/pypy/changeset/2693bfc6f0ac/
Log: Remove mixed operations between str and unicode
diff --git a/pypy/objspace/descroperation.py b/pypy/objspace/descroperation.py
--- a/pypy/objspace/descroperation.py
+++ b/pypy/objspace/descroperation.py
@@ -675,15 +675,6 @@
# Note that space.is_w() is potentially not happy if one of them
# is None (e.g. with the thunk space)...
if w_left_src is not w_right_src: # XXX
- # -- cpython bug compatibility: see objspace/std/test/
- # -- test_unicodeobject.test_str_unicode_concat_overrides.
- # -- The following handles "unicode + string subclass" by
- # -- pretending that the unicode is a superclass of the
- # -- string, thus giving priority to the string subclass'
- # -- __radd__() method. The case "string + unicode subclass"
- # -- is handled directly by add__String_Unicode().
- if symbol == '+' and space.is_w(w_typ1, space.w_unicode):
- w_typ1 = space.w_basestring
# -- end of bug compatibility
if space.is_true(space.issubtype(w_typ2, w_typ1)):
if (w_left_src and w_right_src and
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -196,21 +196,9 @@
def eq__Bytearray_String(space, w_bytearray, w_other):
return space.eq(str__Bytearray(space, w_bytearray), w_other)
-def eq__Bytearray_Unicode(space, w_bytearray, w_other):
- return space.w_False
-
-def eq__Unicode_Bytearray(space, w_other, w_bytearray):
- return space.w_False
-
def ne__Bytearray_String(space, w_bytearray, w_other):
return space.ne(str__Bytearray(space, w_bytearray), w_other)
-def ne__Bytearray_Unicode(space, w_bytearray, w_other):
- return space.w_True
-
-def ne__Unicode_Bytearray(space, w_other, w_bytearray):
- return space.w_True
-
def _min(a, b):
if a < b:
return a
diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -24,19 +24,6 @@
def wrapunicode(space, uni):
return W_RopeUnicodeObject(rope.rope_from_unicode(uni))
-def unicode_from_string(space, w_str):
- from pypy.objspace.std.unicodetype import getdefaultencoding
- assert isinstance(w_str, W_RopeObject)
- encoding = getdefaultencoding(space)
- w_retval = decode_string(space, w_str, encoding, "strict")
- if not space.isinstance_w(w_retval, space.w_unicode):
- raise operationerrfmt(
- space.w_TypeError,
- "decoder did not return an unicode object (type '%s')",
- space.type(w_retval).getname(space))
- assert isinstance(w_retval, W_RopeUnicodeObject)
- return w_retval
-
def decode_string(space, w_str, encoding, errors):
from pypy.objspace.std.unicodetype import decode_object
if errors is None or errors == "strict":
@@ -112,8 +99,6 @@
def ropeunicode_w(space, w_str):
if isinstance(w_str, W_RopeUnicodeObject):
return w_str._node
- if isinstance(w_str, W_RopeObject):
- return unicode_from_string(space, w_str)._node
return rope.LiteralUnicodeNode(space.unicode_w(w_str))
@@ -157,12 +142,6 @@
raise OperationError(space.w_UnicodeEncodeError, space.newtuple([w_encoding, w_unistr, w_start, w_end, w_reason]))
return ''.join(result)
-# string-to-unicode delegation
-def delegate_Rope2RopeUnicode(space, w_rope):
- w_uni = unicode_from_string(space, w_rope)
- assert isinstance(w_uni, W_RopeUnicodeObject) # help the annotator!
- return w_uni
-
def str__RopeUnicode(space, w_uni):
return space.call_method(w_uni, 'encode')
@@ -183,19 +162,9 @@
def eq__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
return space.newbool(_eq(w_str1, w_str2))
-def eq__RopeUnicode_Rope(space, w_runi, w_rope):
- from pypy.objspace.std.unicodeobject import _unicode_string_comparison
- return _unicode_string_comparison(space, w_runi, w_rope,
- False, unicode_from_string)
-
def ne__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
return space.newbool(not _eq(w_str1, w_str2))
-def ne__RopeUnicode_Rope(space, w_runi, w_rope):
- from pypy.objspace.std.unicodeobject import _unicode_string_comparison
- return _unicode_string_comparison(space, w_runi, w_rope,
- True, unicode_from_string)
-
def gt__RopeUnicode_RopeUnicode(space, w_str1, w_str2):
n1 = w_str1._node
n2 = w_str2._node
@@ -224,20 +193,11 @@
raise OperationError(space.w_OverflowError,
space.wrap("string too long"))
-def add__Rope_RopeUnicode(space, w_left, w_right):
- return space.add(unicode_from_string(space, w_left) , w_right)
-
-def add__RopeUnicode_Rope(space, w_left, w_right):
- return space.add(w_left, unicode_from_string(space, w_right))
-
def contains__RopeUnicode_RopeUnicode(space, w_container, w_item):
item = w_item._node
container = w_container._node
return space.newbool(rope.find(container, item) != -1)
-def contains__Rope_RopeUnicode(space, w_container, w_item):
- return space.contains(unicode_from_string(space, w_container), w_item )
-
def unicode_join__RopeUnicode_ANY(space, w_self, w_list):
l_w = space.listview(w_list)
delim = w_self._node
@@ -254,10 +214,8 @@
if isinstance(w_item, W_RopeUnicodeObject):
# shortcut for performane
item = w_item._node
- elif space.isinstance_w(w_item, space.w_str):
- item = unicode_from_string(space, w_item)._node
else:
- msg = 'sequence item %d: expected string or Unicode'
+ msg = 'sequence item %d: expected string'
raise operationerrfmt(space.w_TypeError, msg, i)
values_list.append(item)
try:
@@ -388,27 +346,17 @@
return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, _contains,
w_chars._node.flatten_unicode()))
-def unicode_strip__RopeUnicode_Rope(space, w_self, w_chars):
- return space.call_method(w_self, 'strip',
- unicode_from_string(space, w_chars))
-
def unicode_lstrip__RopeUnicode_None(space, w_self, w_chars):
return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _isspace))
def unicode_lstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _contains,
w_chars._node.flatten_unicode()))
-def unicode_lstrip__RopeUnicode_Rope(space, w_self, w_chars):
- return space.call_method(w_self, 'lstrip',
- unicode_from_string(space, w_chars))
def unicode_rstrip__RopeUnicode_None(space, w_self, w_chars):
return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _isspace))
def unicode_rstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _contains,
w_chars._node.flatten_unicode()))
-def unicode_rstrip__RopeUnicode_Rope(space, w_self, w_chars):
- return space.call_method(w_self, 'rstrip',
- unicode_from_string(space, w_chars))
def unicode_capitalize__RopeUnicode(space, w_self):
input = w_self._node
@@ -979,49 +927,3 @@
from pypy.objspace.std import unicodetype
register_all(vars(), unicodetype)
-
-# str.strip(unicode) needs to convert self to unicode and call unicode.strip we
-# use the following magic to register strip_string_unicode as a String
-# multimethod.
-
-# XXX couldn't string and unicode _share_ the multimethods that make up their
-# methods?
-
-class str_methods:
- from pypy.objspace.std import stringtype
- W_RopeUnicodeObject = W_RopeUnicodeObject
- from pypy.objspace.std.ropeobject import W_RopeObject
- def str_strip__Rope_RopeUnicode(space, w_self, w_chars):
- return space.call_method(unicode_from_string(space, w_self),
- 'strip', w_chars)
- def str_lstrip__Rope_RopeUnicode(space, w_self, w_chars):
- return space.call_method(unicode_from_string(space, w_self),
- 'lstrip', w_chars)
- def str_rstrip__Rope_RopeUnicode(space, w_self, w_chars):
- return space.call_method(unicode_from_string(space, w_self),
- 'rstrip', w_chars)
- def str_count__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- return space.call_method(unicode_from_string(space, w_self),
- 'count', w_substr, w_start, w_end)
- def str_find__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- return space.call_method(unicode_from_string(space, w_self),
- 'find', w_substr, w_start, w_end)
- def str_rfind__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- return space.call_method(unicode_from_string(space, w_self),
- 'rfind', w_substr, w_start, w_end)
- def str_index__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- return space.call_method(unicode_from_string(space, w_self),
- 'index', w_substr, w_start, w_end)
- def str_rindex__Rope_RopeUnicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- return space.call_method(unicode_from_string(space, w_self),
- 'rindex', w_substr, w_start, w_end)
- def str_replace__Rope_RopeUnicode_RopeUnicode_ANY(space, w_self, w_old, w_new, w_maxsplit):
- return space.call_method(unicode_from_string(space, w_self),
- 'replace', w_old, w_new, w_maxsplit)
- def str_split__Rope_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit):
- return space.call_method(unicode_from_string(space, w_self),
- 'split', w_delim, w_maxsplit)
- def str_rsplit__Rope_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit):
- return space.call_method(unicode_from_string(space, w_self),
- 'rsplit', w_delim, w_maxsplit)
- register_all(vars(), stringtype)
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -996,10 +996,8 @@
def str_decode__String_ANY_ANY(space, w_string, w_encoding=None, w_errors=None):
from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
- unicode_from_string, decode_object
+ decode_object
encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
- if encoding is None and errors is None:
- return unicode_from_string(space, w_string)
return decode_object(space, w_string, encoding, errors)
def str_encode__String_ANY_ANY(space, w_string, w_encoding=None, w_errors=None):
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -4,6 +4,7 @@
class AppTestUnicodeStringStdOnly:
def test_compares(self):
+ assert type('a') != type(b'a')
assert 'a' != b'a'
assert b'a' != 'a'
assert not ('a' == 5)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -4,7 +4,6 @@
from pypy.interpreter import gateway
from pypy.interpreter.error import OperationError, operationerrfmt
from pypy.objspace.std.stringobject import W_StringObject, make_rsplit_with_delim
-from pypy.objspace.std.ropeobject import W_RopeObject
from pypy.objspace.std.noneobject import W_NoneObject
from pypy.objspace.std.sliceobject import W_SliceObject, normalize_simple_slice
from pypy.objspace.std import slicetype, newformat
@@ -78,26 +77,6 @@
raise OperationError(space.w_UnicodeEncodeError, space.newtuple([w_encoding, w_unistr, w_start, w_end, w_reason]))
return ''.join(result)
-# checks if should trigger an unicode warning
-def _unicode_string_comparison(space, w_uni, w_str, inverse, uni_from_str):
- try:
- w_uni2 = uni_from_str(space, w_str)
- except OperationError, e:
- if e.match(space, space.w_UnicodeDecodeError):
- if inverse:
- msg = "Unicode unequal comparison failed to convert both " \
- "arguments to Unicode - interpreting them as being unequal"
- else :
- msg = "Unicode equal comparison failed to convert both " \
- "arguments to Unicode - interpreting them as being unequal"
- space.warn(msg, space.w_UnicodeWarning)
- return space.newbool(inverse)
- raise
- result = space.eq(w_uni, w_uni2)
- if inverse:
- return space.not_(result)
- return result
-
def str__Unicode(space, w_uni):
from pypy.objspace.std.unicodetype import encode_object
return encode_object(space, w_uni, None, None)
@@ -105,20 +84,6 @@
def eq__Unicode_Unicode(space, w_left, w_right):
return space.newbool(w_left._value == w_right._value)
-def eq__Unicode_String(space, w_uni, w_str):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return _unicode_string_comparison(space, w_uni, w_str,
- False, unicode_from_string)
-
-eq__Unicode_Rope = eq__Unicode_String
-
-def ne__Unicode_String(space, w_uni, w_str):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return _unicode_string_comparison(space, w_uni, w_str,
- True, unicode_from_string)
-
-ne__Unicode_Rope = ne__Unicode_String
-
def lt__Unicode_Unicode(space, w_left, w_right):
left = w_left._value
right = w_right._value
@@ -137,38 +102,6 @@
def add__Unicode_Unicode(space, w_left, w_right):
return W_UnicodeObject(w_left._value + w_right._value)
-def add__String_Unicode(space, w_left, w_right):
- # this function is needed to make 'abc'.__add__(u'def') return
- # u'abcdef' instead of NotImplemented. This is what occurs on
- # top of CPython.
- from pypy.objspace.std.unicodetype import unicode_from_string
- # XXX fragile implementation detail: for "string + unicode subclass",
- # if the unicode subclass overrides __radd__(), then it will be
- # called (see test_str_unicode_concat_overrides). This occurs as a
- # result of the following call to space.add() in which the first
- # argument is a unicode and the second argument a subclass of unicode
- # (and thus the usual logic about calling __radd__() first applies).
- return space.add(unicode_from_string(space, w_left) , w_right)
-
-add__Rope_Unicode = add__String_Unicode
-
-def add__Unicode_String(space, w_left, w_right):
- # this function is needed to make 'abc'.__radd__(u'def') return
- # u'defabc', although it's completely unclear if that's necessary
- # given that CPython doesn't even have a method str.__radd__().
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.add(w_left, unicode_from_string(space, w_right))
- # Note about "unicode + string subclass": look for
- # "cpython bug compatibility" in descroperation.py
-
-add__Unicode_Rope = add__Unicode_String
-
-def contains__String_Unicode(space, w_container, w_item):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.contains(unicode_from_string(space, w_container), w_item )
-contains__Rope_Unicode = contains__String_Unicode
-
-
def contains__Unicode_Unicode(space, w_container, w_item):
item = w_item._value
container = w_container._value
@@ -373,33 +306,16 @@
return _strip_none(space, w_self, 1, 1)
def unicode_strip__Unicode_Unicode(space, w_self, w_chars):
return _strip(space, w_self, w_chars, 1, 1)
-def unicode_strip__Unicode_String(space, w_self, w_chars):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(w_self, 'strip',
- unicode_from_string(space, w_chars))
-unicode_strip__Unicode_Rope = unicode_strip__Unicode_String
def unicode_lstrip__Unicode_None(space, w_self, w_chars):
return _strip_none(space, w_self, 1, 0)
def unicode_lstrip__Unicode_Unicode(space, w_self, w_chars):
return _strip(space, w_self, w_chars, 1, 0)
-def unicode_lstrip__Unicode_String(space, w_self, w_chars):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(w_self, 'lstrip',
- unicode_from_string(space, w_chars))
-
-unicode_lstrip__Unicode_Rope = unicode_lstrip__Unicode_String
def unicode_rstrip__Unicode_None(space, w_self, w_chars):
return _strip_none(space, w_self, 0, 1)
def unicode_rstrip__Unicode_Unicode(space, w_self, w_chars):
return _strip(space, w_self, w_chars, 0, 1)
-def unicode_rstrip__Unicode_String(space, w_self, w_chars):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(w_self, 'rstrip',
- unicode_from_string(space, w_chars))
-
-unicode_rstrip__Unicode_Rope = unicode_rstrip__Unicode_String
def unicode_capitalize__Unicode(space, w_self):
input = w_self._value
@@ -928,71 +844,3 @@
from pypy.objspace.std import unicodetype
register_all(vars(), unicodetype)
-# str.strip(unicode) needs to convert self to unicode and call unicode.strip we
-# use the following magic to register strip_string_unicode as a String
-# multimethod.
-
-# XXX couldn't string and unicode _share_ the multimethods that make up their
-# methods?
-
-class str_methods:
- from pypy.objspace.std import stringtype
- W_UnicodeObject = W_UnicodeObject
- from pypy.objspace.std.stringobject import W_StringObject
- from pypy.objspace.std.ropeobject import W_RopeObject
- def str_strip__String_Unicode(space, w_self, w_chars):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'strip', w_chars)
- str_strip__Rope_Unicode = str_strip__String_Unicode
- def str_lstrip__String_Unicode(space, w_self, w_chars):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'lstrip', w_chars)
- str_lstrip__Rope_Unicode = str_lstrip__String_Unicode
- def str_rstrip__String_Unicode(space, w_self, w_chars):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'rstrip', w_chars)
- str_rstrip__Rope_Unicode = str_rstrip__String_Unicode
- def str_count__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'count', w_substr, w_start, w_end)
- str_count__Rope_Unicode_ANY_ANY = str_count__String_Unicode_ANY_ANY
- def str_find__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'find', w_substr, w_start, w_end)
- str_find__Rope_Unicode_ANY_ANY = str_find__String_Unicode_ANY_ANY
- def str_rfind__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'rfind', w_substr, w_start, w_end)
- str_rfind__Rope_Unicode_ANY_ANY = str_rfind__String_Unicode_ANY_ANY
- def str_index__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'index', w_substr, w_start, w_end)
- str_index__Rope_Unicode_ANY_ANY = str_index__String_Unicode_ANY_ANY
- def str_rindex__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'rindex', w_substr, w_start, w_end)
- str_rindex__Rope_Unicode_ANY_ANY = str_rindex__String_Unicode_ANY_ANY
- def str_replace__String_Unicode_Unicode_ANY(space, w_self, w_old, w_new, w_maxsplit):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'replace', w_old, w_new, w_maxsplit)
- str_replace__Rope_Unicode_Unicode_ANY = str_replace__String_Unicode_Unicode_ANY
- def str_split__String_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'split', w_delim, w_maxsplit)
- str_split__Rope_Unicode_ANY = str_split__String_Unicode_ANY
- def str_rsplit__String_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
- from pypy.objspace.std.unicodetype import unicode_from_string
- return space.call_method(unicode_from_string(space, w_self),
- 'rsplit', w_delim, w_maxsplit)
- str_rsplit__Rope_Unicode_ANY = str_rsplit__String_Unicode_ANY
- register_all(vars(), stringtype)
diff --git a/pypy/objspace/std/unicodetype.py b/pypy/objspace/std/unicodetype.py
--- a/pypy/objspace/std/unicodetype.py
+++ b/pypy/objspace/std/unicodetype.py
@@ -313,22 +313,6 @@
return w_res
return unicode_from_encoded_object(space, w_res, None, "strict")
-def unicode_from_string(space, w_str):
- # this is a performance and bootstrapping hack
- if space.config.objspace.std.withropeunicode:
- from pypy.objspace.std.ropeunicodeobject import unicode_from_string
- return unicode_from_string(space, w_str)
- encoding = getdefaultencoding(space)
- from pypy.objspace.std.unicodeobject import W_UnicodeObject
- if encoding != 'ascii':
- return unicode_from_encoded_object(space, w_str, encoding, "strict")
- s = space.str_w(w_str)
- try:
- return W_UnicodeObject(s.decode("ascii"))
- except UnicodeDecodeError:
- # raising UnicodeDecodeError is messy, "please crash for me"
- return unicode_from_encoded_object(space, w_str, "ascii", "strict")
-
def unicode_decode__unitypedef_ANY_ANY(space, w_unicode, w_encoding=None,
w_errors=None):
return space.call_method(space.str(w_unicode), 'decode',
More information about the pypy-commit mailing list