[pypy-commit] pypy unicode-utf8: enough plumbing to pass some tests, not efficient at all
fijal
pypy.commits at gmail.com
Sat Feb 25 10:33:16 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r90356:522a73b2ba2e
Date: 2017-02-25 16:32 +0100
http://bitbucket.org/pypy/pypy/changeset/522a73b2ba2e/
Log: enough plumbing to pass some tests, not efficient at all
diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -702,7 +702,7 @@
if space.isinstance_w(w_prefix, space.w_unicode):
self_as_unicode = unicode_from_encoded_object(space, self, None,
None)
- return self_as_unicode._startswith(space, self_as_unicode._value,
+ return self_as_unicode._startswith(space, self_as_unicode._utf8.decode("utf8"),
w_prefix, start, end)
return self._StringMethods__startswith(space, value, w_prefix, start,
end)
@@ -712,7 +712,7 @@
if space.isinstance_w(w_suffix, space.w_unicode):
self_as_unicode = unicode_from_encoded_object(space, self, None,
None)
- return self_as_unicode._endswith(space, self_as_unicode._value,
+ return self_as_unicode._endswith(space, self_as_unicode._utf8.decode("utf8"),
w_suffix, start, end)
return self._StringMethods__endswith(space, value, w_suffix, start,
end)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -237,7 +237,7 @@
assert isinstance(w_value, W_UnicodeObject)
w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype)
- W_UnicodeObject.__init__(w_newobj, w_value._value)
+ W_UnicodeObject.__init__(w_newobj, w_value._utf8, w_value._length)
return w_newobj
def descr_repr(self, space):
@@ -340,8 +340,7 @@
return mod_format(space, w_values, self, do_unicode=True)
def descr_translate(self, space, w_table):
- xxx
- selfvalue = self._value
+ selfvalue = self._utf8.decode("utf8")
w_sys = space.getbuiltinmodule('sys')
maxunicode = space.int_w(space.getattr(w_sys,
space.newtext("maxunicode")))
@@ -365,12 +364,12 @@
hex(maxunicode + 1))
result.append(unichr(newval))
elif space.isinstance_w(w_newval, space.w_unicode):
- result.append(space.unicode_w(w_newval))
+ result.append(space.utf8_w(w_newval).decode("utf8"))
else:
raise oefmt(space.w_TypeError,
"character mapping must return integer, None "
"or unicode")
- return W_UnicodeObject(u''.join(result))
+ return W_UnicodeObject(u''.join(result).encode("utf8"), -1)
def descr_encode(self, space, w_encoding=None, w_errors=None):
encoding, errors = _get_encoding_and_errors(space, w_encoding,
@@ -1286,7 +1285,8 @@
def unicode_to_decimal_w(space, w_unistr):
if not isinstance(w_unistr, W_UnicodeObject):
raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
- unistr = w_unistr._value
+ unistr = w_unistr._utf8.decode("utf8")
+ # XXX speed up
result = ['\0'] * len(unistr)
digits = ['0', '1', '2', '3', '4',
'5', '6', '7', '8', '9']
More information about the pypy-commit mailing list