[pypy-commit] pypy unicode-utf8: enough plumbing to pass some tests, not efficient at all

Sat Feb 25 10:33:16 EST 2017

Author: fijal
Branch: unicode-utf8
Changeset: r90356:522a73b2ba2e
Date: 2017-02-25 16:32 +0100
http://bitbucket.org/pypy/pypy/changeset/522a73b2ba2e/

Log:	enough plumbing to pass some tests, not efficient at all

diff --git a/pypy/objspace/std/bytesobject.py b/pypy/objspace/std/bytesobject.py
--- a/pypy/objspace/std/bytesobject.py
+++ b/pypy/objspace/std/bytesobject.py
@@ -702,7 +702,7 @@
         if space.isinstance_w(w_prefix, space.w_unicode):
             self_as_unicode = unicode_from_encoded_object(space, self, None,
                                                           None)
-            return self_as_unicode._startswith(space, self_as_unicode._value,
+            return self_as_unicode._startswith(space, self_as_unicode._utf8.decode("utf8"),
                                                w_prefix, start, end)
         return self._StringMethods__startswith(space, value, w_prefix, start,
                                                end)
@@ -712,7 +712,7 @@
         if space.isinstance_w(w_suffix, space.w_unicode):
             self_as_unicode = unicode_from_encoded_object(space, self, None,
                                                           None)
-            return self_as_unicode._endswith(space, self_as_unicode._value,
+            return self_as_unicode._endswith(space, self_as_unicode._utf8.decode("utf8"),
                                              w_suffix, start, end)
         return self._StringMethods__endswith(space, value, w_suffix, start,
                                              end)
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -237,7 +237,7 @@
 
         assert isinstance(w_value, W_UnicodeObject)
         w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype)
-        W_UnicodeObject.__init__(w_newobj, w_value._value)
+        W_UnicodeObject.__init__(w_newobj, w_value._utf8, w_value._length)
         return w_newobj
 
     def descr_repr(self, space):
@@ -340,8 +340,7 @@
         return mod_format(space, w_values, self, do_unicode=True)
 
     def descr_translate(self, space, w_table):
-        xxx
-        selfvalue = self._value
+        selfvalue = self._utf8.decode("utf8")
         w_sys = space.getbuiltinmodule('sys')
         maxunicode = space.int_w(space.getattr(w_sys,
                                                space.newtext("maxunicode")))
@@ -365,12 +364,12 @@
                                     hex(maxunicode + 1))
                     result.append(unichr(newval))
                 elif space.isinstance_w(w_newval, space.w_unicode):
-                    result.append(space.unicode_w(w_newval))
+                    result.append(space.utf8_w(w_newval).decode("utf8"))
                 else:
                     raise oefmt(space.w_TypeError,
                                 "character mapping must return integer, None "
                                 "or unicode")
-        return W_UnicodeObject(u''.join(result))
+        return W_UnicodeObject(u''.join(result).encode("utf8"), -1)
 
     def descr_encode(self, space, w_encoding=None, w_errors=None):
         encoding, errors = _get_encoding_and_errors(space, w_encoding,
@@ -1286,7 +1285,8 @@
 def unicode_to_decimal_w(space, w_unistr):
     if not isinstance(w_unistr, W_UnicodeObject):
         raise oefmt(space.w_TypeError, "expected unicode, got '%T'", w_unistr)
-    unistr = w_unistr._value
+    unistr = w_unistr._utf8.decode("utf8")
+    # XXX speed up
     result = ['\0'] * len(unistr)
     digits = ['0', '1', '2', '3', '4',
               '5', '6', '7', '8', '9']