[pypy-commit] pypy py3.5: Be more careful about encoding/decoding to utf-8, as even the

arigo pypy.commits at gmail.com
Mon Aug 29 05:16:10 EDT 2016


Author: Armin Rigo <arigo at tunes.org>
Branch: py3.5
Changeset: r86676:9d26d61f920a
Date: 2016-08-29 11:15 +0200
http://bitbucket.org/pypy/pypy/changeset/9d26d61f920a/

Log:	Be more careful about encoding to and decoding from utf-8, as even
	a plain .encode('utf-8') crashes when given a unichar that is
	normally used for surrogates.

diff --git a/pypy/objspace/std/marshal_impl.py b/pypy/objspace/std/marshal_impl.py
--- a/pypy/objspace/std/marshal_impl.py
+++ b/pypy/objspace/std/marshal_impl.py
@@ -371,9 +371,9 @@
     m.atom_str(TYPE_STRING, x.co_code)
     _marshal_tuple(space, x.co_consts_w, m)
     _marshal_tuple(space, x.co_names_w, m)   # list of w_unicodes
-    co_varnames_w = [space.wrap(s.decode('utf-8')) for s in x.co_varnames]
-    co_freevars_w = [space.wrap(s.decode('utf-8')) for s in x.co_freevars]
-    co_cellvars_w = [space.wrap(s.decode('utf-8')) for s in x.co_cellvars]
+    co_varnames_w = [space.wrap(_decode_utf8(space, s)) for s in x.co_varnames]
+    co_freevars_w = [space.wrap(_decode_utf8(space, s)) for s in x.co_freevars]
+    co_cellvars_w = [space.wrap(_decode_utf8(space, s)) for s in x.co_cellvars]
     _marshal_tuple(space, co_varnames_w, m)  # more lists, now of w_unicodes
     _marshal_tuple(space, co_freevars_w, m)
     _marshal_tuple(space, co_cellvars_w, m)
@@ -387,7 +387,8 @@
 
 def _unmarshal_strlist(u):
     items_w = _unmarshal_tuple_w(u)
-    return [u.space.unicode_w(w_item).encode('utf-8') for w_item in items_w]
+    return [_encode_utf8(u.space, u.space.unicode_w(w_item))
+            for w_item in items_w]
 
 def _unmarshal_tuple_w(u):
     w_obj = u.get_w_obj()
@@ -413,8 +414,8 @@
     varnames    = _unmarshal_strlist(u)
     freevars    = _unmarshal_strlist(u)
     cellvars    = _unmarshal_strlist(u)
-    filename    = space.unicode0_w(u.get_w_obj()).encode('utf-8')
-    name        = space.unicode_w(u.get_w_obj()).encode('utf-8')
+    filename    = _encode_utf8(space, space.unicode0_w(u.get_w_obj()))
+    name        = _encode_utf8(space, space.unicode_w(u.get_w_obj()))
     firstlineno = u.get_int()
     lnotab      = space.bytes_w(u.get_w_obj())
     PyCode.__init__(w_codeobj,
@@ -439,15 +440,20 @@
     if typecode != FLAG_DONE:
         m.atom_str(typecode, s)
 
+def _encode_utf8(space, u):
+    return unicodehelper.encode_utf8(space, u, allow_surrogates=True)
+
+def _decode_utf8(space, s):
+    return unicodehelper.decode_utf8(space, s, allow_surrogates=True)
+
 @marshaller(W_UnicodeObject)
 def marshal_unicode(space, w_unicode, m):
-    s = unicodehelper.encode_utf8(space, space.unicode_w(w_unicode),
-                                  allow_surrogates=True)
+    s = _encode_utf8(space, space.unicode_w(w_unicode))
     _marshal_unicode(space, s, m, w_unicode=w_unicode)
 
 @unmarshaller(TYPE_UNICODE)
 def unmarshal_unicode(space, u, tc):
-    uc = unicodehelper.decode_utf8(space, u.get_str(), allow_surrogates=True)
+    uc = _decode_utf8(space, u.get_str())
     return space.newunicode(uc)
 
 @unmarshaller(TYPE_INTERNED)


More information about the pypy-commit mailing list