[pypy-commit] pypy unicode-utf8-py3: replace utf8 with W_Unicode, saves a conversion or two

mattip pypy.commits at gmail.com
Thu Aug 9 16:30:45 EDT 2018


Author: Matti Picus <matti.picus at gmail.com>
Branch: unicode-utf8-py3
Changeset: r94985:f6f71b76311e
Date: 2018-08-09 13:28 -0700
http://bitbucket.org/pypy/pypy/changeset/f6f71b76311e/

Log:	replace utf8 with W_Unicode, saves a conversion or two

diff --git a/pypy/interpreter/astcompiler/fstring.py b/pypy/interpreter/astcompiler/fstring.py
--- a/pypy/interpreter/astcompiler/fstring.py
+++ b/pypy/interpreter/astcompiler/fstring.py
@@ -3,6 +3,7 @@
 from pypy.interpreter import error
 from pypy.interpreter import unicodehelper
 from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.rutf8 import codepoints_in_utf8
 
 
 def add_constant_string(astbuilder, joined_pieces, w_string, atom_node):
@@ -21,10 +22,8 @@
     joined_pieces.append(node(w_string, atom_node.get_lineno(),
                                         atom_node.get_column()))
 
-def f_constant_string(astbuilder, joined_pieces, u, atom_node):
-    space = astbuilder.space
-    add_constant_string(astbuilder, joined_pieces, space.newtext(u),
-                        atom_node)
+def f_constant_string(astbuilder, joined_pieces, w_u, atom_node):
+    add_constant_string(astbuilder, joined_pieces, w_u, atom_node)
 
 def f_string_compile(astbuilder, source, atom_node):
     # Note: a f-string is kept as a single literal up to here.
@@ -259,19 +258,20 @@
         i += 1
 
     fstr.current_index = i
+    space = astbuilder.space
     literal = builder.build()
+    lgt = codepoints_in_utf8(literal)
     if not fstr.raw_mode and '\\' in literal:
-        space = astbuilder.space
         literal = parsestring.decode_unicode_utf8(space, literal, 0,
                                                   len(literal))
-        literal, lgt, _ = unicodehelper.decode_unicode_escape(space, literal)
-    return literal.decode('utf-8')
+        literal, pos, lgt = unicodehelper.decode_unicode_escape(space, literal)
+    return space.newtext(literal, lgt)
 
 
 def fstring_find_literal_and_expr(astbuilder, fstr, atom_node, rec):
-    # Return a tuple with the next literal part, and optionally the
+    # Return a tuple with the next literal part as a W_Unicode, and optionally the
     # following expression node.  Updates the current index inside 'fstr'.
-    literal = fstring_find_literal(astbuilder, fstr, atom_node, rec)
+    w_u = fstring_find_literal(astbuilder, fstr, atom_node, rec)
 
     s = fstr.unparsed
     i = fstr.current_index
@@ -283,7 +283,7 @@
         # We must now be the start of an expression, on a '{'.
         assert s[i] == '{'
         expr = fstring_find_expr(astbuilder, fstr, atom_node, rec)
-    return literal, expr
+    return w_u, expr
 
 
 def parse_f_string(astbuilder, joined_pieces, fstr, atom_node, rec=0):
@@ -302,11 +302,11 @@
             "really the case", atom_node)
 
     while True:
-        literal, expr = fstring_find_literal_and_expr(astbuilder, fstr,
+        w_u, expr = fstring_find_literal_and_expr(astbuilder, fstr,
                                                       atom_node, rec)
 
         # add the literal part
-        f_constant_string(astbuilder, joined_pieces, literal, atom_node)
+        f_constant_string(astbuilder, joined_pieces, w_u, atom_node)
 
         if expr is None:
             break         # We're done with this f-string.


More information about the pypy-commit mailing list