[pypy-commit] pypy py3.5-fstring-pep498: Split the logic into its own file

Tue Jan 24 04:42:29 EST 2017

Author: Armin Rigo <arigo at tunes.org>
Branch: py3.5-fstring-pep498
Changeset: r89721:9f329ca0b48f
Date: 2017-01-24 08:50 +0100
http://bitbucket.org/pypy/pypy/changeset/9f329ca0b48f/

Log:	Split the logic into its own file

diff --git a/pypy/interpreter/astcompiler/astbuilder.py b/pypy/interpreter/astcompiler/astbuilder.py
--- a/pypy/interpreter/astcompiler/astbuilder.py
+++ b/pypy/interpreter/astcompiler/astbuilder.py
@@ -1,9 +1,9 @@
 from pypy.interpreter.astcompiler import ast, consts, misc
 from pypy.interpreter.astcompiler import asthelpers # Side effects
+from pypy.interpreter.astcompiler import fstring
 from pypy.interpreter import error
 from pypy.interpreter.pyparser.pygram import syms, tokens
 from pypy.interpreter.pyparser.error import SyntaxError
-from pypy.interpreter.pyparser import parsestring
 from rpython.rlib.objectmodel import always_inline, we_are_translated
 
 
@@ -1191,150 +1191,6 @@
             i += 3
         return (i,key,value)
 
-    def _add_constant_string(self, joined_pieces, w_string, atom_node):
-        space = self.space
-        is_unicode = space.isinstance_w(w_string, space.w_unicode)
-        # Implement implicit string concatenation.
-        if joined_pieces:
-            prev = joined_pieces[-1]
-            if is_unicode and isinstance(prev, ast.Str):
-                w_string = space.add(prev.s, w_string)
-                del joined_pieces[-1]
-            elif not is_unicode and isinstance(prev, ast.Bytes):
-                w_string = space.add(prev.s, w_string)
-                del joined_pieces[-1]
-        node = ast.Str if is_unicode else ast.Bytes
-        joined_pieces.append(node(w_string, atom_node.get_lineno(),
-                                            atom_node.get_column()))
-
-    def _f_constant_string(self, joined_pieces, u, atom_node):
-        self._add_constant_string(joined_pieces, self.space.newunicode(u),
-                                  atom_node)
-
-    def _f_string_compile(self, source, atom_node):
-        # Note: a f-string is kept as a single literal up to here.
-        # At this point only, we recursively call the AST compiler
-        # on all the '{expr}' parts.  The 'expr' part is not parsed
-        # or even tokenized together with the rest of the source code!
-        from pypy.interpreter.pyparser import pyparse
-
-        # complain if 'source' is only whitespace or an empty string
-        for c in source:
-            if c not in ' \t\n\r\v\f':
-                break
-        else:
-            self.error("f-string: empty expression not allowed", atom_node)
-
-        if self.recursive_parser is None:
-            self.error("internal error: parser not available for parsing "
-                       "the expressions inside the f-string", atom_node)
-        source = '(%s)' % source.encode('utf-8')
-
-        info = pyparse.CompileInfo("<fstring>", "eval",
-                                   consts.PyCF_SOURCE_IS_UTF8 |
-                                   consts.PyCF_IGNORE_COOKIE |
-                                   consts.PyCF_REFUSE_COMMENTS,
-                                   optimize=self.compile_info.optimize)
-        parse_tree = self.recursive_parser.parse_source(source, info)
-        return ast_from_node(self.space, parse_tree, info)
-
-    def _f_string_expr(self, joined_pieces, u, start, atom_node, rec=0):
-        conversion = -1     # the conversion char.  -1 if not specified.
-        format_spec = None
-        nested_depth = 0    # nesting level for braces/parens/brackets in exprs
-        p = start
-        while p < len(u):
-            ch = u[p]
-            p += 1
-            if ch in u'[{(':
-                nested_depth += 1
-            elif nested_depth > 0 and ch in u']})':
-                nested_depth -= 1
-            elif nested_depth == 0 and ch in u'!:}':
-                # special-case '!='
-                if ch == u'!' and p < len(u) and u[p] == u'=':
-                    continue
-                break     # normal way out of this loop
-        else:
-            ch = u'\x00'
-        #
-        if nested_depth > 0:
-            self.error("f-string: mismatched '(', '{' or '['", atom_node)
-        end_expression = p - 1
-        if ch == u'!':
-            if p + 1 < len(u):
-                conversion = ord(u[p])
-                ch = u[p + 1]
-                p += 2
-            if conversion not in (ord('s'), ord('r'), ord('a')):
-                self.error("f-string: invalid conversion character: "
-                           "expected 's', 'r', or 'a'", atom_node)
-        if ch == u':':
-            if rec >= 2:
-                self.error("f-string: expressions nested too deeply", atom_node)
-            subpieces = []
-            p = self._parse_f_string(subpieces, u, p, atom_node, rec + 1)
-            format_spec = self._f_string_to_ast_node(subpieces, atom_node)
-            ch = u[p] if p >= 0 else u'\x00'
-            p += 1
-
-        if ch != u'}':
-            self.error("f-string: expecting '}'", atom_node)
-        end_f_string = p
-        assert end_expression >= start
-        expr = self._f_string_compile(u[start:end_expression], atom_node)
-        assert isinstance(expr, ast.Expression)
-        fval = ast.FormattedValue(expr.body, conversion, format_spec,
-                                  atom_node.get_lineno(),
-                                  atom_node.get_column())
-        joined_pieces.append(fval)
-        return end_f_string
-
-    def _parse_f_string(self, joined_pieces, u, start, atom_node, rec=0):
-        space = self.space
-        p1 = u.find(u'{', start)
-        prestart = start
-        while True:
-            if p1 < 0:
-                p1 = len(u)
-            p2 = u.find(u'}', start, p1)
-            if p2 >= 0:
-                self._f_constant_string(joined_pieces, u[prestart:p2],
-                                        atom_node)
-                pn = p2 + 1
-                if pn < len(u) and u[pn] == u'}':    # '}}' => single '}'
-                    start = pn + 1
-                    prestart = pn
-                    continue
-                return p2     # found a single '}', stop here
-            self._f_constant_string(joined_pieces, u[prestart:p1], atom_node)
-            if p1 == len(u):
-                return -1     # no more '{' or '}' left
-            pn = p1 + 1
-            if pn < len(u) and u[pn] == u'{':    # '{{' => single '{'
-                start = pn + 1
-                prestart = pn
-            else:
-                assert u[p1] == u'{'
-                start = self._f_string_expr(joined_pieces, u, pn,
-                                            atom_node, rec)
-                assert u[start - 1] == u'}'
-                prestart = start
-            p1 = u.find(u'{', start)
-
-    def _f_string_to_ast_node(self, joined_pieces, atom_node):
-        # remove empty Strs
-        values = [node for node in joined_pieces
-                       if not (isinstance(node, ast.Str) and not node.s)]
-        if len(values) > 1:
-            return ast.JoinedStr(values, atom_node.get_lineno(),
-                                         atom_node.get_column())
-        elif len(values) == 1:
-            return values[0]
-        else:
-            assert len(joined_pieces) > 0    # they are all empty strings
-            return joined_pieces[0]
-
     def handle_atom(self, atom_node):
         first_child = atom_node.get_child(0)
         first_child_type = first_child.type
@@ -1354,38 +1210,7 @@
                                 first_child.get_column())
         #
         elif first_child_type == tokens.STRING:
-            space = self.space
-            encoding = self.compile_info.encoding
-            joined_pieces = []
-            for i in range(atom_node.num_children()):
-                try:
-                    w_next, saw_f = parsestring.parsestr(
-                            space, encoding, atom_node.get_child(i).get_value())
-                except error.OperationError as e:
-                    if not (e.match(space, space.w_UnicodeError) or
-                            e.match(space, space.w_ValueError)):
-                        raise
-                    # Unicode/ValueError in literal: turn into SyntaxError
-                    raise self.error(e.errorstr(space), atom_node)
-                if not saw_f:
-                    self._add_constant_string(joined_pieces, w_next, atom_node)
-                else:
-                    p = self._parse_f_string(joined_pieces,
-                                             space.unicode_w(w_next), 0,
-                                             atom_node)
-                    if p != -1:
-                        self.error("f-string: single '}' is not allowed",
-                                   atom_node)
-            if len(joined_pieces) == 1:   # <= the common path
-                return joined_pieces[0]   # ast.Str, Bytes or FormattedValue
-            # with more than one piece, it is a combination of Str and
-            # FormattedValue pieces---if there is a Bytes, then we got
-            # an invalid mixture of bytes and unicode literals
-            for node in joined_pieces:
-                if isinstance(node, ast.Bytes):
-                    self.error("cannot mix bytes and nonbytes literals",
-                               atom_node)
-            return self._f_string_to_ast_node(joined_pieces, atom_node)
+            return fstring.string_parse_literal(self, atom_node)
         #
         elif first_child_type == tokens.NUMBER:
             num_value = self.parse_number(first_child.get_value())
diff --git a/pypy/interpreter/astcompiler/fstring.py b/pypy/interpreter/astcompiler/fstring.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/astcompiler/fstring.py
@@ -0,0 +1,185 @@
+from pypy.interpreter.astcompiler import ast, consts
+from pypy.interpreter.pyparser import parsestring
+from pypy.interpreter import error
+
+
+def add_constant_string(astbuilder, joined_pieces, w_string, atom_node):
+    space = astbuilder.space
+    is_unicode = space.isinstance_w(w_string, space.w_unicode)
+    # Implement implicit string concatenation.
+    if joined_pieces:
+        prev = joined_pieces[-1]
+        if is_unicode and isinstance(prev, ast.Str):
+            w_string = space.add(prev.s, w_string)
+            del joined_pieces[-1]
+        elif not is_unicode and isinstance(prev, ast.Bytes):
+            w_string = space.add(prev.s, w_string)
+            del joined_pieces[-1]
+    node = ast.Str if is_unicode else ast.Bytes
+    joined_pieces.append(node(w_string, atom_node.get_lineno(),
+                                        atom_node.get_column()))
+
+def f_constant_string(astbuilder, joined_pieces, u, atom_node):
+    space = astbuilder.space
+    add_constant_string(astbuilder, joined_pieces, space.newunicode(u),
+                        atom_node)
+
+def f_string_compile(astbuilder, source, atom_node):
+    # Note: a f-string is kept as a single literal up to here.
+    # At this point only, we recursively call the AST compiler
+    # on all the '{expr}' parts.  The 'expr' part is not parsed
+    # or even tokenized together with the rest of the source code!
+    from pypy.interpreter.pyparser import pyparse
+    from pypy.interpreter.astcompiler.astbuilder import ast_from_node
+
+    # complain if 'source' is only whitespace or an empty string
+    for c in source:
+        if c not in ' \t\n\r\v\f':
+            break
+    else:
+        astbuilder.error("f-string: empty expression not allowed", atom_node)
+
+    if astbuilder.recursive_parser is None:
+        astbuilder.error("internal error: parser not available for parsing "
+                   "the expressions inside the f-string", atom_node)
+    source = '(%s)' % source.encode('utf-8')
+
+    info = pyparse.CompileInfo("<fstring>", "eval",
+                               consts.PyCF_SOURCE_IS_UTF8 |
+                               consts.PyCF_IGNORE_COOKIE |
+                               consts.PyCF_REFUSE_COMMENTS,
+                               optimize=astbuilder.compile_info.optimize)
+    parse_tree = astbuilder.recursive_parser.parse_source(source, info)
+    return ast_from_node(astbuilder.space, parse_tree, info)
+
+def f_string_expr(astbuilder, joined_pieces, u, start, atom_node, rec=0):
+    conversion = -1     # the conversion char.  -1 if not specified.
+    format_spec = None
+    nested_depth = 0    # nesting level for braces/parens/brackets in exprs
+    p = start
+    while p < len(u):
+        ch = u[p]
+        p += 1
+        if ch in u'[{(':
+            nested_depth += 1
+        elif nested_depth > 0 and ch in u']})':
+            nested_depth -= 1
+        elif nested_depth == 0 and ch in u'!:}':
+            # special-case '!='
+            if ch == u'!' and p < len(u) and u[p] == u'=':
+                continue
+            break     # normal way out of this loop
+    else:
+        ch = u'\x00'
+    #
+    if nested_depth > 0:
+        astbuilder.error("f-string: mismatched '(', '{' or '['", atom_node)
+    end_expression = p - 1
+    if ch == u'!':
+        if p + 1 < len(u):
+            conversion = ord(u[p])
+            ch = u[p + 1]
+            p += 2
+        if conversion not in (ord('s'), ord('r'), ord('a')):
+            astbuilder.error("f-string: invalid conversion character: "
+                             "expected 's', 'r', or 'a'", atom_node)
+    if ch == u':':
+        if rec >= 2:
+            astbuilder.error("f-string: expressions nested too deeply",
+                             atom_node)
+        subpieces = []
+        p = parse_f_string(astbuilder, subpieces, u, p, atom_node, rec + 1)
+        format_spec = f_string_to_ast_node(astbuilder, subpieces, atom_node)
+        ch = u[p] if p >= 0 else u'\x00'
+        p += 1
+
+    if ch != u'}':
+        astbuilder.error("f-string: expecting '}'", atom_node)
+    end_f_string = p
+    assert end_expression >= start
+    expr = f_string_compile(astbuilder, u[start:end_expression], atom_node)
+    assert isinstance(expr, ast.Expression)
+    fval = ast.FormattedValue(expr.body, conversion, format_spec,
+                              atom_node.get_lineno(),
+                              atom_node.get_column())
+    joined_pieces.append(fval)
+    return end_f_string
+
+def parse_f_string(astbuilder, joined_pieces, u, start, atom_node, rec=0):
+    space = astbuilder.space
+    p1 = u.find(u'{', start)
+    prestart = start
+    while True:
+        if p1 < 0:
+            p1 = len(u)
+        p2 = u.find(u'}', start, p1)
+        if p2 >= 0:
+            f_constant_string(astbuilder, joined_pieces, u[prestart:p2],
+                              atom_node)
+            pn = p2 + 1
+            if pn < len(u) and u[pn] == u'}':    # '}}' => single '}'
+                start = pn + 1
+                prestart = pn
+                continue
+            return p2     # found a single '}', stop here
+        f_constant_string(astbuilder, joined_pieces, u[prestart:p1], atom_node)
+        if p1 == len(u):
+            return -1     # no more '{' or '}' left
+        pn = p1 + 1
+        if pn < len(u) and u[pn] == u'{':    # '{{' => single '{'
+            start = pn + 1
+            prestart = pn
+        else:
+            assert u[p1] == u'{'
+            start = f_string_expr(astbuilder, joined_pieces, u, pn,
+                                  atom_node, rec)
+            assert u[start - 1] == u'}'
+            prestart = start
+        p1 = u.find(u'{', start)
+
+def f_string_to_ast_node(astbuilder, joined_pieces, atom_node):
+    # remove empty Strs
+    values = [node for node in joined_pieces
+                   if not (isinstance(node, ast.Str) and not node.s)]
+    if len(values) > 1:
+        return ast.JoinedStr(values, atom_node.get_lineno(),
+                                     atom_node.get_column())
+    elif len(values) == 1:
+        return values[0]
+    else:
+        assert len(joined_pieces) > 0    # they are all empty strings
+        return joined_pieces[0]
+
+def string_parse_literal(astbuilder, atom_node):
+    space = astbuilder.space
+    encoding = astbuilder.compile_info.encoding
+    joined_pieces = []
+    for i in range(atom_node.num_children()):
+        try:
+            w_next, saw_f = parsestring.parsestr(
+                    space, encoding, atom_node.get_child(i).get_value())
+        except error.OperationError as e:
+            if not (e.match(space, space.w_UnicodeError) or
+                    e.match(space, space.w_ValueError)):
+                raise
+            # Unicode/ValueError in literal: turn into SyntaxError
+            raise astbuilder.error(e.errorstr(space), atom_node)
+        if not saw_f:
+            add_constant_string(astbuilder, joined_pieces, w_next, atom_node)
+        else:
+            p = parse_f_string(astbuilder, joined_pieces,
+                                     space.unicode_w(w_next), 0,
+                                     atom_node)
+            if p != -1:
+                astbuilder.error("f-string: single '}' is not allowed",
+                                 atom_node)
+    if len(joined_pieces) == 1:   # <= the common path
+        return joined_pieces[0]   # ast.Str, Bytes or FormattedValue
+    # with more than one piece, it is a combination of Str and
+    # FormattedValue pieces---if there is a Bytes, then we got
+    # an invalid mixture of bytes and unicode literals
+    for node in joined_pieces:
+        if isinstance(node, ast.Bytes):
+            astbuilder.error("cannot mix bytes and nonbytes literals",
+                             atom_node)
+    return f_string_to_ast_node(astbuilder, joined_pieces, atom_node)