[pypy-svn] pypy compile-from-stream: Use the Python grammar to process __future__ imports,

amauryfa commits-noreply at bitbucket.org
Mon Mar 21 21:47:51 CET 2011


Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: compile-from-stream
Changeset: r42829:158c353210eb
Date: 2011-03-21 21:29 +0100
http://bitbucket.org/pypy/pypy/changeset/158c353210eb/

Log:	Use the Python grammar to process __future__ imports, and get rid of
	the hand-made FutureAutomaton

diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py
--- a/pypy/interpreter/pyparser/future.py
+++ b/pypy/interpreter/pyparser/future.py
@@ -1,306 +1,33 @@
-"""
-This automaton is designed to be invoked on a Python source string
-before the real parser starts working, in order to find all legal
-'from __future__ import blah'. As soon as something is encountered that
-would prevent more future imports, the analysis is aborted.
-The resulting legal futures are avaliable in self.flags after the
-pass has ended.
+from pypy.interpreter.astcompiler import ast
+from pypy.tool import stdlib___future__ as future
 
-Invocation is through get_futures(src), which returns a field of flags, one per
-found correct future import.
+def get_futures(future_flags, tree):
+    flags = 0
 
-The flags can then be used to set up the parser.
-All error detection is left to the parser.
+    if not isinstance(tree, (ast.Module, ast.Interactive)):
+        return flags, (0, 0)
 
-The reason we are not using the regular lexer/parser toolchain is that
-we do not want the overhead of generating tokens for entire files just
-to find information that resides in the first few lines of the file.
-Neither do we require sane error messages, as this job is handled by
-the parser.
+    if not tree.body:
+        return flags, (0, 0)
 
-To make the parsing fast, especially when the module is translated to C,
-the code has been written in a very serial fashion, using an almost
-assembler like style. A further speedup could be achieved by replacing
-the "in" comparisons with explicit numeric comparisons.
-"""
+    found_docstring = False
 
-from pypy.interpreter.astcompiler.consts import CO_GENERATOR_ALLOWED, \
-    CO_FUTURE_DIVISION, CO_FUTURE_WITH_STATEMENT, CO_FUTURE_ABSOLUTE_IMPORT
-
-def get_futures(future_flags, source):
-    futures = FutureAutomaton(future_flags, source)
-    try:
-        futures.start()
-    except DoneException, e:
-        pass
-    return futures.flags, (futures.lineno, futures.col_offset)
-
-class DoneException(Exception):
-    pass
-
-whitespace = ' \t\f'
-whitespace_or_newline = whitespace + '\n\r'
-letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYabcdefghijklmnopqrstuvwxyz_'
-alphanumerics = letters + '1234567890'
-
-class FutureAutomaton(object):
-    """
-    A future statement must appear near the top of the module.
-    The only lines that can appear before a future statement are:
-
-        * the module docstring (if any),
-        * comments,
-        * blank lines, and
-        * other future statements.
-
-    The features recognized by Python 2.5 are "generators",
-    "division", "nested_scopes" and "with_statement", "absolute_import".
-    "generators", "division" and "nested_scopes" are redundant
-    in 2.5 because they are always enabled.
-
-    This module parses the input until it encounters something that is
-    not recognized as a valid future statement or something that may
-    precede a future statement.
-    """
-
-    def __init__(self, future_flags, string):
-        self.future_flags = future_flags
-        self.s = string
-        self.pos = 0
-        self.current_lineno = 1
-        self.lineno = -1
-        self.line_start_pos = 0
-        self.col_offset = 0
-        self.docstring_consumed = False
-        self.flags = 0
-        self.got_features = 0
-
-    def getc(self, offset=0):
-        try:
-            return self.s[self.pos + offset]
-        except IndexError:
-            raise DoneException
-
-    def start(self):
-        c = self.getc()
-        if c in ("'", '"', "r", "u") and not self.docstring_consumed:
-            self.consume_docstring()
-        elif c == '\\' or c in whitespace_or_newline:
-            self.consume_empty_line()
-        elif c == '#':
-            self.consume_comment()
-        elif c == 'f':
-            self.consume_from()
-        else:
-            return
-
-    def atbol(self):
-        self.current_lineno += 1
-        self.line_start_pos = self.pos
-
-    def consume_docstring(self):
-        self.docstring_consumed = True
-        if self.getc() == "r":
-            self.pos += 1
-        if self.getc() == "u":
-            self.pos += 1
-        endchar = self.getc()
-        if (self.getc() == self.getc(+1) and
-            self.getc() == self.getc(+2)):
-            self.pos += 3
-            while 1: # Deal with a triple quoted docstring
-                if self.getc() == '\\':
-                    self.pos += 2
-                else:
-                    c = self.getc()
-                    if c != endchar:
-                        self.pos += 1
-                        if c == '\n':
-                            self.atbol()
-                        elif c == '\r':
-                            if self.getc() == '\n':
-                                self.pos += 1
-                                self.atbol()
-                    else:
-                        self.pos += 1
-                        if (self.getc() == endchar and
-                            self.getc(+1) == endchar):
-                            self.pos += 2
-                            self.consume_empty_line()
-                            break
-
-        else: # Deal with a single quoted docstring
-            self.pos += 1
-            while 1:
-                c = self.getc()
-                self.pos += 1
-                if c == endchar:
-                    self.consume_empty_line()
-                    return
-                elif c == '\\':
-                    # Deal with linefeeds
-                    if self.getc() != '\r':
-                        self.pos += 1
-                    else:
-                        self.pos += 1
-                        if self.getc() == '\n':
-                            self.pos += 1
-                elif c in '\r\n':
-                    # Syntax error
-                    return
-
-    def consume_continuation(self):
-        c = self.getc()
-        if c in '\n\r':
-            self.pos += 1
-            self.atbol()
-
-    def consume_empty_line(self):
-        """
-        Called when the remainder of the line can only contain whitespace
-        and comments.
-        """
-        while self.getc() in whitespace:
-            self.pos += 1
-        if self.getc() == '#':
-            self.consume_comment()
-        elif self.getc() == ';':
-            self.pos += 1
-            self.consume_whitespace()
-            self.start()
-        elif self.getc() in '\\':
-            self.pos += 1
-            self.consume_continuation()
-            self.start()
-        elif self.getc() in '\r\n':
-            c = self.getc()
-            self.pos += 1
-            if c == '\r':
-                if self.getc() == '\n':
-                    self.pos += 1
-                self.atbol()
-            else:
-                self.atbol()
-            self.start()
-
-    def consume_comment(self):
-        self.pos += 1
-        while self.getc() not in '\r\n':
-            self.pos += 1
-        self.consume_empty_line()
-
-    def consume_from(self):
-        col_offset = self.pos - self.line_start_pos
-        line = self.current_lineno
-        self.pos += 1
-        if self.getc() == 'r' and self.getc(+1) == 'o' and self.getc(+2) == 'm':
-            self.docstring_consumed = True
-            self.pos += 3
-            self.consume_mandatory_whitespace()
-            if self.s[self.pos:self.pos+10] != '__future__':
-                raise DoneException
-            self.pos += 10
-            self.consume_mandatory_whitespace()
-            if self.s[self.pos:self.pos+6] != 'import':
-                raise DoneException
-            self.pos += 6
-            self.consume_whitespace()
-            old_got = self.got_features
-            try:
-                if self.getc() == '(':
-                    self.pos += 1
-                    self.consume_whitespace()
-                    self.set_flag(self.get_name())
-                    # Set flag corresponding to name
-                    self.get_more(paren_list=True)
-                else:
-                    self.set_flag(self.get_name())
-                    self.get_more()
-            finally:
-                if self.got_features > old_got:
-                    self.col_offset = col_offset
-                    self.lineno = line
-            self.consume_empty_line()
-
-    def consume_mandatory_whitespace(self):
-        if self.getc() not in whitespace + '\\':
-            raise DoneException
-        self.consume_whitespace()
-
-    def consume_whitespace(self):
-        while 1:
-            c = self.getc()
-            if c in whitespace:
-                self.pos += 1
-                continue
-            elif c == '\\':
-                self.pos += 1
-                c = self.getc()
-                if c == '\n':
-                    self.pos += 1
-                    self.atbol()
-                    continue
-                elif c == '\r':
-                    self.pos += 1
-                    if self.getc() == '\n':
-                        self.pos += 1
-                        self.atbol()
-                else:
-                    raise DoneException
-            else:
-                return
-
-    def get_name(self):
-        if self.getc() not in letters:
-            raise DoneException
-        p = self.pos
-        try:
-            while self.getc() in alphanumerics:
-                self.pos += 1
-        except DoneException:
-            # If there's any name at all, we want to call self.set_flag().
-            # Something else while get the DoneException again.
-            if self.pos == p:
-                raise
-            end = self.pos
-        else:
-            end = self.pos
-            self.consume_whitespace()
-        return self.s[p:end]
-
-    def get_more(self, paren_list=False):
-        if paren_list and self.getc() == ')':
-            self.pos += 1
-            return
-        if (self.getc() == 'a' and
-            self.getc(+1) == 's' and
-            self.getc(+2) in whitespace):
-            self.get_name()
-            self.get_name()
-            self.get_more(paren_list=paren_list)
-            return
-        elif self.getc() != ',':
-            return
-        else:
-            self.pos += 1
-            self.consume_whitespace()
-            if paren_list and self.getc() == ')':
-                self.pos += 1
-                return # Handles trailing comma inside parenthesis
-            self.set_flag(self.get_name())
-            self.get_more(paren_list=paren_list)
-
-    def set_flag(self, feature):
-        self.got_features += 1
-        try:
-            self.flags |= self.future_flags.compiler_features[feature]
-        except KeyError:
-            pass
-
-from codeop import PyCF_DONT_IMPLY_DEDENT
-from pypy.interpreter.error import OperationError
-
-from pypy.tool import stdlib___future__ as future
+    last_future = (0, 0)  # position of the last legal __future__ import
+    for elem in tree.body:
+        if isinstance(elem, ast.ImportFrom) and elem.module == '__future__':
+            for alias in elem.names:
+                try:
+                    flags |= future_flags.compiler_features[alias.name]
+                except KeyError:
+                    pass
+            last_future = (elem.lineno, elem.col_offset)
+        elif (isinstance(elem, ast.Expr) and not found_docstring and
+              isinstance(elem.value, ast.Str)):
+            found_docstring = True
+        else:
+            # anything else ends the run of legal __future__ imports
+            break
+    return flags, last_future
 
 class FutureFlags(object):
 

diff --git a/pypy/interpreter/pycompiler.py b/pypy/interpreter/pycompiler.py
--- a/pypy/interpreter/pycompiler.py
+++ b/pypy/interpreter/pycompiler.py
@@ -140,11 +140,11 @@
     def _compile_to_ast(self, source, info):
         space = self.space
         try:
-            f_flags, future_info = future.get_futures(self.future_flags, source)
+            parse_tree = self.parser.parse_source(source, info)
+            mod = astbuilder.ast_from_node(space, parse_tree, info)
+            f_flags, future_info = future.get_futures(self.future_flags, mod)
             info.last_future_import = future_info
             info.flags |= f_flags
-            parse_tree = self.parser.parse_source(source, info)
-            mod = astbuilder.ast_from_node(space, parse_tree, info)
         except parseerror.IndentationError, e:
             raise OperationError(space.w_IndentationError,
                                  e.wrap_info(space))

diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -182,3 +182,4 @@
             self.stack[-1][2].children.append(node)
         else:
             self.root = node
+        return node

diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -217,12 +217,51 @@
 
         return self.build_tree(source_lines, compile_info)
 
+    def parse_future_import(self, node):
+        """Turn on flags for __future__ imports the parser cares about."""
+        if node.type != self.grammar.symbol_ids['import_from']:
+            return
+        children = node.children
+        # from __future__ import ..., must have at least 4 children
+        if len(children) < 4 or children[0].value != 'from':
+            return
+        module = children[1]
+        if (len(module.children) != 1 or
+            module.children[0].value != '__future__'):
+            return
+
+        child = children[3]
+        # child can be a star, a parenthesis or import_as_names
+        if child.type == pygram.tokens.STAR:
+            return
+        if child.type == pygram.tokens.LPAR:
+            child = children[4]
+
+        # visit every second child; the ones in between are presumably commas
+        for i in range(0, len(child.children), 2):
+            c = child.children[i]
+            if not (len(c.children) >= 1 and
+                    c.children[0].type == pygram.tokens.NAME):
+                continue    # no name here; never reuse a previous one
+            name = c.children[0].value
+            if name == 'print_function':
+                self.compile_info.flags |= consts.CO_FUTURE_PRINT_FUNCTION
+            elif name == 'with_statement':
+                self.compile_info.flags |= consts.CO_FUTURE_WITH_STATEMENT
+            elif name == 'unicode_literals':
+                self.compile_info.flags |= consts.CO_FUTURE_UNICODE_LITERALS
+
     def classify(self, token_type, value, *args):
         if self.compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION:
             if token_type == self.grammar.KEYWORD_TOKEN and value == 'print':
                 return self.grammar.token_ids[pygram.tokens.NAME]
         return parser.Parser.classify(self, token_type, value, *args)
 
+    def pop(self):
+        popped = parser.Parser.pop(self)
+        self.parse_future_import(popped)  # pick up __future__ flags early
+        return popped
+
     def build_tree(self, source_lines, compile_info):
         """Builds the parse tree from a list of source lines"""
 

diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py
--- a/pypy/interpreter/pyparser/test/test_pyparse.py
+++ b/pypy/interpreter/pyparser/test/test_pyparse.py
@@ -161,6 +161,22 @@
         self.parse('0b0l')
         py.test.raises(SyntaxError, self.parse, "0b112")
 
+    def test_future_import(self):
+        """Parsing alone must set the parser-relevant __future__ flags."""
+        def flags_after_parse(source):
+            info = pyparse.CompileInfo("<test>", "exec", 0)
+            self.parse(source, info=info)
+            return info.flags
+        printf = consts.CO_FUTURE_PRINT_FUNCTION
+        both = printf | consts.CO_FUTURE_WITH_STATEMENT
+        for source, expected in [
+            ('from __future__ import print_function', printf),
+            ('from __future__ import print_function, with_statement', both),
+            ('from __future__ import (print_function,\nwith_statement)', both),
+            ('from __future__ import *', 0),
+        ]:
+            assert flags_after_parse(source) == expected
+
 class TestPythonFileParser(TestPythonParser):
     def parse(self, source, mode="exec", info=None):
         if info is None:


More information about the Pypy-commit mailing list