[pypy-svn] r66294 - in pypy/branch/parser-compiler/pypy: interpreter interpreter/astcompiler interpreter/pyparser interpreter/pyparser/test module/__builtin__
benjamin at codespeak.net
benjamin at codespeak.net
Thu Jul 16 17:37:48 CEST 2009
Author: benjamin
Date: Thu Jul 16 17:37:47 2009
New Revision: 66294
Modified:
pypy/branch/parser-compiler/pypy/interpreter/astcompiler/consts.py
pypy/branch/parser-compiler/pypy/interpreter/pycompiler.py
pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py
pypy/branch/parser-compiler/pypy/interpreter/pyparser/pytokenizer.py
pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_pyparse.py
pypy/branch/parser-compiler/pypy/module/__builtin__/compiling.py
Log:
Indicate with a parser flag (PyCF_SOURCE_IS_UTF8) if the source is unicode.
This also moves the PyCF_* compile flag constants to astcompiler.consts, so they are no longer imported from codeop.
Modified: pypy/branch/parser-compiler/pypy/interpreter/astcompiler/consts.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/astcompiler/consts.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/astcompiler/consts.py Thu Jul 16 17:37:47 2009
@@ -22,3 +22,6 @@
CO_FUTURE_DIVISION = 0x2000
CO_FUTURE_ABSOLUTE_IMPORT = 0x4000
CO_FUTURE_WITH_STATEMENT = 0x8000
+
+PyCF_SOURCE_IS_UTF8 = 0x0100
+PyCF_DONT_IMPLY_DEDENT = 0x0200
Modified: pypy/branch/parser-compiler/pypy/interpreter/pycompiler.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pycompiler.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pycompiler.py Thu Jul 16 17:37:47 2009
@@ -4,7 +4,7 @@
"""
import sys
-from codeop import PyCF_DONT_IMPLY_DEDENT
+from pypy.interpreter.astcompiler.consts import PyCF_DONT_IMPLY_DEDENT
from pypy.interpreter.error import OperationError
class AbstractCompiler(object):
Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/pyparse.py Thu Jul 16 17:37:47 2009
@@ -1,8 +1,7 @@
-import codeop
from pypy.interpreter import gateway
from pypy.interpreter.error import OperationError
from pypy.interpreter.pyparser import parser, pytokenizer, pygram, error
-from pypy.interpreter.astcompiler.consts import CO_FUTURE_WITH_STATEMENT
+from pypy.interpreter.astcompiler import consts
_recode_to_utf8 = gateway.applevel(r'''
@@ -82,13 +81,18 @@
"""Parse a python source according to goal"""
# Detect source encoding.
enc = None
- if textsrc[:3] == '\xEF\xBB\xBF':
+ if textsrc.startswith("\xEF\xBB\xBF"):
textsrc = textsrc[3:]
enc = 'utf-8'
- # check that there is no explicit encoding declared
+ # If an encoding is explicitly given check that it is utf-8.
decl_enc = _check_for_encoding(textsrc)
- if decl_enc is not None:
- raise SyntaxError("encoding declaration in Unicode string")
+ if decl_enc and decl_enc != "utf-8":
+ raise error.SyntaxError("UTF-8 BOM with non-utf8 coding cookie")
+ elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
+ enc = 'utf-8'
+
+ if _check_for_encoding(textsrc) is not None:
+ raise error.SyntaxError("coding declaration in unicode string")
else:
enc = _normalize_encoding(_check_for_encoding(textsrc))
if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
@@ -104,7 +108,7 @@
raise
flags = compile_info.flags
- if flags & CO_FUTURE_WITH_STATEMENT:
+ if flags & consts.CO_FUTURE_WITH_STATEMENT:
self.grammar = pygram.python_grammar
else:
self.grammar = pygram.python_grammar_no_with_statement
@@ -112,7 +116,7 @@
if source_lines and not source_lines[-1].endswith("\n"):
source_lines[-1] += '\n'
if textsrc and textsrc[-1] == "\n":
- flags &= ~codeop.PyCF_DONT_IMPLY_DEDENT
+ flags &= ~consts.PyCF_DONT_IMPLY_DEDENT
self.prepare(_targets[compile_info.mode])
try:
Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/pytokenizer.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/pytokenizer.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/pytokenizer.py Thu Jul 16 17:37:47 2009
@@ -1,10 +1,10 @@
-import codeop
from pypy.interpreter.pyparser import automata
from pypy.interpreter.pyparser.pygram import tokens
from pypy.interpreter.pyparser.pytoken import python_opmap
from pypy.interpreter.pyparser.error import TokenError, TokenIndentationError
from pypy.interpreter.pyparser.pytokenize import tabsize, whiteSpaceDFA, \
triple_quoted, endDFAs, single_quoted, pseudoDFA
+from pypy.interpreter.astcompiler import consts
NAMECHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
NUMCHARS = '0123456789'
@@ -231,7 +231,7 @@
pos = pos + 1
lnum -= 1
- if not (flags & codeop.PyCF_DONT_IMPLY_DEDENT):
+ if not (flags & consts.PyCF_DONT_IMPLY_DEDENT):
if token_list and token_list[-1][0] != tokens.NEWLINE:
tok = (tokens.NEWLINE, '', lnum, 0, '\n')
token_list.append(tok)
Modified: pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_pyparse.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_pyparse.py (original)
+++ pypy/branch/parser-compiler/pypy/interpreter/pyparser/test/test_pyparse.py Thu Jul 16 17:37:47 2009
@@ -1,9 +1,9 @@
# -*- coding: utf-8 -*-
-import codeop
import py
from pypy.interpreter.pyparser import pyparse
from pypy.interpreter.pyparser.pygram import syms, tokens
from pypy.interpreter.pyparser.error import SyntaxError, IndentationError
+from pypy.interpreter.astcompiler import consts
class TestPythonParser:
@@ -22,7 +22,7 @@
def test_dont_imply_dedent(self):
info = pyparse.CompileInfo("<test>", "single",
- codeop.PyCF_DONT_IMPLY_DEDENT)
+ consts.PyCF_DONT_IMPLY_DEDENT)
self.parse('if 1:\n x\n', info=info)
self.parse('x = 5 ', info=info)
@@ -42,6 +42,17 @@
input = (u"# coding: utf-7\nstuff = %s" % (sentence,)).encode("utf-7")
tree = self.parse(input, info=info)
assert info.encoding == "utf-7"
+ input = "\xEF\xBB\xBF# coding: utf-8\nx"
+ self.parse(input, info=info)
+ assert info.encoding == "utf-8"
+ input = "# coding: utf-8\nx"
+ info.flags |= consts.PyCF_SOURCE_IS_UTF8
+ exc = py.test.raises(SyntaxError, self.parse, input, info=info).value
+ info.flags &= ~consts.PyCF_SOURCE_IS_UTF8
+ assert exc.msg == "coding declaration in unicode string"
+ input = "\xEF\xBB\xBF# coding: latin-1\nx"
+ exc = py.test.raises(SyntaxError, self.parse, input).value
+ assert exc.msg == "UTF-8 BOM with non-utf8 coding cookie"
input = "# coding: not-here"
exc = py.test.raises(SyntaxError, self.parse, input).value
assert exc.msg == "Unknown encoding: not-here"
Modified: pypy/branch/parser-compiler/pypy/module/__builtin__/compiling.py
==============================================================================
--- pypy/branch/parser-compiler/pypy/module/__builtin__/compiling.py (original)
+++ pypy/branch/parser-compiler/pypy/module/__builtin__/compiling.py Thu Jul 16 17:37:47 2009
@@ -4,7 +4,8 @@
from pypy.interpreter.pycode import PyCode
from pypy.interpreter.baseobjspace import W_Root, ObjSpace
-from pypy.interpreter.error import OperationError
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.astcompiler import consts
from pypy.interpreter.gateway import NoneNotWrapped
def compile(space, w_source, filename, mode, flags=0, dont_inherit=0):
@@ -21,11 +22,11 @@
in addition to any features explicitly specified.
"""
if space.is_true(space.isinstance(w_source, space.w_unicode)):
- # hack: encode the unicode string as UTF-8 and attach
- # a BOM at the start
- w_source = space.call_method(w_source, 'encode', space.wrap('utf-8'))
- str_ = space.str_w(w_source)
- str_ = '\xEF\xBB\xBF' + str_
+ w_utf_8_source = space.call_method(w_source, "encode",
+ space.wrap("utf-8"))
+ str_ = space.str_w(w_utf_8_source)
+ # This flag tells the parser to reject any coding cookies it sees.
+ flags |= consts.PyCF_SOURCE_IS_UTF8
else:
str_ = space.str_w(w_source)
More information about the Pypy-commit mailing list