[pypy-dev] [pypy-commit] pypy merge-2.7.2: Implement CPython issue5057: do not const-fold a unicode.__getitem__
Maciej Fijalkowski
fijall at gmail.com
Mon Jan 23 09:15:32 CET 2012
Why not? We only have wide builds, don't we?
On Sun, Jan 22, 2012 at 9:57 PM, amauryfa <noreply at buildbot.pypy.org> wrote:
> Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
> Branch: merge-2.7.2
> Changeset: r51662:693b08144e00
> Date: 2012-01-22 20:24 +0100
> http://bitbucket.org/pypy/pypy/changeset/693b08144e00/
>
> Log: Implement CPython issue5057: do not const-fold a unicode.__getitem__
> operation which returns a non-BMP character; this produces .pyc
> files which depend on the unicode width
>
> diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
> --- a/pypy/interpreter/astcompiler/optimize.py
> +++ b/pypy/interpreter/astcompiler/optimize.py
> @@ -5,6 +5,7 @@
> from pypy.tool import stdlib_opcode as ops
> from pypy.interpreter.error import OperationError
> from pypy.rlib.unroll import unrolling_iterable
> +from pypy.rlib.runicode import MAXUNICODE
>
>
> def optimize_ast(space, tree, compile_info):
> @@ -289,8 +290,30 @@
> w_idx = subs.slice.as_constant()
> if w_idx is not None:
> try:
> - return ast.Const(self.space.getitem(w_obj, w_idx), subs.lineno, subs.col_offset)
> + w_const = self.space.getitem(w_obj, w_idx)
> except OperationError:
> - # Let exceptions propgate at runtime.
> - pass
> + # Let exceptions propagate at runtime.
> + return subs
> +
> + # CPython issue5057: if w_obj is unicode, there might
> + # be differences between wide and narrow builds in
> + # cases like u'\U00012345'[0].
> + # Wide builds will return a non-BMP char, whereas
> + # narrow builds will return a surrogate. In both
> + # cases, skip the optimization in order to
> + # produce compatible pycs.
> + if (self.space.isinstance_w(w_obj, self.space.w_unicode)
> + and
> + self.space.isinstance_w(w_const, self.space.w_unicode)):
> + unistr = self.space.unicode_w(w_const)
> + if len(unistr) == 1:
> + ch = ord(unistr[0])
> + else:
> + ch = 0
> + if (ch > 0xFFFF or
> + (MAXUNICODE == 0xFFFF and 0xD800 <= ch <= 0xDFFF)):
> + return subs
> +
> + return ast.Const(w_const, subs.lineno, subs.col_offset)
> +
> return subs
> diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
> --- a/pypy/interpreter/astcompiler/test/test_compiler.py
> +++ b/pypy/interpreter/astcompiler/test/test_compiler.py
> @@ -838,6 +838,30 @@
> # Just checking this doesn't crash out
> self.count_instructions(source)
>
> + def test_const_fold_unicode_subscr(self):
> + source = """def f():
> + return u"abc"[0]
> + """
> + counts = self.count_instructions(source)
> + assert counts == {ops.LOAD_CONST: 1, ops.RETURN_VALUE: 1}
> +
> + # getitem outside of the BMP should not be optimized
> + source = """def f():
> + return u"\U00012345"[0]
> + """
> + counts = self.count_instructions(source)
> + assert counts == {ops.LOAD_CONST: 2, ops.BINARY_SUBSCR: 1,
> + ops.RETURN_VALUE: 1}
> +
> + # getslice is not yet optimized.
> + # Still, check a case which yields the empty string.
> + source = """def f():
> + return u"abc"[:0]
> + """
> + counts = self.count_instructions(source)
> + assert counts == {ops.LOAD_CONST: 2, ops.SLICE+2: 1,
> + ops.RETURN_VALUE: 1}
> +
> def test_remove_dead_code(self):
> source = """def f(x):
> return 5
> _______________________________________________
> pypy-commit mailing list
> pypy-commit at python.org
> http://mail.python.org/mailman/listinfo/pypy-commit
More information about the pypy-dev
mailing list