[pypy-dev] [pypy-commit] pypy merge-2.7.2: Implement CPython issue5057: do not const-fold a unicode.__getitem__

Maciej Fijalkowski fijall at gmail.com
Mon Jan 23 09:15:32 CET 2012


Why not? We only have wide builds, don't we?

On Sun, Jan 22, 2012 at 9:57 PM, amauryfa <noreply at buildbot.pypy.org> wrote:
> Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
> Branch: merge-2.7.2
> Changeset: r51662:693b08144e00
> Date: 2012-01-22 20:24 +0100
> http://bitbucket.org/pypy/pypy/changeset/693b08144e00/
>
> Log:    Implement CPython issue5057: do not const-fold a unicode.__getitem__
>        operation which returns a non-BMP character; this would produce
>        .pyc files which depend on the unicode width
>
> diff --git a/pypy/interpreter/astcompiler/optimize.py b/pypy/interpreter/astcompiler/optimize.py
> --- a/pypy/interpreter/astcompiler/optimize.py
> +++ b/pypy/interpreter/astcompiler/optimize.py
> @@ -5,6 +5,7 @@
>  from pypy.tool import stdlib_opcode as ops
>  from pypy.interpreter.error import OperationError
>  from pypy.rlib.unroll import unrolling_iterable
> +from pypy.rlib.runicode import MAXUNICODE
>
>
>  def optimize_ast(space, tree, compile_info):
> @@ -289,8 +290,30 @@
>                 w_idx = subs.slice.as_constant()
>                 if w_idx is not None:
>                     try:
> -                        return ast.Const(self.space.getitem(w_obj, w_idx), subs.lineno, subs.col_offset)
> +                        w_const = self.space.getitem(w_obj, w_idx)
>                     except OperationError:
> -                        # Let exceptions propgate at runtime.
> -                        pass
> +                        # Let exceptions propagate at runtime.
> +                        return subs
> +
> +                    # CPython issue5057: if w_obj is unicode, there
> +                    # might be differences between wide and narrow
> +                    # builds in cases like u'\U00012345'[0].
> +                    # Wide builds will return a non-BMP char, whereas
> +                    # narrow builds will return a surrogate.  In both
> +                    # cases, skip the optimization in order to
> +                    # produce compatible pycs.
> +                    if (self.space.isinstance_w(w_obj, self.space.w_unicode)
> +                        and
> +                        self.space.isinstance_w(w_const, self.space.w_unicode)):
> +                        unistr = self.space.unicode_w(w_const)
> +                        if len(unistr) == 1:
> +                            ch = ord(unistr[0])
> +                        else:
> +                            ch = 0
> +                        if (ch > 0xFFFF or
> +                            (MAXUNICODE == 0xFFFF and 0xD800 <= ch <= 0xDFFF)):
> +                            return subs
> +
> +                    return ast.Const(w_const, subs.lineno, subs.col_offset)
> +
>         return subs
> diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
> --- a/pypy/interpreter/astcompiler/test/test_compiler.py
> +++ b/pypy/interpreter/astcompiler/test/test_compiler.py
> @@ -838,6 +838,30 @@
>         # Just checking this doesn't crash out
>         self.count_instructions(source)
>
> +    def test_const_fold_unicode_subscr(self):
> +        source = """def f():
> +        return u"abc"[0]
> +        """
> +        counts = self.count_instructions(source)
> +        assert counts == {ops.LOAD_CONST: 1, ops.RETURN_VALUE: 1}
> +
> +        # getitem outside of the BMP should not be optimized
> +        source = """def f():
> +        return u"\U00012345"[0]
> +        """
> +        counts = self.count_instructions(source)
> +        assert counts == {ops.LOAD_CONST: 2, ops.BINARY_SUBSCR: 1,
> +                          ops.RETURN_VALUE: 1}
> +
> +        # getslice is not yet optimized.
> +        # Still, check a case which yields the empty string.
> +        source = """def f():
> +        return u"abc"[:0]
> +        """
> +        counts = self.count_instructions(source)
> +        assert counts == {ops.LOAD_CONST: 2, ops.SLICE+2: 1,
> +                          ops.RETURN_VALUE: 1}
> +
>     def test_remove_dead_code(self):
>         source = """def f(x):
>             return 5
> _______________________________________________
> pypy-commit mailing list
> pypy-commit at python.org
> http://mail.python.org/mailman/listinfo/pypy-commit


More information about the pypy-dev mailing list