[pypy-commit] pypy py3k: completely change the strategy for unicode identifiers: instead of internally storing them as rpython unicode strings, we store them as utf-8 encoded byte strings, and decode them to unicode only when necessary (e.g., to present them to the user)

antocuni noreply at buildbot.pypy.org
Fri Aug 31 10:05:51 CEST 2012


Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: py3k
Changeset: r57035:133e4041f97f
Date: 2012-08-30 17:23 +0200
http://bitbucket.org/pypy/pypy/changeset/133e4041f97f/

Log:	completely change the strategy for unicode identifiers: instead of
	internally storing them as rpython unicode strings, we store them as
	utf-8 encoded byte strings, and decode them to unicode only when
	necessary (e.g., to present them to the user)

diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -5,7 +5,6 @@
 from pypy.interpreter.error import OperationError, operationerrfmt
 from pypy.rlib.debug import make_sure_not_resized
 from pypy.rlib import jit
-from pypy.rlib.debug import check_annotation
 from pypy.rlib.objectmodel import enforceargs
 
 class Signature(object):
@@ -93,22 +92,6 @@
             return self.kwargname
         raise IndexError
 
-
-
-def check_list_of_unicode(ann, bk):
-    from pypy.annotation.model import (SomeList, SomeUnicodeString,
-                                       s_None, s_ImpossibleValue)
-    if ann is s_None:
-        return
-    if not isinstance(ann, SomeList):
-        raise TypeError
-    s_item = ann.listdef.listitem.s_value
-    if s_item is s_ImpossibleValue:
-        return
-    if not isinstance(s_item, SomeUnicodeString):
-        raise TypeError
-
-
 class Arguments(object):
     """
     Collects the arguments of a function call.
@@ -127,7 +110,6 @@
         self.space = space
         assert isinstance(args_w, list)
         self.arguments_w = args_w
-        check_annotation(keywords, check_list_of_unicode)
         
         self.keywords = keywords
         self.keywords_w = keywords_w
@@ -203,7 +185,6 @@
         # unpack the ** arguments
         space = self.space
         keywords, values_w = space.view_as_kwargs(w_starstararg)
-        check_annotation(keywords, check_list_of_unicode)
         if keywords is not None: # this path also taken for empty dicts
             if self.keywords is None:
                 self.keywords = keywords
@@ -421,7 +402,7 @@
                                   w_kw_defs, 0)
         except ArgErr, e:
             raise operationerrfmt(self.space.w_TypeError,
-                                  "%s() %s", fnname, e.getmsg())
+                                  "%s() %8", fnname, e.getmsg())
         return signature.scope_length()
 
     def _parse(self, w_firstarg, signature, defaults_w, w_kw_defs, blindargs=0):
@@ -446,7 +427,7 @@
                                blindargs)
         except ArgErr, e:
             raise operationerrfmt(self.space.w_TypeError,
-                                  "%s() %s", fnname, e.getmsg())
+                                  "%s() %8", fnname, e.getmsg())
 
     @staticmethod
     def frompacked(space, w_args=None, w_kwds=None):
@@ -492,11 +473,10 @@
 
 def _do_combine_starstarargs_wrapped(space, keys_w, w_starstararg, keywords,
         keywords_w, existingkeywords):
-    
     i = 0
     for w_key in keys_w:
         try:
-            key = space.unicode_w(w_key)
+            key = space.unicode_w(w_key).encode('utf-8')
         except OperationError, e:
             if e.match(space, space.w_TypeError):
                 raise OperationError(
@@ -778,6 +758,7 @@
             self.argname)
         return msg
 
+
 class ArgErrUnknownKwds(ArgErr):
 
     def __init__(self, space, num_remainingkwds, keywords, kwds_mapping,
@@ -798,9 +779,9 @@
                             # note: negative-based indexing from the end
                             w_name = keyword_names_w[i - len(keywords)]
                         except IndexError:
-                            name = u'?'
+                            name = '?'
                         else:
-                            name = space.unicode_w(w_name)
+                            name = space.unicode_w(w_name).encode('utf-8')
                     break
         self.kwd_name = name
 
diff --git a/pypy/module/__builtin__/compiling.py b/pypy/module/__builtin__/compiling.py
--- a/pypy/module/__builtin__/compiling.py
+++ b/pypy/module/__builtin__/compiling.py
@@ -6,7 +6,7 @@
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.astcompiler import consts, ast
 from pypy.interpreter.gateway import unwrap_spec
-from pypy.interpreter.argument import Arguments, check_annotation, check_list_of_unicode
+from pypy.interpreter.argument import Arguments
 from pypy.interpreter.nestedscope import Cell
 
 @unwrap_spec(filename=str, mode=str, flags=int, dont_inherit=int, optimize=int)
@@ -114,7 +114,7 @@
 def build_class(space, w_func, w_name, __args__):
     bases_w, kwds_w = __args__.unpack()
     w_bases = space.newtuple(bases_w)
-    w_meta = kwds_w.pop(u'metaclass', None)
+    w_meta = kwds_w.pop('metaclass', None)
     if w_meta is None:
         if bases_w:
             w_meta = space.type(bases_w[0])
@@ -129,7 +129,6 @@
         w_namespace = space.newdict()
     else:
         keywords = kwds_w.keys()
-        check_annotation(keywords, check_list_of_unicode)
         args = Arguments(space, 
                          args_w=[w_name, w_bases],
                          keywords=keywords,
@@ -137,7 +136,6 @@
         w_namespace = space.call_args(w_prep, args)
     w_cell = space.call_function(w_func, w_namespace)
     keywords = kwds_w.keys()
-    check_annotation(keywords, check_list_of_unicode)
     args = Arguments(space,
                      args_w=[w_name, w_bases, w_namespace],
                      keywords=keywords,


More information about the pypy-commit mailing list