[pypy-svn] r48475 - in pypy/branch/unicode-objspace/pypy: rpython rpython/lltypesystem rpython/ootypesystem rpython/ootypesystem/test rpython/test translator/cli translator/cli/src translator/cli/src/stub translator/cli/test translator/oosupport

antocuni at codespeak.net antocuni at codespeak.net
Fri Nov 9 18:00:54 CET 2007


Author: antocuni
Date: Fri Nov  9 18:00:53 2007
New Revision: 48475

Added:
   pypy/branch/unicode-objspace/pypy/translator/cli/test/test_unicode.py
      - copied unchanged from r48474, pypy/dist/pypy/translator/cli/test/test_unicode.py
Modified:
   pypy/branch/unicode-objspace/pypy/rpython/llinterp.py
   pypy/branch/unicode-objspace/pypy/rpython/lltypesystem/lloperation.py
   pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/ooregistry.py
   pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/ootype.py
   pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/rstr.py
   pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/test/test_oostring.py
   pypy/branch/unicode-objspace/pypy/rpython/test/test_runicode.py
   pypy/branch/unicode-objspace/pypy/translator/cli/cts.py
   pypy/branch/unicode-objspace/pypy/translator/cli/ilgenerator.py
   pypy/branch/unicode-objspace/pypy/translator/cli/metavm.py
   pypy/branch/unicode-objspace/pypy/translator/cli/opcodes.py
   pypy/branch/unicode-objspace/pypy/translator/cli/prebuiltnodes.py
   pypy/branch/unicode-objspace/pypy/translator/cli/src/pypylib.cs
   pypy/branch/unicode-objspace/pypy/translator/cli/src/stub/main.il
   pypy/branch/unicode-objspace/pypy/translator/cli/test/runtest.py
   pypy/branch/unicode-objspace/pypy/translator/oosupport/constant.py
Log:
Apply checkin r48474 to this branch as well:

svn merge svn+ssh://codespeak.net/svn/pypy/dist/pypy@48473 svn+ssh://codespeak.net/svn/pypy/dist/pypy@48474



Modified: pypy/branch/unicode-objspace/pypy/rpython/llinterp.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/rpython/llinterp.py	(original)
+++ pypy/branch/unicode-objspace/pypy/rpython/llinterp.py	Fri Nov  9 18:00:53 2007
@@ -1087,7 +1087,10 @@
         return ootype.oostring(obj, base)
 
     def op_oounicode(self, obj, base):
-        return ootype.oounicode(obj, base)
+        try:
+            return ootype.oounicode(obj, base)
+        except UnicodeDecodeError:
+            self.make_llexception()
 
     def op_ooparse_int(self, s, base):
         try:

Modified: pypy/branch/unicode-objspace/pypy/rpython/lltypesystem/lloperation.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/rpython/lltypesystem/lloperation.py	(original)
+++ pypy/branch/unicode-objspace/pypy/rpython/lltypesystem/lloperation.py	Fri Nov  9 18:00:53 2007
@@ -468,8 +468,9 @@
     'ooidentityhash':       LLOp(oo=True, sideeffects=False),
     'oostring':             LLOp(oo=True, sideeffects=False),
     'ooparse_int':          LLOp(oo=True, canraise=(ValueError,)),
-    'ooparse_float':          LLOp(oo=True, canraise=(ValueError,)),
+    'ooparse_float':        LLOp(oo=True, canraise=(ValueError,)),
     'oohash':               LLOp(oo=True, sideeffects=False),
+    'oounicode':            LLOp(oo=True, canraise=(UnicodeDecodeError,)),
 
     # _____ read frame var support ___
     'get_frame_base':       LLOp(sideeffects=False),

Modified: pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/ooregistry.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/ooregistry.py	(original)
+++ pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/ooregistry.py	Fri Nov  9 18:00:53 2007
@@ -28,12 +28,15 @@
     _about_ = ootype.oounicode
 
     def compute_result_annotation(self, obj_s, base_s):
-        assert isinstance(obj_s, annmodel.SomeUnicodeCodePoint)
+        assert isinstance(obj_s, annmodel.SomeUnicodeCodePoint) or \
+               (isinstance(obj_s, annmodel.SomeOOInstance)
+                and obj_s.ootype in (ootype.String, ootype.Unicode))
         assert isinstance(base_s, annmodel.SomeInteger)
         return annmodel.SomeOOInstance(ootype.Unicode)
 
     def specialize_call(self, hop):
-        assert isinstance(hop.args_s[0],annmodel.SomeUnicodeCodePoint)
+        assert isinstance(hop.args_s[0], (annmodel.SomeUnicodeCodePoint,
+                                          annmodel.SomeOOInstance))
         vlist = hop.inputargs(hop.args_r[0], ootype.Signed)
         return hop.genop('oounicode', vlist, resulttype = ootype.Unicode)
     

Modified: pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/ootype.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/ootype.py	(original)
+++ pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/ootype.py	Fri Nov  9 18:00:53 2007
@@ -1563,14 +1563,21 @@
 
 def oounicode(obj, base):
     """
-    Convert an unichar into an unicode string.
+    Convert:
+      - an unichar into an unicode string OR
+      - a string into an unicode string
 
     base must be -1, for consistency with oostring.
     """
     assert base == -1
-    assert isinstance(obj, unicode)
-    assert len(obj) == 1
-    return make_unicode(obj)
+    if isinstance(obj, unicode):
+        assert len(obj) == 1
+        return make_unicode(obj)
+    elif isinstance(obj, _string):
+        s = unicode(obj._str)
+        return make_unicode(s)
+    else:
+        assert False
 
 def ooparse_int(s, base):
     return int(s._str, base)

Modified: pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/rstr.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/rstr.py	(original)
+++ pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/rstr.py	Fri Nov  9 18:00:53 2007
@@ -86,15 +86,7 @@
         return ootype.oostring(ch, -1)
 
     def ll_str2unicode(s):
-        res = ootype.new(ootype.UnicodeBuilder)
-        lgt = s.ll_strlen()
-        res.ll_allocate(lgt)
-        for i in range(lgt):
-            c = s.ll_stritem_nonneg(i)
-            if ord(c) > 127:
-                raise UnicodeDecodeError
-            res.ll_append_char(cast_primitive(UniChar, c))
-        return res.ll_build()
+        return ootype.oounicode(s, -1)
 
     def ll_unichr2unicode(ch):
         return ootype.oounicode(ch, -1)

Modified: pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/test/test_oostring.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/test/test_oostring.py	(original)
+++ pypy/branch/unicode-objspace/pypy/rpython/ootypesystem/test/test_oostring.py	Fri Nov  9 18:00:53 2007
@@ -1,3 +1,4 @@
+import py
 from pypy.rpython.ootypesystem import ootype
 from pypy.rpython.ootypesystem.rstr import string_repr
 from pypy.rpython.test.test_llinterp import interpret 
@@ -26,3 +27,17 @@
 
     res = interpret(f, [], type_system='ootype')
     assert res._str == 'foo'
+
+def test_oounicode():
+    u = ootype.oounicode(u'a', -1)
+    assert isinstance(u, ootype._string)
+    assert ootype.typeOf(u) is ootype.Unicode
+
+    s = ootype.make_string('a string')
+    u = ootype.oounicode(s, -1)
+    assert isinstance(u, ootype._string)
+    assert ootype.typeOf(u) is ootype.Unicode
+
+    s = ootype.make_string('non-ascii string: \xe0')
+    py.test.raises(UnicodeDecodeError, ootype.oounicode, s, -1)
+    

Modified: pypy/branch/unicode-objspace/pypy/rpython/test/test_runicode.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/rpython/test/test_runicode.py	(original)
+++ pypy/branch/unicode-objspace/pypy/rpython/test/test_runicode.py	Fri Nov  9 18:00:53 2007
@@ -186,6 +186,10 @@
     test_float = unsupported
     test_hlstr = unsupported
 
+    def test_find_empty_string(self):
+        py.test.skip("We should think how to solve this problem")
+    test_rfind_empty_string = test_find_empty_string
+
 class TestLLtype(BaseTestRUnicode, LLRtypeMixin):
     EMPTY_STRING_HASH = -1
 

Modified: pypy/branch/unicode-objspace/pypy/translator/cli/cts.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/translator/cli/cts.py	(original)
+++ pypy/branch/unicode-objspace/pypy/translator/cli/cts.py	Fri Nov  9 18:00:53 2007
@@ -132,6 +132,8 @@
     ootype.Class: types.type,
     ootype.String: types.string,
     ootype.StringBuilder: types.string_builder,
+    ootype.Unicode: types.string,
+    ootype.UnicodeBuilder: types.string_builder,
     ootype.WeakReference: types.weakref,
 
     # maps generic types to their ordinal

Modified: pypy/branch/unicode-objspace/pypy/translator/cli/ilgenerator.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/translator/cli/ilgenerator.py	(original)
+++ pypy/branch/unicode-objspace/pypy/translator/cli/ilgenerator.py	Fri Nov  9 18:00:53 2007
@@ -397,7 +397,7 @@
             ilasm.opcode('ldc.i4', str(value))
         elif TYPE in (ootype.SignedLongLong, ootype.UnsignedLongLong):
             ilasm.opcode('ldc.i8', str(value))
-        elif TYPE is ootype.String:
+        elif TYPE in (ootype.String, ootype.Unicode):
             if value._str is None:
                 ilasm.opcode('ldnull')
             else:

Modified: pypy/branch/unicode-objspace/pypy/translator/cli/metavm.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/translator/cli/metavm.py	(original)
+++ pypy/branch/unicode-objspace/pypy/translator/cli/metavm.py	Fri Nov  9 18:00:53 2007
@@ -55,7 +55,7 @@
                 generator.load(arg)
 
         # XXX: very hackish, need refactoring
-        if this.concretetype is ootype.String:
+        if this.concretetype in (ootype.String, ootype.Unicode):
             # special case for string: don't use methods, but plain functions
             METH = this.concretetype._METHODS[method_name]
             cts = generator.cts
@@ -106,6 +106,17 @@
         generator.load(op.args[1])
         generator.call_signature('string [pypylib]pypy.runtime.Utils::OOString(%s, int32)' % argtype)
 
+class _OOUnicode(MicroInstruction):
+    def render(self, generator, op):
+        from pypy.objspace.flow.model import Constant
+        ARGTYPE = op.args[0].concretetype
+        argtype = generator.cts.lltype_to_cts(ARGTYPE)
+        v_base = op.args[1]
+        assert v_base.value == -1, "The second argument of oounicode must be -1"
+        
+        generator.load(op.args[0])
+        generator.call_signature('string [pypylib]pypy.runtime.Utils::OOUnicode(%s)' % argtype)
+
 class _NewCustomDict(MicroInstruction):
     def render(self, generator, op):
         DICT = op.args[0].value
@@ -226,6 +237,7 @@
 IndirectCall = _IndirectCall()
 RuntimeNew = _RuntimeNew()
 OOString = _OOString()
+OOUnicode = _OOUnicode()
 NewCustomDict = _NewCustomDict()
 #CastWeakAdrToPtr = _CastWeakAdrToPtr()
 Box = _Box()

Modified: pypy/branch/unicode-objspace/pypy/translator/cli/opcodes.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/translator/cli/opcodes.py	(original)
+++ pypy/branch/unicode-objspace/pypy/translator/cli/opcodes.py	Fri Nov  9 18:00:53 2007
@@ -1,7 +1,7 @@
 from pypy.translator.cli.metavm import  Call, CallMethod, \
      IndirectCall, GetField, SetField, OOString, DownCast, NewCustomDict,\
      MapException, Box, Unbox, NewArray, GetArrayElem, SetArrayElem,\
-     TypeOf, CastPrimitive
+     TypeOf, CastPrimitive, OOUnicode
 from pypy.translator.oosupport.metavm import PushArg, PushAllArgs, StoreResult, InstructionList,\
     New, RuntimeNew, CastTo, PushPrimitive
 from pypy.translator.cli.cts import WEAKREF
@@ -50,6 +50,7 @@
     'ooidentityhash':           [PushAllArgs, 'callvirt instance int32 object::GetHashCode()'],
     'oohash':                   [PushAllArgs, 'callvirt instance int32 object::GetHashCode()'],    
     'oostring':                 [OOString],
+    'oounicode':                [OOUnicode],
     'ooparse_int':              [PushAllArgs, 'call int32 [pypylib]pypy.runtime.Utils::OOParseInt(string, int32)'],
     'ooparse_float':            [PushAllArgs, 'call float64 [pypylib]pypy.runtime.Utils::OOParseFloat(string)'],
     'oonewcustomdict':          [NewCustomDict],

Modified: pypy/branch/unicode-objspace/pypy/translator/cli/prebuiltnodes.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/translator/cli/prebuiltnodes.py	(original)
+++ pypy/branch/unicode-objspace/pypy/translator/cli/prebuiltnodes.py	Fri Nov  9 18:00:53 2007
@@ -25,10 +25,14 @@
 def raise_ZeroDivisionError():
     raise ZeroDivisionError
 
+def raise_UnicodeDecodeError():
+    raise UnicodeDecodeError
+
 HELPERS = [(raise_RuntimeError, []),
            (raise_OverflowError, []),
            (raise_ValueError, []),
            (raise_ZeroDivisionError, []),
+           (raise_UnicodeDecodeError, []),
            ]
 
 def _build_helpers(translator, db):

Modified: pypy/branch/unicode-objspace/pypy/translator/cli/src/pypylib.cs
==============================================================================
--- pypy/branch/unicode-objspace/pypy/translator/cli/src/pypylib.cs	(original)
+++ pypy/branch/unicode-objspace/pypy/translator/cli/src/pypylib.cs	Fri Nov  9 18:00:53 2007
@@ -28,6 +28,9 @@
             }
         }
 
+        public static string ToPython_unicode(string x) { return "u" + ToPython(x); }
+        public static string ToPython_unicode(char x)   { return "u" + ToPython(x); }
+
         public static string ToPython(object x) {
             if (x == null)
                 return "None";
@@ -103,6 +106,24 @@
             return b.ToString();
         }
 
+        private static void check_ascii(char ch)
+        {
+            if ((int)ch > 127)
+                Helpers.raise_UnicodeDecodeError();
+        }
+
+        public static string OOUnicode(char ch)
+        {
+            return ch.ToString();
+        }
+
+        public static string OOUnicode(string s)
+        {
+            foreach(char ch in s)
+                check_ascii(ch);
+            return s;
+        }
+
         public static int OOParseInt(string s, int base_)
         {
             return Convert.ToInt32(s, base_);

Modified: pypy/branch/unicode-objspace/pypy/translator/cli/src/stub/main.il
==============================================================================
--- pypy/branch/unicode-objspace/pypy/translator/cli/src/stub/main.il	(original)
+++ pypy/branch/unicode-objspace/pypy/translator/cli/src/stub/main.il	Fri Nov  9 18:00:53 2007
@@ -58,5 +58,13 @@
             throw
             ret
         }
+
+        .method public static void raise_UnicodeDecodeError() il managed
+        {
+            ldstr "This is only a stub, it should not be called"
+            newobj instance void class [mscorlib]System.ApplicationException::.ctor(string)
+            throw
+            ret
+        }
     }
 }

Modified: pypy/branch/unicode-objspace/pypy/translator/cli/test/runtest.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/translator/cli/test/runtest.py	(original)
+++ pypy/branch/unicode-objspace/pypy/translator/cli/test/runtest.py	Fri Nov  9 18:00:53 2007
@@ -28,6 +28,12 @@
 def format_object(TYPE, cts, ilasm):
     if TYPE is ootype.Void:
         ilasm.opcode('ldstr "None"')
+    elif TYPE in (ootype.Unicode, ootype.UniChar):
+        # the CLI type for Unicode is the very same as for
+        # ootype.String, so we can't rely on overloading to
+        # distinguish
+        type_ = cts.lltype_to_cts(TYPE)
+        ilasm.call('string class [pypylib]pypy.test.Result::ToPython_unicode(%s)' % type_)
     else:
         if isinstance(TYPE, (ootype.BuiltinType, ootype.Instance, ootype.StaticMethod)) and TYPE is not ootype.String:
             type_ = 'object'
@@ -296,6 +302,9 @@
     def ll_to_string(self, s):
         return s
 
+    def ll_to_unicode(self, s):
+        return s
+
     def ll_to_list(self, l):
         return l
 

Modified: pypy/branch/unicode-objspace/pypy/translator/oosupport/constant.py
==============================================================================
--- pypy/branch/unicode-objspace/pypy/translator/oosupport/constant.py	(original)
+++ pypy/branch/unicode-objspace/pypy/translator/oosupport/constant.py	Fri Nov  9 18:00:53 2007
@@ -32,7 +32,7 @@
 
 PRIMITIVE_TYPES = set([ootype.Void, ootype.Bool, ootype.Char, ootype.UniChar,
                        ootype.Float, ootype.Signed, ootype.Unsigned,
-                       ootype.String, ootype.SignedLongLong,
+                       ootype.String, ootype.Unicode, ootype.SignedLongLong,
                        ootype.UnsignedLongLong])
 
 def is_primitive(TYPE):



More information about the Pypy-commit mailing list