[pypy-svn] r13031 - pypy/dist/pypy/lib

ale at codespeak.net ale at codespeak.net
Fri Jun 3 11:14:48 CEST 2005


Author: ale
Date: Fri Jun  3 11:14:48 2005
New Revision: 13031

Modified:
   pypy/dist/pypy/lib/inprogress__codecs.py
Log:
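Fixed more bugs in the unicode-internal codec: unicode_internal_encode now emits the byte representation of each character, and unicode_internal_decode rebuilds the unicode string from those bytes.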

Modified: pypy/dist/pypy/lib/inprogress__codecs.py
==============================================================================
--- pypy/dist/pypy/lib/inprogress__codecs.py	(original)
+++ pypy/dist/pypy/lib/inprogress__codecs.py	Fri Jun  3 11:14:48 2005
@@ -159,7 +159,7 @@
     """None
     """
     res = PyUnicode_DecodeRawUnicodeEscape(data, len(data), errors)
-    res = ''.join(res)
+    res = u''.join(res)
     return res,len(res)
 
 def utf_7_decode( data,errors='strict'):
@@ -212,14 +212,40 @@
     res = ''.join(res)
     return res, len(res)
 
+unicode_bytes = (len(hex(sys.maxunicode))-1)/2
+
 def unicode_internal_encode( obj,errors='strict'):
     """None
     """
     if type(obj) == unicode:
-        return obj, len(obj)
+        p = []
+        t = [ord(x) for x in obj]
+        for i in t:
+            for j in xrange(unicode_bytes):
+                p += chr(i%256)
+                i >>= 8
+        res = ''.join(p)
+        return res, len(res)
     else:
         return ''.join(PyUnicode_FromUnicode(obj,size),size)
 
+def unicode_internal_decode( unistr,errors='strict'):
+    """None
+    """
+    if type(unistr) == unicode:
+        return unistr,len(unistr)
+    else:
+        p=[]
+        i=0
+        while i < len(unistr)-unicode_bytes+1:
+            t = 0
+            for j in range(unicode_bytes):
+                t += ord(unistr[i+j])<<(j*8)
+            i += j+1
+            p += unichr(t)
+        res = u''.join(p)
+        return res, len(res)
+
 def utf_16_ex_decode( data,errors='strict'):
     """None
     """
@@ -306,14 +332,6 @@
     res = ''.join(res)
     return res, len(res)
 
-def unicode_internal_decode( unistr,errors='strict'):
-    """None
-    """
-    if type(unistr) == unicode:
-        return unistr,len(unistr)
-    else:
-        return unicode(unistr),len(unistr)
-
 def utf_16_le_decode( data,errors='strict'):
     """None
     """



More information about the Pypy-commit mailing list