[pypy-commit] pypy utf8-unicode2: Fix array
waedt
noreply at buildbot.pypy.org
Tue Jul 29 16:17:00 CEST 2014
Author: Tyler Wade <wayedt at gmail.com>
Branch: utf8-unicode2
Changeset: r72606:60fb6d7660b0
Date: 2014-07-19 10:21 -0500
http://bitbucket.org/pypy/pypy/changeset/60fb6d7660b0/
Log: Fix array
diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py
--- a/pypy/interpreter/utf8.py
+++ b/pypy/interpreter/utf8.py
@@ -7,15 +7,18 @@
wchar_rint = rffi.r_uint
WCHAR_INTP = rffi.UINTP
+WCHAR_INT = rffi.UINT
if rffi.sizeof(rffi.WCHAR_T) == 2:
wchar_rint = rffi.r_ushort
WCHAR_INTP = rffi.USHORTP
+ WCHAR_INT = rffi.USHORT
-def utf8chr(value):
+def utf8chr(value, allow_large_codepoints=False):
# Like unichr, but returns a Utf8Str object
+ # TODO: Do this without the builder so it's faster
b = Utf8Builder()
- b.append(value)
+ b.append(value, allow_large_codepoints=allow_large_codepoints)
return b.build()
def utf8ord_bytes(bytes, start):
@@ -550,7 +553,7 @@
@specialize.argtype(1)
- def append(self, c):
+ def append(self, c, allow_large_codepoints=False):
if isinstance(c, int) or isinstance(c, r_uint):
if c < 0x80:
self._builder.append(chr(c))
@@ -563,7 +566,7 @@
self._builder.append(chr(0x80 | (c >> 6 & 0x3F)))
self._builder.append(chr(0x80 | (c & 0x3F)))
self._is_ascii = False
- elif c <= 0x10FFFF:
+ elif c <= 0x10FFFF or allow_large_codepoints:
self._builder.append(chr(0xF0 | (c >> 18)))
self._builder.append(chr(0x80 | (c >> 12 & 0x3F)))
self._builder.append(chr(0x80 | (c >> 6 & 0x3F)))
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -3,7 +3,7 @@
from rpython.rlib import jit
from rpython.rlib.buffer import Buffer
from rpython.rlib.objectmodel import keepalive_until_here
-from rpython.rlib.rarithmetic import ovfcheck, widen
+from rpython.rlib.rarithmetic import ovfcheck, widen, intmask
from rpython.rlib.unroll import unrolling_iterable
from rpython.rtyper.annlowlevel import llstr
from rpython.rtyper.lltypesystem import lltype, rffi
@@ -15,6 +15,8 @@
interp2app, interpindirect2app, unwrap_spec)
from pypy.interpreter.typedef import (
GetSetProperty, TypeDef, make_weakref_descr)
+from pypy.interpreter import utf8
+from pypy.interpreter.utf8 import Utf8Str, utf8ord, utf8chr
from pypy.module._file.interp_file import W_File
from pypy.objspace.std.floatobject import W_FloatObject
@@ -314,7 +316,7 @@
"""
if self.typecode == 'u':
buf = rffi.cast(UNICODE_ARRAY, self._buffer_as_unsigned())
- return space.wrap(rffi.wcharpsize2unicode(buf, self.len))
+ return space.wrap(Utf8Str.from_wcharpsize(buf, self.len))
else:
msg = "tounicode() may only be called on type 'u' arrays"
raise OperationError(space.w_ValueError, space.wrap(msg))
@@ -570,7 +572,7 @@
types = {
'c': TypeCode(lltype.Char, 'str_w', method=''),
- 'u': TypeCode(lltype.UniChar, 'unicode_w', method=''),
+ 'u': TypeCode(utf8.WCHAR_INT, 'unicode_w', method=''),
'b': TypeCode(rffi.SIGNEDCHAR, 'int_w', True, True),
'B': TypeCode(rffi.UCHAR, 'int_w', True),
'h': TypeCode(rffi.SHORT, 'int_w', True, True),
@@ -670,7 +672,10 @@
if len(item) != 1:
msg = 'array item must be char'
raise OperationError(space.w_TypeError, space.wrap(msg))
- item = item[0]
+ if mytype.unwrap == 'str_w':
+ item = item[0]
+ else:
+ item = utf8ord(item)
return rffi.cast(mytype.itemtype, item)
#
# "regular" case: it fits in an rpython integer (lltype.Signed)
@@ -791,6 +796,9 @@
item = rffi.cast(lltype.Signed, item)
elif mytype.typecode == 'f':
item = float(item)
+ elif mytype.typecode == 'u':
+ # TODO: Does this need special handling for 16-bit wchar_t?
+ item = utf8chr(intmask(item), allow_large_codepoints=True)
return space.wrap(item)
# interface
More information about the pypy-commit
mailing list