[pypy-commit] pypy numpy-record-dtypes: unicode and string dtypes
fijal
noreply at buildbot.pypy.org
Thu Feb 9 13:54:47 CET 2012
Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: numpy-record-dtypes
Changeset: r52276:18ad78aa8ed4
Date: 2012-02-09 14:54 +0200
http://bitbucket.org/pypy/pypy/changeset/18ad78aa8ed4/
Log: unicode and string dtypes
diff --git a/pypy/module/micronumpy/interp_dtype.py b/pypy/module/micronumpy/interp_dtype.py
--- a/pypy/module/micronumpy/interp_dtype.py
+++ b/pypy/module/micronumpy/interp_dtype.py
@@ -16,7 +16,8 @@
BOOLLTR = "b"
FLOATINGLTR = "f"
VOIDLTR = 'V'
-
+STRINGLTR = 'S'
+UNICODELTR = 'U'
VOID_STORAGE = lltype.Array(lltype.Char, hints={'nolength': True, 'render_as_void': True})
@@ -139,6 +140,41 @@
"V", space.gettypefor(interp_boxes.W_VoidBox), fields=fields,
fieldnames=fieldnames)
+def variable_dtype(space, name):
+ if name[0] in '<>':
+ # ignore byte order, not sure if it's worth it for unicode only
+ if name[0] != byteorder_prefix and name[1] == 'U':
+ xxx
+ name = name[1:]
+ char = name[0]
+ if len(name) == 1:
+ size = 0
+ else:
+ try:
+ size = int(name[1:])
+ except ValueError:
+ raise OperationError(space.w_TypeError, space.wrap("data type not understood"))
+ if char == 'S':
+ itemtype = types.StringType(size)
+ basename = 'string'
+ num = 18
+ w_box_type = space.gettypefor(interp_boxes.W_StringBox)
+ elif char == 'V':
+ num = 20
+ basename = 'void'
+ w_box_type = space.gettypefor(interp_boxes.W_VoidBox)
+ xxx
+ else:
+ assert char == 'U'
+ basename = 'unicode'
+ itemtype = types.UnicodeType(size)
+ num = 19
+ w_box_type = space.gettypefor(interp_boxes.W_UnicodeBox)
+ return W_Dtype(itemtype, num, char,
+ basename + str(8 * itemtype.get_element_size()),
+ char, w_box_type)
+
+
def descr__new__(space, w_subtype, w_dtype):
cache = get_dtype_cache(space)
@@ -148,12 +184,18 @@
return w_dtype
elif space.isinstance_w(w_dtype, space.w_str):
name = space.str_w(w_dtype)
+ if ',' in name:
+ return dtype_from_spec(space, name)
try:
return cache.dtypes_by_name[name]
except KeyError:
pass
+ if name[0] in 'VSU' or name[0] in '<>' and name[1] in 'VSU':
+ return variable_dtype(space, name)
elif space.isinstance_w(w_dtype, space.w_list):
return dtype_from_list(space, w_dtype)
+ elif space.isinstance_w(w_dtype, space.w_dict):
+ return dtype_from_dict(space, w_dtype)
else:
for dtype in cache.builtin_dtypes:
if w_dtype in dtype.alternate_constructors:
@@ -323,13 +365,42 @@
char='Q',
w_box_type = space.gettypefor(interp_boxes.W_ULongLongBox),
)
+ self.w_stringdtype = W_Dtype(
+ types.StringType(0),
+ num=18,
+ kind=STRINGLTR,
+ name='string',
+ char='S',
+ w_box_type = space.gettypefor(interp_boxes.W_StringBox),
+ alternate_constructors=[space.w_str],
+ )
+ self.w_unicodedtype = W_Dtype(
+ types.UnicodeType(0),
+ num=19,
+ kind=UNICODELTR,
+ name='unicode',
+ char='U',
+ w_box_type = space.gettypefor(interp_boxes.W_UnicodeBox),
+ alternate_constructors=[space.w_unicode],
+ )
+ self.w_voiddtype = W_Dtype(
+ types.VoidType(0),
+ num=20,
+ kind=VOIDLTR,
+ name='void',
+ char='V',
+ w_box_type = space.gettypefor(interp_boxes.W_VoidBox),
+ #alternate_constructors=[space.w_buffer],
+ # XXX no buffer in space
+ )
self.builtin_dtypes = [
self.w_booldtype, self.w_int8dtype, self.w_uint8dtype,
self.w_int16dtype, self.w_uint16dtype, self.w_int32dtype,
self.w_uint32dtype, self.w_longdtype, self.w_ulongdtype,
self.w_longlongdtype, self.w_ulonglongdtype,
self.w_float32dtype,
- self.w_float64dtype
+ self.w_float64dtype, self.w_stringdtype, self.w_unicodedtype,
+ self.w_voiddtype,
]
self.dtypes_by_num_bytes = sorted(
(dtype.itemtype.get_element_size(), dtype)
@@ -343,8 +414,9 @@
self.dtypes_by_name[byteorder_prefix + can_name] = dtype
new_name = nonnative_byteorder_prefix + can_name
itemtypename = dtype.itemtype.__class__.__name__
+ itemtype = getattr(types, 'NonNative' + itemtypename)()
self.dtypes_by_name[new_name] = W_Dtype(
- getattr(types, 'NonNative' + itemtypename)(),
+ itemtype,
dtype.num, dtype.kind, new_name, dtype.char, dtype.w_box_type)
for alias in dtype.aliases:
self.dtypes_by_name[alias] = dtype
diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -467,6 +467,7 @@
def test_str_dtype(self):
from _numpypy import dtype, str_
+ raises(TypeError, "dtype('Sx')")
d = dtype('S8')
assert d.itemsize == 8
assert dtype(str) == dtype('S')
@@ -475,6 +476,18 @@
assert d.name == "string64"
assert d.num == 18
+ def test_unicode_dtype(self):
+ from _numpypy import dtype, unicode_
+
+ raises(TypeError, "dtype('Ux')")
+ d = dtype('U8')
+ assert d.itemsize == 8 * 4
+ assert dtype(unicode) == dtype('U')
+ assert d.kind == 'U'
+ assert d.type is unicode_
+ assert d.name == "unicode256"
+ assert d.num == 19
+
class AppTestRecordDtypes(BaseNumpyAppTest):
def test_create(self):
from _numpypy import dtype, void
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -606,7 +606,7 @@
class BaseStringType(object):
_mixin_ = True
- def __init__(self, size):
+ def __init__(self, size=0):
self.size = size
def get_element_size(self):
@@ -614,10 +614,15 @@
class StringType(BaseType, BaseStringType):
T = lltype.Char
+VoidType = StringType # why not?
+NonNativeVoidType = VoidType
+NonNativeStringType = StringType
class UnicodeType(BaseType, BaseStringType):
T = lltype.UniChar
+NonNativeUnicodeType = UnicodeType
+
class RecordType(CompositeType):
pass
More information about the pypy-commit mailing list