[pypy-commit] pypy numpy-record-dtypes: unicode and string dtypes

fijal noreply at buildbot.pypy.org
Thu Feb 9 13:54:47 CET 2012


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: numpy-record-dtypes
Changeset: r52276:18ad78aa8ed4
Date: 2012-02-09 14:54 +0200
http://bitbucket.org/pypy/pypy/changeset/18ad78aa8ed4/

Log:	unicode and string dtypes

diff --git a/pypy/module/micronumpy/interp_dtype.py b/pypy/module/micronumpy/interp_dtype.py
--- a/pypy/module/micronumpy/interp_dtype.py
+++ b/pypy/module/micronumpy/interp_dtype.py
@@ -16,7 +16,8 @@
 BOOLLTR = "b"
 FLOATINGLTR = "f"
 VOIDLTR = 'V'
-
+STRINGLTR = 'S'
+UNICODELTR = 'U'
 
 VOID_STORAGE = lltype.Array(lltype.Char, hints={'nolength': True, 'render_as_void': True})
 
@@ -139,6 +140,41 @@
                    "V", space.gettypefor(interp_boxes.W_VoidBox), fields=fields,
                    fieldnames=fieldnames)
 
+def variable_dtype(space, name):  # build a W_Dtype for an 'S'/'U'/'V' spec string such as 'S8' or 'U8'
+    if name[0] in '<>':  # optional byte-order prefix
+        # the byte-order prefix is stripped and otherwise ignored; only a non-native unicode spec is singled out (see xxx)
+        if name[0] != byteorder_prefix and name[1] == 'U':
+            xxx  # NOTE(review): bare undefined name; looks like a deliberate "not implemented" marker -- confirm
+        name = name[1:]
+    char = name[0]  # type letter: 'S', 'V', and anything else is asserted to be 'U' below
+    if len(name) == 1:
+        size = 0  # bare letter, no explicit size given
+    else:
+        try:
+            size = int(name[1:])  # number following the letter, handed to the item type below
+        except ValueError:
+            raise OperationError(space.w_TypeError, space.wrap("data type not understood"))  # e.g. 'Sx' or 'Ux'
+    if char == 'S':
+        itemtype = types.StringType(size)
+        basename = 'string'
+        num = 18
+        w_box_type = space.gettypefor(interp_boxes.W_StringBox)
+    elif char == 'V':
+        num = 20
+        basename = 'void'
+        w_box_type = space.gettypefor(interp_boxes.W_VoidBox)
+        xxx  # NOTE(review): same marker as above; itemtype is never assigned in this branch, so the return below cannot work for 'V'
+    else:
+        assert char == 'U'
+        basename = 'unicode'
+        itemtype = types.UnicodeType(size)
+        num = 19
+        w_box_type = space.gettypefor(interp_boxes.W_UnicodeBox)
+    return W_Dtype(itemtype, num, char,  # first `char` fills the slot the other W_Dtype calls in this diff pass as kind
+                   basename + str(8 * itemtype.get_element_size()),  # name = basename + 8 * element size; tests expect "unicode256" for 'U8'
+                   char, w_box_type)
+
+
 def descr__new__(space, w_subtype, w_dtype):
     cache = get_dtype_cache(space)
 
@@ -148,12 +184,18 @@
         return w_dtype
     elif space.isinstance_w(w_dtype, space.w_str):
         name = space.str_w(w_dtype)
+        if ',' in name:
+            return dtype_from_spec(space, name)
         try:
             return cache.dtypes_by_name[name]
         except KeyError:
             pass
+        if name[0] in 'VSU' or name[0] in '<>' and name[1] in 'VSU':
+            return variable_dtype(space, name)
     elif space.isinstance_w(w_dtype, space.w_list):
         return dtype_from_list(space, w_dtype)
+    elif space.isinstance_w(w_dtype, space.w_dict):
+        return dtype_from_dict(space, w_dtype)
     else:
         for dtype in cache.builtin_dtypes:
             if w_dtype in dtype.alternate_constructors:
@@ -323,13 +365,42 @@
             char='Q',
             w_box_type = space.gettypefor(interp_boxes.W_ULongLongBox),
         )
+        self.w_stringdtype = W_Dtype(
+            types.StringType(0),
+            num=18,
+            kind=STRINGLTR,
+            name='string',
+            char='S',
+            w_box_type = space.gettypefor(interp_boxes.W_StringBox),
+            alternate_constructors=[space.w_str],
+        )
+        self.w_unicodedtype = W_Dtype(
+            types.UnicodeType(0),
+            num=19,
+            kind=UNICODELTR,
+            name='unicode',
+            char='U',
+            w_box_type = space.gettypefor(interp_boxes.W_UnicodeBox),
+            alternate_constructors=[space.w_unicode],
+        )
+        self.w_voiddtype = W_Dtype(
+            types.VoidType(0),
+            num=20,
+            kind=VOIDLTR,
+            name='void',
+            char='V',
+            w_box_type = space.gettypefor(interp_boxes.W_VoidBox),
+            #alternate_constructors=[space.w_buffer],
+            # XXX no buffer in space
+        )
         self.builtin_dtypes = [
             self.w_booldtype, self.w_int8dtype, self.w_uint8dtype,
             self.w_int16dtype, self.w_uint16dtype, self.w_int32dtype,
             self.w_uint32dtype, self.w_longdtype, self.w_ulongdtype,
             self.w_longlongdtype, self.w_ulonglongdtype,
             self.w_float32dtype,
-            self.w_float64dtype
+            self.w_float64dtype, self.w_stringdtype, self.w_unicodedtype,
+            self.w_voiddtype,
         ]
         self.dtypes_by_num_bytes = sorted(
             (dtype.itemtype.get_element_size(), dtype)
@@ -343,8 +414,9 @@
             self.dtypes_by_name[byteorder_prefix + can_name] = dtype
             new_name = nonnative_byteorder_prefix + can_name
             itemtypename = dtype.itemtype.__class__.__name__
+            itemtype = getattr(types, 'NonNative' + itemtypename)()
             self.dtypes_by_name[new_name] = W_Dtype(
-                getattr(types, 'NonNative' + itemtypename)(),
+                itemtype,
                 dtype.num, dtype.kind, new_name, dtype.char, dtype.w_box_type)
             for alias in dtype.aliases:
                 self.dtypes_by_name[alias] = dtype
diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -467,6 +467,7 @@
     def test_str_dtype(self):
         from _numpypy import dtype, str_
 
+        raises(TypeError, "dtype('Sx')")
         d = dtype('S8')
         assert d.itemsize == 8
         assert dtype(str) == dtype('S')
@@ -475,6 +476,18 @@
         assert d.name == "string64"
         assert d.num == 18
 
+    def test_unicode_dtype(self):  # checks construction and attributes of 'U' (unicode) dtypes
+        from _numpypy import dtype, unicode_
+
+        raises(TypeError, "dtype('Ux')")  # a non-numeric size suffix must be rejected
+        d = dtype('U8')
+        assert d.itemsize == 8 * 4  # size 8; presumably 4 bytes per character -- confirm
+        assert dtype(unicode) == dtype('U')  # the unicode type itself maps to the unsized 'U' dtype
+        assert d.kind == 'U'
+        assert d.type is unicode_
+        assert d.name == "unicode256"  # 256 == 8 * the itemsize asserted above
+        assert d.num == 19
+
 class AppTestRecordDtypes(BaseNumpyAppTest):
     def test_create(self):
         from _numpypy import dtype, void
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -606,7 +606,7 @@
 class BaseStringType(object):
     _mixin_ = True
     
-    def __init__(self, size):
+    def __init__(self, size=0):
         self.size = size
 
     def get_element_size(self):
@@ -614,10 +614,15 @@
 
 class StringType(BaseType, BaseStringType):
     T = lltype.Char
+VoidType = StringType # why not?
+NonNativeVoidType = VoidType
+NonNativeStringType = StringType
 
 class UnicodeType(BaseType, BaseStringType):
     T = lltype.UniChar
 
+NonNativeUnicodeType = UnicodeType
+
 class RecordType(CompositeType):
     pass
 


More information about the pypy-commit mailing list