[Numpy-svn] r8306 - in trunk/numpy/lib: . tests

Fri Mar 26 01:15:57 EDT 2010

Author: pierregm
Date: 2010-03-26 00:15:57 -0500 (Fri, 26 Mar 2010)
New Revision: 8306

Modified:
   trunk/numpy/lib/recfunctions.py
   trunk/numpy/lib/tests/test_recfunctions.py
Log:
* Fixed merge_arrays for arrays of size 1 (bug #1407)
* merge_arrays now accepts sequences of lists/tuples as inputs

Modified: trunk/numpy/lib/recfunctions.py
===================================================================

--- trunk/numpy/lib/recfunctions.py	2010-03-25 03:32:17 UTC (rev 8305)
+++ trunk/numpy/lib/recfunctions.py	2010-03-26 05:15:57 UTC (rev 8306)
@@ -217,7 +217,7 @@
         current = adtype[name]
         if current.names:
             if lastname:
-                parents[name] = [lastname,]
+                parents[name] = [lastname, ]
             else:
                 parents[name] = []
             parents.update(get_fieldstructure(current, name, parents))
@@ -227,7 +227,7 @@
 #                if (lastparent[-1] != lastname):
                     lastparent.append(lastname)
             elif lastname:
-                lastparent = [lastname,]
+                lastparent = [lastname, ]
             parents[name] = lastparent or []
     return parents or None
 
@@ -274,7 +274,7 @@
         Whether to
     """
     # OK, that's a complete ripoff from Python2.6 itertools.izip_longest
-    def sentinel(counter = ([fill_value]*(len(seqarrays)-1)).pop):
+    def sentinel(counter=([fill_value] * (len(seqarrays) - 1)).pop):
         "Yields the fill_value or raises IndexError"
         yield counter()
     #
@@ -324,8 +324,9 @@
     return output
 
 
+
 def merge_arrays(seqarrays,
-                 fill_value=-1, flatten=False, usemask=True, asrecarray=False):
+                 fill_value= -1, flatten=False, usemask=False, asrecarray=False):
     """
     Merge arrays field by field.
 
@@ -372,62 +373,92 @@
             True    for boolean values
     * XXX: I just obtained these values empirically
     """
+    # Only one item in the input sequence ?
     if (len(seqarrays) == 1):
-        seqarrays = seqarrays[0]
-    if isinstance(seqarrays, ndarray):
+        seqarrays = np.asanyarray(seqarrays[0])
+    # Do we have a single ndarray as input ?
+    if isinstance(seqarrays, (ndarray, np.void)):
         seqdtype = seqarrays.dtype
         if (not flatten) or \
            (zip_descr((seqarrays,), flatten=True) == seqdtype.descr):
+            # Minimal processing needed: just make sure everything's a-ok
             seqarrays = seqarrays.ravel()
+            # Make sure we have named fields
             if not seqdtype.names:
-                seqarrays = seqarrays.view([('', seqdtype)])
+                seqdtype = [('', seqdtype)]
+            # Find what type of array we must return
             if usemask:
                 if asrecarray:
-                    return seqarrays.view(MaskedRecords)
-                return seqarrays.view(MaskedArray)
+                    seqtype = MaskedRecords
+                else:
+                    seqtype = MaskedArray
             elif asrecarray:
-                return seqarrays.view(recarray)
-            return seqarrays
+                seqtype = recarray
+            else:
+                seqtype = ndarray
+            return seqarrays.view(dtype=seqdtype, type=seqtype)
         else:
             seqarrays = (seqarrays,)
-    # Get the dtype
+    else:
+        # Make sure we have arrays in the input sequence
+        seqarrays = map(np.asanyarray, seqarrays)
+    # Find the sizes of the inputs and their maximum
+    sizes = tuple(a.size for a in seqarrays)
+    maxlength = max(sizes)
+    # Get the dtype of the output (flattening if needed)
     newdtype = zip_descr(seqarrays, flatten=flatten)
-    # Get the data and the fill_value from each array
-    seqdata = [ma.getdata(a.ravel()) for a in seqarrays]
-    seqmask = [ma.getmaskarray(a).ravel() for a in seqarrays]
-    fill_value = [_check_fill_value(fill_value, a.dtype) for a in seqdata]
-    # Make an iterator from each array, padding w/ fill_values
-    maxlength = max(len(a) for a in seqarrays)
-    for (i, (a, m, fval)) in enumerate(zip(seqdata, seqmask, fill_value)):
-        # Flatten the fill_values if there's only one field
-        if isinstance(fval, (ndarray, np.void)):
-            fmsk = ma.ones((1,), m.dtype)[0]
-            if len(fval.dtype) == 1:
-                fval = fval.item()[0]
+    # Initialize the sequences for data and mask
+    seqdata = []
+    seqmask = []
+    # If we expect some kind of MaskedArray, make a special loop.
+    if usemask:
+        for (a, n) in itertools.izip(seqarrays, sizes):
+            nbmissing = (maxlength - n)
+            # Get the data and mask
+            data = a.ravel().__array__()
+            mask = ma.getmaskarray(a).ravel()
+            # Get the filling value (if needed)
+            if nbmissing:
+                fval = _check_fill_value(fill_value, a.dtype)
+                if isinstance(fval, (ndarray, np.void)):
+                    if len(fval.dtype) == 1:
+                        fval = fval.item()[0]
+                        fmsk = True
+                    else:
+                        fval = np.array(fval, dtype=a.dtype, ndmin=1)
+                        fmsk = np.ones((1,), dtype=mask.dtype)
+            else:
+                fval = None
                 fmsk = True
-            else:
-                # fval and fmsk should be np.void objects
-                fval = np.array([fval,], dtype=a.dtype)[0]
-#                fmsk = np.array([fmsk,], dtype=m.dtype)[0]
-        else:
-            fmsk = True
-        nbmissing = (maxlength-len(a))
-        seqdata[i] = itertools.chain(a, [fval]*nbmissing)
-        seqmask[i] = itertools.chain(m, [fmsk]*nbmissing)
-    #
-    data = izip_records(seqdata, flatten=flatten)
-    data = tuple(data)
-    if usemask:
-        mask = izip_records(seqmask, fill_value=True, flatten=flatten)
-        mask = tuple(mask)
-        output = ma.array(np.fromiter(data, dtype=newdtype))
-        output._mask[:] = list(mask)
+            # Store an iterator padding the input to the expected length
+            seqdata.append(itertools.chain(data, [fval] * nbmissing))
+            seqmask.append(itertools.chain(mask, [fmsk] * nbmissing))
+        # Create an iterator for the data
+        data = tuple(izip_records(seqdata, flatten=flatten))
+        output = ma.array(np.fromiter(data, dtype=newdtype, count=maxlength),
+                          mask=list(izip_records(seqmask, flatten=flatten)))
         if asrecarray:
             output = output.view(MaskedRecords)
     else:
-        output = np.fromiter(data, dtype=newdtype)
+        # Same as before, without the mask we don't need...
+        for (a, n) in itertools.izip(seqarrays, sizes):
+            nbmissing = (maxlength - n)
+            data = a.ravel().__array__()
+            if nbmissing:
+                fval = _check_fill_value(fill_value, a.dtype)
+                if isinstance(fval, (ndarray, np.void)):
+                    if len(fval.dtype) == 1:
+                        fval = fval.item()[0]
+                    else:
+                        fval = np.array(fval, dtype=a.dtype, ndmin=1)
+            else:
+                fval = None
+            seqdata.append(itertools.chain(data, [fval] * nbmissing))
+        output = np.fromiter(tuple(izip_records(seqdata, flatten=flatten)),
+                             dtype=newdtype, count=maxlength)
         if asrecarray:
             output = output.view(recarray)
+    # And we're done...
     return output
 
 
@@ -467,7 +498,7 @@
           dtype=[('a', '<i4')])
     """
     if _is_string_like(drop_names):
-        drop_names = [drop_names,]
+        drop_names = [drop_names, ]
     else:
         drop_names = set(drop_names)
     #
@@ -542,7 +573,7 @@
 
 
 def append_fields(base, names, data=None, dtypes=None,
-                  fill_value=-1, usemask=True, asrecarray=False):
+                  fill_value= -1, usemask=True, asrecarray=False):
     """
     Add new fields to an existing array.
 
@@ -577,14 +608,14 @@
             err_msg = "The number of arrays does not match the number of names"
             raise ValueError(err_msg)
     elif isinstance(names, basestring):
-        names = [names,]
-        data = [data,]
+        names = [names, ]
+        data = [data, ]
     #
     if dtypes is None:
         data = [np.array(a, copy=False, subok=True) for a in data]
         data = [a.view([(name, a.dtype)]) for (name, a) in zip(names, data)]
     elif not hasattr(dtypes, '__iter__'):
-        dtypes = [dtypes,]
+        dtypes = [dtypes, ]
         if len(data) != len(dtypes):
             if len(dtypes) == 1:
                 dtypes = dtypes * len(data)
@@ -712,7 +743,7 @@
                         current_descr[-1] = descr[1]
                         newdescr[nameidx] = tuple(current_descr)
                 elif descr[1] != current_descr[-1]:
-                    raise TypeError("Incompatible type '%s' <> '%s'" %\
+                    raise TypeError("Incompatible type '%s' <> '%s'" % \
                                     (dict(newdescr)[name], descr[1]))
     # Only one field: use concatenate
     if len(newdescr) == 1:
@@ -849,14 +880,14 @@
                          "'outer' or 'leftouter' (got '%s' instead)" % jointype)
     # If we have a single key, put it in a tuple
     if isinstance(key, basestring):
-        key = (key, )
+        key = (key,)
 
     # Check the keys
     for name in key:
         if name not in r1.dtype.names:
-            raise ValueError('r1 does not have key field %s'%name)
+            raise ValueError('r1 does not have key field %s' % name)
         if name not in r2.dtype.names:
-            raise ValueError('r2 does not have key field %s'%name)
+            raise ValueError('r2 does not have key field %s' % name)
 
     # Make sure we work with ravelled arrays
     r1 = r1.ravel()
@@ -915,7 +946,7 @@
             else:
                 current[0] += r1postfix
                 desc[0] += r2postfix
-                ndtype.insert(nameidx+1, desc)
+                ndtype.insert(nameidx + 1, desc)
         #... we haven't: just add the description to the current list
         else:
             names.extend(desc[0])
@@ -934,7 +965,7 @@
         current = output[f]
         current[:r1cmn] = selected[:r1cmn]
         if jointype in ('outer', 'leftouter'):
-            current[cmn:cmn+r1spc] = selected[r1cmn:]
+            current[cmn:cmn + r1spc] = selected[r1cmn:]
     for f in r2names:
         selected = s2[f]
         if f not in names:

Modified: trunk/numpy/lib/tests/test_recfunctions.py
===================================================================
--- trunk/numpy/lib/tests/test_recfunctions.py	2010-03-25 03:32:17 UTC (rev 8305)
+++ trunk/numpy/lib/tests/test_recfunctions.py	2010-03-26 05:15:57 UTC (rev 8306)
@@ -17,7 +17,7 @@
     """
     #
     def setUp(self):
-        x = np.array([1, 2,])
+        x = np.array([1, 2, ])
         y = np.array([10, 20, 30])
         z = np.array([('A', 1.), ('B', 2.)],
                      dtype=[('A', '|S3'), ('B', float)])
@@ -41,7 +41,7 @@
         assert_equal(test,
                      np.dtype([('', int), ('A', '|S3'), ('B', float)]))
         test = zip_descr((x, z), flatten=False)
-        assert_equal(test, 
+        assert_equal(test,
                      np.dtype([('', int),
                                ('', [('A', '|S3'), ('B', float)])]))
         # Standard & nested dtype 
@@ -71,7 +71,7 @@
         control = np.array([(1,), (4,)], dtype=[('a', int)])
         assert_equal(test, control)
         # A nested sub-field
-        test = drop_fields(a, ['ba',])
+        test = drop_fields(a, ['ba', ])
         control = np.array([(1, (3.0,)), (4, (6.0,))],
                      dtype=[('a', int), ('b', [('bb', int)])])
         assert_equal(test, control)
@@ -127,13 +127,13 @@
         # One 1-nested field
         ndtype = np.dtype([('A', int), ('B', [('BA', float), ('BB', '|S1')])])
         test = get_fieldstructure(ndtype)
-        assert_equal(test, {'A': [], 'B': [], 'BA':['B',], 'BB':['B']})
+        assert_equal(test, {'A': [], 'B': [], 'BA':['B', ], 'BB':['B']})
         # One 2-nested fields
         ndtype = np.dtype([('A', int),
                            ('B', [('BA', int),
                                   ('BB', [('BBA', int), ('BBB', int)])])])
         test = get_fieldstructure(ndtype)
-        control = {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'], 
+        control = {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'],
                    'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']}
         assert_equal(test, control)
 
@@ -173,7 +173,7 @@
     def test_find_duplicates_ignoremask(self):
         "Test the ignoremask option of find_duplicates"
         ndtype = [('a', int)]
-        a = ma.array([1, 1, 1, 2, 2, 3, 3], 
+        a = ma.array([1, 1, 1, 2, 2, 3, 3],
                 mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
         test = find_duplicates(a, ignoremask=True, return_index=True)
         control = [0, 1, 3, 4]
@@ -218,7 +218,7 @@
     Test merge_arrays
     """
     def setUp(self):
-        x = np.array([1, 2,])
+        x = np.array([1, 2, ])
         y = np.array([10, 20, 30])
         z = np.array([('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)])
         w = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
@@ -291,7 +291,7 @@
         assert_equal(test, control)
         #
         test = merge_arrays((x, w), flatten=False)
-        controldtype = dtype=[('f0', int),
+        controldtype = dtype = [('f0', int),
                               ('f1', [('a', int),
                                       ('b', [('ba', float), ('bb', int)])])]
         control = np.array([(1., (1, (2, 3.0))), (2, (4, (5, 6.0)))],
@@ -325,6 +325,15 @@
         test = merge_arrays((z, np.array([10, 20, 30]).view([('C', int)])))
         control = np.array([('A', 1., 10), ('B', 2., 20), ('-1', -1, 20)],
                            dtype=[('A', '|S3'), ('B', float), ('C', int)])
+    #
+    def test_singlerecord(self):
+        (_, x, y, z) = self.data
+        test = merge_arrays((x[0], y[0], z[0]), usemask=False)
+        control = np.array([(1, 10, ('A', 1))],
+                           dtype=[('f0', int),
+                                  ('f1', int),
+                                  ('f2', [('A', '|S3'), ('B', float)])])
+        assert_equal(test, control)
 
 
 
@@ -333,7 +342,7 @@
     Test append_fields
     """
     def setUp(self):
-        x = np.array([1, 2,])
+        x = np.array([1, 2, ])
         y = np.array([10, 20, 30])
         z = np.array([('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)])
         w = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
@@ -387,7 +396,7 @@
     Test stack_arrays
     """
     def setUp(self):
-        x = np.array([1, 2,])
+        x = np.array([1, 2, ])
         y = np.array([10, 20, 30])
         z = np.array([('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)])
         w = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
@@ -436,7 +445,7 @@
         #
         test = stack_arrays((z, x))
         control = ma.array([('A', 1, -1), ('B', 2, -1),
-                            (-1, -1, 1), (-1, -1, 2),],
+                            (-1, -1, 1), (-1, -1, 2), ],
                       mask=[(0, 0, 1), (0, 0, 1),
                             (1, 1, 0), (1, 1, 0)],
                       dtype=[('A', '|S3'), ('B', float), ('f2', int)])
@@ -446,7 +455,7 @@
         test = stack_arrays((z, z, x))
         control = ma.array([('A', 1, -1), ('B', 2, -1),
                             ('A', 1, -1), ('B', 2, -1),
-                            (-1, -1, 1), (-1, -1, 2),],
+                            (-1, -1, 1), (-1, -1, 2), ],
                       mask=[(0, 0, 1), (0, 0, 1),
                             (0, 0, 1), (0, 0, 1),
                             (1, 1, 0), (1, 1, 0)],
@@ -555,26 +564,26 @@
                                   ('c', int), ('d', int)])
         #
         test = join_by(('a', 'b'), a, b, 'outer')
-        control = ma.array([( 0, 50, 100, -1),  ( 1, 51, 101,  -1),
-                            ( 2, 52, 102, -1),  ( 3, 53, 103,  -1),
-                            ( 4, 54, 104, -1),  ( 5, 55, 105,  -1),
-                            ( 5, 65,  -1, 100), ( 6, 56, 106,  -1),
-                            ( 6, 66,  -1, 101), ( 7, 57, 107,  -1),
-                            ( 7, 67,  -1, 102), ( 8, 58, 108,  -1),
-                            ( 8, 68,  -1, 103), ( 9, 59, 109,  -1),
-                            ( 9, 69,  -1, 104), (10, 70,  -1, 105),
-                            (11, 71,  -1, 106), (12, 72,  -1, 107),
-                            (13, 73,  -1, 108), (14, 74,  -1, 109)],
-                      mask=[( 0,  0,   0,   1), ( 0,  0,   0,   1),
-                            ( 0,  0,   0,   1), ( 0,  0,   0,   1),
-                            ( 0,  0,   0,   1), ( 0,  0,   0,   1),
-                            ( 0,  0,   1,   0), ( 0,  0,   0,   1),
-                            ( 0,  0,   1,   0), ( 0,  0,   0,   1),
-                            ( 0,  0,   1,   0), ( 0,  0,   0,   1),
-                            ( 0,  0,   1,   0), ( 0,  0,   0,   1),
-                            ( 0,  0,   1,   0), ( 0,  0,   1,   0),
-                            ( 0,  0,   1,   0), ( 0,  0,   1,   0),
-                            ( 0,  0,   1,   0), ( 0,  0,   1,   0)],
+        control = ma.array([(0, 50, 100, -1), (1, 51, 101, -1),
+                            (2, 52, 102, -1), (3, 53, 103, -1),
+                            (4, 54, 104, -1), (5, 55, 105, -1),
+                            (5, 65, -1, 100), (6, 56, 106, -1),
+                            (6, 66, -1, 101), (7, 57, 107, -1),
+                            (7, 67, -1, 102), (8, 58, 108, -1),
+                            (8, 68, -1, 103), (9, 59, 109, -1),
+                            (9, 69, -1, 104), (10, 70, -1, 105),
+                            (11, 71, -1, 106), (12, 72, -1, 107),
+                            (13, 73, -1, 108), (14, 74, -1, 109)],
+                      mask=[(0, 0, 0, 1), (0, 0, 0, 1),
+                            (0, 0, 0, 1), (0, 0, 0, 1),
+                            (0, 0, 0, 1), (0, 0, 0, 1),
+                            (0, 0, 1, 0), (0, 0, 0, 1),
+                            (0, 0, 1, 0), (0, 0, 0, 1),
+                            (0, 0, 1, 0), (0, 0, 0, 1),
+                            (0, 0, 1, 0), (0, 0, 0, 1),
+                            (0, 0, 1, 0), (0, 0, 1, 0),
+                            (0, 0, 1, 0), (0, 0, 1, 0),
+                            (0, 0, 1, 0), (0, 0, 1, 0)],
                       dtype=[('a', int), ('b', int),
                              ('c', int), ('d', int)])
         assert_equal(test, control)
@@ -585,11 +594,11 @@
                             (4, 54, 104, -1), (5, 55, 105, -1),
                             (6, 56, 106, -1), (7, 57, 107, -1),
                             (8, 58, 108, -1), (9, 59, 109, -1)],
-                      mask=[(0,  0,   0,  1), (0,  0,   0,  1),
-                            (0,  0,   0,  1), (0,  0,   0,  1),
-                            (0,  0,   0,  1), (0,  0,   0,  1),
-                            (0,  0,   0,  1), (0,  0,   0,  1),
-                            (0,  0,   0,  1), (0,  0,   0,  1)],
+                      mask=[(0, 0, 0, 1), (0, 0, 0, 1),
+                            (0, 0, 0, 1), (0, 0, 0, 1),
+                            (0, 0, 0, 1), (0, 0, 0, 1),
+                            (0, 0, 0, 1), (0, 0, 0, 1),
+                            (0, 0, 0, 1), (0, 0, 0, 1)],
                       dtype=[('a', int), ('b', int), ('c', int), ('d', int)])