[Numpy-svn] r8306 - in trunk/numpy/lib: . tests
numpy-svn at scipy.org
numpy-svn at scipy.org
Fri Mar 26 01:15:57 EDT 2010
Author: pierregm
Date: 2010-03-26 00:15:57 -0500 (Fri, 26 Mar 2010)
New Revision: 8306
Modified:
trunk/numpy/lib/recfunctions.py
trunk/numpy/lib/tests/test_recfunctions.py
Log:
* Fixed merge_arrays for arrays of size 1 (bug #1407)
* merge_arrays now accepts sequences of lists/tuples as inputs
Modified: trunk/numpy/lib/recfunctions.py
===================================================================
--- trunk/numpy/lib/recfunctions.py 2010-03-25 03:32:17 UTC (rev 8305)
+++ trunk/numpy/lib/recfunctions.py 2010-03-26 05:15:57 UTC (rev 8306)
@@ -217,7 +217,7 @@
current = adtype[name]
if current.names:
if lastname:
- parents[name] = [lastname,]
+ parents[name] = [lastname, ]
else:
parents[name] = []
parents.update(get_fieldstructure(current, name, parents))
@@ -227,7 +227,7 @@
# if (lastparent[-1] != lastname):
lastparent.append(lastname)
elif lastname:
- lastparent = [lastname,]
+ lastparent = [lastname, ]
parents[name] = lastparent or []
return parents or None
@@ -274,7 +274,7 @@
Whether to
"""
# OK, that's a complete ripoff from Python2.6 itertools.izip_longest
- def sentinel(counter = ([fill_value]*(len(seqarrays)-1)).pop):
+ def sentinel(counter=([fill_value] * (len(seqarrays) - 1)).pop):
"Yields the fill_value or raises IndexError"
yield counter()
#
@@ -324,8 +324,9 @@
return output
+
def merge_arrays(seqarrays,
- fill_value=-1, flatten=False, usemask=True, asrecarray=False):
+ fill_value= -1, flatten=False, usemask=False, asrecarray=False):
"""
Merge arrays field by field.
@@ -372,62 +373,92 @@
True for boolean values
* XXX: I just obtained these values empirically
"""
+ # Only one item in the input sequence ?
if (len(seqarrays) == 1):
- seqarrays = seqarrays[0]
- if isinstance(seqarrays, ndarray):
+ seqarrays = np.asanyarray(seqarrays[0])
+ # Do we have a single ndarray as input ?
+ if isinstance(seqarrays, (ndarray, np.void)):
seqdtype = seqarrays.dtype
if (not flatten) or \
(zip_descr((seqarrays,), flatten=True) == seqdtype.descr):
+ # Minimal processing needed: just make sure everything's a-ok
seqarrays = seqarrays.ravel()
+ # Make sure we have named fields
if not seqdtype.names:
- seqarrays = seqarrays.view([('', seqdtype)])
+ seqdtype = [('', seqdtype)]
+ # Find what type of array we must return
if usemask:
if asrecarray:
- return seqarrays.view(MaskedRecords)
- return seqarrays.view(MaskedArray)
+ seqtype = MaskedRecords
+ else:
+ seqtype = MaskedArray
elif asrecarray:
- return seqarrays.view(recarray)
- return seqarrays
+ seqtype = recarray
+ else:
+ seqtype = ndarray
+ return seqarrays.view(dtype=seqdtype, type=seqtype)
else:
seqarrays = (seqarrays,)
- # Get the dtype
+ else:
+ # Make sure we have arrays in the input sequence
+ seqarrays = map(np.asanyarray, seqarrays)
+ # Find the sizes of the inputs and their maximum
+ sizes = tuple(a.size for a in seqarrays)
+ maxlength = max(sizes)
+ # Get the dtype of the output (flattening if needed)
newdtype = zip_descr(seqarrays, flatten=flatten)
- # Get the data and the fill_value from each array
- seqdata = [ma.getdata(a.ravel()) for a in seqarrays]
- seqmask = [ma.getmaskarray(a).ravel() for a in seqarrays]
- fill_value = [_check_fill_value(fill_value, a.dtype) for a in seqdata]
- # Make an iterator from each array, padding w/ fill_values
- maxlength = max(len(a) for a in seqarrays)
- for (i, (a, m, fval)) in enumerate(zip(seqdata, seqmask, fill_value)):
- # Flatten the fill_values if there's only one field
- if isinstance(fval, (ndarray, np.void)):
- fmsk = ma.ones((1,), m.dtype)[0]
- if len(fval.dtype) == 1:
- fval = fval.item()[0]
+ # Initialize the sequences for data and mask
+ seqdata = []
+ seqmask = []
+ # If we expect some kind of MaskedArray, make a special loop.
+ if usemask:
+ for (a, n) in itertools.izip(seqarrays, sizes):
+ nbmissing = (maxlength - n)
+ # Get the data and mask
+ data = a.ravel().__array__()
+ mask = ma.getmaskarray(a).ravel()
+ # Get the filling value (if needed)
+ if nbmissing:
+ fval = _check_fill_value(fill_value, a.dtype)
+ if isinstance(fval, (ndarray, np.void)):
+ if len(fval.dtype) == 1:
+ fval = fval.item()[0]
+ fmsk = True
+ else:
+ fval = np.array(fval, dtype=a.dtype, ndmin=1)
+ fmsk = np.ones((1,), dtype=mask.dtype)
+ else:
+ fval = None
fmsk = True
- else:
- # fval and fmsk should be np.void objects
- fval = np.array([fval,], dtype=a.dtype)[0]
-# fmsk = np.array([fmsk,], dtype=m.dtype)[0]
- else:
- fmsk = True
- nbmissing = (maxlength-len(a))
- seqdata[i] = itertools.chain(a, [fval]*nbmissing)
- seqmask[i] = itertools.chain(m, [fmsk]*nbmissing)
- #
- data = izip_records(seqdata, flatten=flatten)
- data = tuple(data)
- if usemask:
- mask = izip_records(seqmask, fill_value=True, flatten=flatten)
- mask = tuple(mask)
- output = ma.array(np.fromiter(data, dtype=newdtype))
- output._mask[:] = list(mask)
+ # Store an iterator padding the input to the expected length
+ seqdata.append(itertools.chain(data, [fval] * nbmissing))
+ seqmask.append(itertools.chain(mask, [fmsk] * nbmissing))
+ # Create an iterator for the data
+ data = tuple(izip_records(seqdata, flatten=flatten))
+ output = ma.array(np.fromiter(data, dtype=newdtype, count=maxlength),
+ mask=list(izip_records(seqmask, flatten=flatten)))
if asrecarray:
output = output.view(MaskedRecords)
else:
- output = np.fromiter(data, dtype=newdtype)
+ # Same as before, without the mask we don't need...
+ for (a, n) in itertools.izip(seqarrays, sizes):
+ nbmissing = (maxlength - n)
+ data = a.ravel().__array__()
+ if nbmissing:
+ fval = _check_fill_value(fill_value, a.dtype)
+ if isinstance(fval, (ndarray, np.void)):
+ if len(fval.dtype) == 1:
+ fval = fval.item()[0]
+ else:
+ fval = np.array(fval, dtype=a.dtype, ndmin=1)
+ else:
+ fval = None
+ seqdata.append(itertools.chain(data, [fval] * nbmissing))
+ output = np.fromiter(tuple(izip_records(seqdata, flatten=flatten)),
+ dtype=newdtype, count=maxlength)
if asrecarray:
output = output.view(recarray)
+ # And we're done...
return output
@@ -467,7 +498,7 @@
dtype=[('a', '<i4')])
"""
if _is_string_like(drop_names):
- drop_names = [drop_names,]
+ drop_names = [drop_names, ]
else:
drop_names = set(drop_names)
#
@@ -542,7 +573,7 @@
def append_fields(base, names, data=None, dtypes=None,
- fill_value=-1, usemask=True, asrecarray=False):
+ fill_value= -1, usemask=True, asrecarray=False):
"""
Add new fields to an existing array.
@@ -577,14 +608,14 @@
err_msg = "The number of arrays does not match the number of names"
raise ValueError(err_msg)
elif isinstance(names, basestring):
- names = [names,]
- data = [data,]
+ names = [names, ]
+ data = [data, ]
#
if dtypes is None:
data = [np.array(a, copy=False, subok=True) for a in data]
data = [a.view([(name, a.dtype)]) for (name, a) in zip(names, data)]
elif not hasattr(dtypes, '__iter__'):
- dtypes = [dtypes,]
+ dtypes = [dtypes, ]
if len(data) != len(dtypes):
if len(dtypes) == 1:
dtypes = dtypes * len(data)
@@ -712,7 +743,7 @@
current_descr[-1] = descr[1]
newdescr[nameidx] = tuple(current_descr)
elif descr[1] != current_descr[-1]:
- raise TypeError("Incompatible type '%s' <> '%s'" %\
+ raise TypeError("Incompatible type '%s' <> '%s'" % \
(dict(newdescr)[name], descr[1]))
# Only one field: use concatenate
if len(newdescr) == 1:
@@ -849,14 +880,14 @@
"'outer' or 'leftouter' (got '%s' instead)" % jointype)
# If we have a single key, put it in a tuple
if isinstance(key, basestring):
- key = (key, )
+ key = (key,)
# Check the keys
for name in key:
if name not in r1.dtype.names:
- raise ValueError('r1 does not have key field %s'%name)
+ raise ValueError('r1 does not have key field %s' % name)
if name not in r2.dtype.names:
- raise ValueError('r2 does not have key field %s'%name)
+ raise ValueError('r2 does not have key field %s' % name)
# Make sure we work with ravelled arrays
r1 = r1.ravel()
@@ -915,7 +946,7 @@
else:
current[0] += r1postfix
desc[0] += r2postfix
- ndtype.insert(nameidx+1, desc)
+ ndtype.insert(nameidx + 1, desc)
#... we haven't: just add the description to the current list
else:
names.extend(desc[0])
@@ -934,7 +965,7 @@
current = output[f]
current[:r1cmn] = selected[:r1cmn]
if jointype in ('outer', 'leftouter'):
- current[cmn:cmn+r1spc] = selected[r1cmn:]
+ current[cmn:cmn + r1spc] = selected[r1cmn:]
for f in r2names:
selected = s2[f]
if f not in names:
Modified: trunk/numpy/lib/tests/test_recfunctions.py
===================================================================
--- trunk/numpy/lib/tests/test_recfunctions.py 2010-03-25 03:32:17 UTC (rev 8305)
+++ trunk/numpy/lib/tests/test_recfunctions.py 2010-03-26 05:15:57 UTC (rev 8306)
@@ -17,7 +17,7 @@
"""
#
def setUp(self):
- x = np.array([1, 2,])
+ x = np.array([1, 2, ])
y = np.array([10, 20, 30])
z = np.array([('A', 1.), ('B', 2.)],
dtype=[('A', '|S3'), ('B', float)])
@@ -41,7 +41,7 @@
assert_equal(test,
np.dtype([('', int), ('A', '|S3'), ('B', float)]))
test = zip_descr((x, z), flatten=False)
- assert_equal(test,
+ assert_equal(test,
np.dtype([('', int),
('', [('A', '|S3'), ('B', float)])]))
# Standard & nested dtype
@@ -71,7 +71,7 @@
control = np.array([(1,), (4,)], dtype=[('a', int)])
assert_equal(test, control)
# A nested sub-field
- test = drop_fields(a, ['ba',])
+ test = drop_fields(a, ['ba', ])
control = np.array([(1, (3.0,)), (4, (6.0,))],
dtype=[('a', int), ('b', [('bb', int)])])
assert_equal(test, control)
@@ -127,13 +127,13 @@
# One 1-nested field
ndtype = np.dtype([('A', int), ('B', [('BA', float), ('BB', '|S1')])])
test = get_fieldstructure(ndtype)
- assert_equal(test, {'A': [], 'B': [], 'BA':['B',], 'BB':['B']})
+ assert_equal(test, {'A': [], 'B': [], 'BA':['B', ], 'BB':['B']})
# One 2-nested fields
ndtype = np.dtype([('A', int),
('B', [('BA', int),
('BB', [('BBA', int), ('BBB', int)])])])
test = get_fieldstructure(ndtype)
- control = {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'],
+ control = {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'],
'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']}
assert_equal(test, control)
@@ -173,7 +173,7 @@
def test_find_duplicates_ignoremask(self):
"Test the ignoremask option of find_duplicates"
ndtype = [('a', int)]
- a = ma.array([1, 1, 1, 2, 2, 3, 3],
+ a = ma.array([1, 1, 1, 2, 2, 3, 3],
mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype)
test = find_duplicates(a, ignoremask=True, return_index=True)
control = [0, 1, 3, 4]
@@ -218,7 +218,7 @@
Test merge_arrays
"""
def setUp(self):
- x = np.array([1, 2,])
+ x = np.array([1, 2, ])
y = np.array([10, 20, 30])
z = np.array([('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)])
w = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
@@ -291,7 +291,7 @@
assert_equal(test, control)
#
test = merge_arrays((x, w), flatten=False)
- controldtype = dtype=[('f0', int),
+ controldtype = dtype = [('f0', int),
('f1', [('a', int),
('b', [('ba', float), ('bb', int)])])]
control = np.array([(1., (1, (2, 3.0))), (2, (4, (5, 6.0)))],
@@ -325,6 +325,15 @@
test = merge_arrays((z, np.array([10, 20, 30]).view([('C', int)])))
control = np.array([('A', 1., 10), ('B', 2., 20), ('-1', -1, 20)],
dtype=[('A', '|S3'), ('B', float), ('C', int)])
+ #
+ def test_singlerecord(self):
+ (_, x, y, z) = self.data
+ test = merge_arrays((x[0], y[0], z[0]), usemask=False)
+ control = np.array([(1, 10, ('A', 1))],
+ dtype=[('f0', int),
+ ('f1', int),
+ ('f2', [('A', '|S3'), ('B', float)])])
+ assert_equal(test, control)
@@ -333,7 +342,7 @@
Test append_fields
"""
def setUp(self):
- x = np.array([1, 2,])
+ x = np.array([1, 2, ])
y = np.array([10, 20, 30])
z = np.array([('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)])
w = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
@@ -387,7 +396,7 @@
Test stack_arrays
"""
def setUp(self):
- x = np.array([1, 2,])
+ x = np.array([1, 2, ])
y = np.array([10, 20, 30])
z = np.array([('A', 1.), ('B', 2.)], dtype=[('A', '|S3'), ('B', float)])
w = np.array([(1, (2, 3.0)), (4, (5, 6.0))],
@@ -436,7 +445,7 @@
#
test = stack_arrays((z, x))
control = ma.array([('A', 1, -1), ('B', 2, -1),
- (-1, -1, 1), (-1, -1, 2),],
+ (-1, -1, 1), (-1, -1, 2), ],
mask=[(0, 0, 1), (0, 0, 1),
(1, 1, 0), (1, 1, 0)],
dtype=[('A', '|S3'), ('B', float), ('f2', int)])
@@ -446,7 +455,7 @@
test = stack_arrays((z, z, x))
control = ma.array([('A', 1, -1), ('B', 2, -1),
('A', 1, -1), ('B', 2, -1),
- (-1, -1, 1), (-1, -1, 2),],
+ (-1, -1, 1), (-1, -1, 2), ],
mask=[(0, 0, 1), (0, 0, 1),
(0, 0, 1), (0, 0, 1),
(1, 1, 0), (1, 1, 0)],
@@ -555,26 +564,26 @@
('c', int), ('d', int)])
#
test = join_by(('a', 'b'), a, b, 'outer')
- control = ma.array([( 0, 50, 100, -1), ( 1, 51, 101, -1),
- ( 2, 52, 102, -1), ( 3, 53, 103, -1),
- ( 4, 54, 104, -1), ( 5, 55, 105, -1),
- ( 5, 65, -1, 100), ( 6, 56, 106, -1),
- ( 6, 66, -1, 101), ( 7, 57, 107, -1),
- ( 7, 67, -1, 102), ( 8, 58, 108, -1),
- ( 8, 68, -1, 103), ( 9, 59, 109, -1),
- ( 9, 69, -1, 104), (10, 70, -1, 105),
- (11, 71, -1, 106), (12, 72, -1, 107),
- (13, 73, -1, 108), (14, 74, -1, 109)],
- mask=[( 0, 0, 0, 1), ( 0, 0, 0, 1),
- ( 0, 0, 0, 1), ( 0, 0, 0, 1),
- ( 0, 0, 0, 1), ( 0, 0, 0, 1),
- ( 0, 0, 1, 0), ( 0, 0, 0, 1),
- ( 0, 0, 1, 0), ( 0, 0, 0, 1),
- ( 0, 0, 1, 0), ( 0, 0, 0, 1),
- ( 0, 0, 1, 0), ( 0, 0, 0, 1),
- ( 0, 0, 1, 0), ( 0, 0, 1, 0),
- ( 0, 0, 1, 0), ( 0, 0, 1, 0),
- ( 0, 0, 1, 0), ( 0, 0, 1, 0)],
+ control = ma.array([(0, 50, 100, -1), (1, 51, 101, -1),
+ (2, 52, 102, -1), (3, 53, 103, -1),
+ (4, 54, 104, -1), (5, 55, 105, -1),
+ (5, 65, -1, 100), (6, 56, 106, -1),
+ (6, 66, -1, 101), (7, 57, 107, -1),
+ (7, 67, -1, 102), (8, 58, 108, -1),
+ (8, 68, -1, 103), (9, 59, 109, -1),
+ (9, 69, -1, 104), (10, 70, -1, 105),
+ (11, 71, -1, 106), (12, 72, -1, 107),
+ (13, 73, -1, 108), (14, 74, -1, 109)],
+ mask=[(0, 0, 0, 1), (0, 0, 0, 1),
+ (0, 0, 0, 1), (0, 0, 0, 1),
+ (0, 0, 0, 1), (0, 0, 0, 1),
+ (0, 0, 1, 0), (0, 0, 0, 1),
+ (0, 0, 1, 0), (0, 0, 0, 1),
+ (0, 0, 1, 0), (0, 0, 0, 1),
+ (0, 0, 1, 0), (0, 0, 0, 1),
+ (0, 0, 1, 0), (0, 0, 1, 0),
+ (0, 0, 1, 0), (0, 0, 1, 0),
+ (0, 0, 1, 0), (0, 0, 1, 0)],
dtype=[('a', int), ('b', int),
('c', int), ('d', int)])
assert_equal(test, control)
@@ -585,11 +594,11 @@
(4, 54, 104, -1), (5, 55, 105, -1),
(6, 56, 106, -1), (7, 57, 107, -1),
(8, 58, 108, -1), (9, 59, 109, -1)],
- mask=[(0, 0, 0, 1), (0, 0, 0, 1),
- (0, 0, 0, 1), (0, 0, 0, 1),
- (0, 0, 0, 1), (0, 0, 0, 1),
- (0, 0, 0, 1), (0, 0, 0, 1),
- (0, 0, 0, 1), (0, 0, 0, 1)],
+ mask=[(0, 0, 0, 1), (0, 0, 0, 1),
+ (0, 0, 0, 1), (0, 0, 0, 1),
+ (0, 0, 0, 1), (0, 0, 0, 1),
+ (0, 0, 0, 1), (0, 0, 0, 1),
+ (0, 0, 0, 1), (0, 0, 0, 1)],
dtype=[('a', int), ('b', int), ('c', int), ('d', int)])
More information about the Numpy-svn
mailing list