[MATRIX-SIG] Prototype array with gather/scatter behavior
Chris Chase
chase@att.com
Wed, 6 Aug 97 13:09:49 EDT
I updated a module I was playing around with last year that implements
what I call general product indexing. The module is ProdArray.py.
ProdArray.py depends on the Numeric module.
It implements an array object that allows subscripts that are scalars,
slices, integer sequences (e.g. arrays), NewAxis or Ellipsis. This
provides for general scatter/gather behavior.
"get" subscripting always produces a reference to the original data
unless a scalar (rank-0) is the result.
"set" subscripting works as you would expect it. However, when using
a ProdArray instance where data elements are referenced more than
once, than the behavior of how items are set is not guaranteed, i.e.,
a = ProdArray([1,2])[[0,0,1,1],]
a.asarray()
a[:] = [5,6,7,8]
a
a.asarray()
the current implementation results in the original data being set to 6
and 8.
The code is not efficient, particularly when setting values via
subscripting (just a big loop). There are probably bugs. But I
thought this would be a useful prototype. But changing the current
Numeric module to have this behavior would require a lot of recoding.
Forgive the programming style and lack of extensive documentation.
See the docstring for the ProdArray class (of special interest are the
resize and reshape methods).
Enjoy,
Chris
chase@att.com
-- ProdArray.py included below --
from Numeric import *
import types
import operator
version = "1.0"
_arrayType = type(array(0))
# documentation notwithstanding, Numeric.array() does not accept
# None as a value for typecode.
def _array(a, typecode=None):
if typecode == None:
return array(a)
else:
return array(a, typecode)
def _projectSlice(s, n, stride=1):
# ProdArray helper function for converting slice objects into
# projectedSlice objects plus offset.
# project a slice into length=n, stride=stride start=0 sequence.
# This does the clipping of the slice as done in traditional python way.
if n == 0:
return 0, projectedSlice(0,1)
# if s != types.SliceType:
# raise TypeError, 'Slice required'
step = s.step
if step == None: step = 1
if step > 0: dir = 1
else: dir = -1
start = s.start
if start == None:
if dir > 0: start = 0
else: start = n-1
if start < 0: start = start + n
stop = s.stop
if stop == None:
if dir > 0: stop = n
else: stop = -1
else:
if stop < 0: stop = stop + n
# do clipping of slice to dimension modulo dir.
# I don't like this but it is the current implementation.
# It would be much more useful in numerical processing to
# generate an exception for slices outside the valid
# range as is done when indexing with scalars and take().
if dir > 0:
if start < 0:
start = start % dir
if start > 0: start = dir - start
stop = min(n, stop)
else:
if start >= n:
start = (start - (n-1)) % dir
if start < 0: start = dir - start
start = (n-1) + start
stop = max(-1, stop)
len = max((stop-(start-step)-dir)/step, 0)
# if len == 0: stop = start
# else: stop = start+(len-1)*step+dir
return start*stride, projectedSlice(len,step*stride)
def _substituteEllipsis(index_in, rank):
# number of actual indexes present
nind = sum(map(lambda i: i != NewAxis and i != Ellipsis, index_in))
# remove Ellipsis replacing with slice ":"
z = map(lambda i: i == Ellipsis, index_in)
ie = nonzero(z)
if len(ie) > 1:
raise IndexError, "At most one Ellipsis allowed in indexing."
if len(ie) == 1:
ie = ie[0]
nskip = rank-nind
index = list(index_in[0:ie])+[slice(None)]*nskip+list(index_in[ie+1:])
return index
else:
if nind != rank:
raise IndexError, "Incorrect number of indexes."
return index_in
def _indexArrayCopy(a,index):
# a helper function that combines NumPy array indexing behavior
# and take() for a generalized indexing scheme.
# Used for indexing array indexes in ProdArray.__getitem__()
# Indexes a as an array with a sequence of indexes producing a copy
# index is a sequence of: integer sequence, scalar, NewAxis, slice, Ellipsis
# All indexes perform as with normal indexing, except sequence
# indexes have the behavior of take()
b = asarray(a)
r = len(b.shape)
index = _substituteEllipsis(index, r)
axis = r-1
all = (slice(None),)*r
# apply indexes in reverse order
for i in range(len(index)-1,-1,-1):
idx = index[i]
a_idx = asarray(idx)
if len(a_idx.shape) == 0:
if idx == NewAxis:
b = b[all[0:axis+1]+(idx,Ellipsis)]
else:
# slices and scalar/rank-0 indexes
b = b[all[0:axis]+(a_idx[0],Ellipsis)]
else:
# sequence indexes
b = take(b, a_idx, axis)
if idx != NewAxis:
axis = axis-1
return array(b)
class _projectedSlice:
# a simplified slice-like object for by ProdArray representation.
# just a stride and a length. Offsets for _projectedSlice's are
# always assumed to be zero since ProdArray keeps track of a
# single offset for the representation.
def __init__(self, length, stride=1):
self.stride = stride
self.shape = [length]
self.length = length
self.__array__ = self.asarray
def asarray(self,type='i'):
if self.stride != 0:
return arange(0,self.length*self.stride,self.stride,type)
else:
return zeros([self.length],type)
def __len__(self):
return self.length
def __getslice__(self, start, stop):
return self[start:stop:]
def __getitem__(self, i):
# return a list containing offset and optionally a
# _projectedSlice or a contiguous array
# check i for slice
if type(i) == types.SliceType:
return _projectSlice(i,self.length,self.stride)
if type(i) == _projectedSliceType:
if i.stride == 0:
# convention: 0 stride means replication of first element
return 0, projectedSlice(i.length,0)
else:
return 0, projectedSlice(min([(self.length-1)/abs(i.stride)+1,i.length]),i.stride*self.stride)
b = asarray(i)
# check i for scalar/rank-0 which result in a scalar return,
# i.e. just an offset.
if len(b.shape) == 0:
b = b[0]
if b < 0: b=b+self.length
if b < 0 or b >= self.length:
raise IndexError, "Subscript out of range"
return b*self.stride
else:
# not a scalar
# python behavior
b = b + less(b,0)*self.length
# error on out of range
if logical_or.reduce(ravel(logical_or(less(b,0),greater_equal(b,self.length)))):
raise IndexError, "Subscript out of range"
return [0, b*self.stride]
def __repr__(self):
return ("projectedSlice(%d,%d)" % (self.length,self.stride))
def __str__(self):
return self.__repr__()
_projectedSliceType = type(projectedSlice(1))
def _indirectIndexFromDims(dims):
# just a helper function
if len(dims) == 0: return []
strides = [1]
# last dimension varies fastest
for n in range(len(dims)-1,0,-1):
strides.append(dims[n]*strides[-1])
strides.reverse()
index = map(_projectedSlice,dims,strides)
return index
# indexing a ProdArray returns a ProdArray containing a reference to
# the original data unless a rank-0 array results in which case
# indexing returns a scalar.
# copy=true - always produces a copy of data
# copy=false - will not produce a copy if data is a contiguous array,
# instead uses a reference to data
class ProdArray:
"""Prototype array class providing complete product indexing
behavior. Indexes can be scalars, slices, integer sequences,
NewAxis or Ellipsis. Subscripting a ProdArray produces a
reference to the original data. Both "get" and "set" type
subscripting is supported.
Public Instances:
shape - array shape
Additional methods:
asarray() - returns a copy of object as a Numeric array object.
getIndirectIndex() - returns an array that can be used as an indirect
index. Subscripting data with this index produces the same data
as self. For example,
a=arange(10)
b=ProdArray(a).reshape([2,5])[:,1:3]
i=b.getIndirectIndex()
c=take(a,i) # same as b.asarray().
'c' contains a copy of the same array represented by 'b'. The usefulness
would be in using 'i' to index some different data with the same size as
'a'.
resize(dims) - resize self to new shape given by 'dims'. Each
axis in the shape of self is expanded (via circular repetition of
the data) or truncated. New axes are added to the right end of
the original shape. The resize representation is very efficient.
Any original size 1 axis or new axis is represented by a special
_projectedSlice instance with zero stride.
reshape(dims,copy=0) - reshapes self to the new shape given by
'dims'. Has same restrictions as reshape() from Numeric module.
If 'copy' is false a reference to the original data is returned.
Otherwise a Numeric array object is returned with a copy of the
data.
"""
def __init__(self, data, index=None, offset=0, copy=1):
# need to do argument checking
# only integer
self.offset = offset
self.array = asarray(data)
if index != None:
# only a sequence of _projectedSlice or Array.
# don't need to check that the index vectors are valid
# since any indexing or asarray() accesses will generate
# an error
self.index = index
else:
# get shape before flattening
self.index = _indirectIndexFromDims(self.array.shape)
if copy or not self.array.iscontiguous():
self.array = array(data).flat
else:
self.array = self.array.flat
# Interface for Numeric array objects: __array__ contains a
# function that returns the object as a Numeric array
self.__array__ = self.asarray
self.shape = []
for x in self.index:
l = map(None,x.shape)
self.shape = self.shape+l
def __repr__(self):
return 'ProdArray(%s,%s,%s)' % (repr(self.array),repr(self.index),repr(self.offset))
def __getslice__(self, start, stop):
return self[start:stop:]
def __len__(self):
if len(self.shape) == 0: return 0
return self.shape[0]
def __getitem__(self, index_in, as_scalar=1):
if not operator.isSequenceType(index_in):
# scalar or slice by itself
index = (index_in, Ellipsis)
else:
index = index_in
r = len(self.shape)
# this won't allow rank-0 arrays to be subscripted
index = _substituteEllipsis(index, r)
v2 = []
i = 0 # index
j = 0 # self.index
offset = self.offset
while i < len(index):
if index[i] == NewAxis:
v2.append(projectedSlice(1,1))
i = i+1
elif type(self.index[j]) == _arrayType:
tidx = []
k = 0
while k < len(self.index[j].shape):
tidx.append(index[i])
if index[i] != NewAxis:
k = k+1
i = i+1
# subscript the array
v2.append(_indexArrayCopy(self.index[j],tidx))
j = j+1
else:
# index a slice
tidx = self.index[j][index[i]]
try:
len(tidx)
v2.append(tidx[1])
offset = offset+tidx[0]
except:
#scalar
offset = offset+tidx
# check for scalar result
i = i+1
j = j+1
if v2 or not as_scalar:
return ProdArray(self.array, index=v2, offset=offset, copy=0)
return self.array[offset]
def reshape(self,shape,copy=0):
if copy:
return reshape(self)
else:
index = reshape(self.getIndirectIndex().flat,shape)
return ProdArray(self.array,index,copy=0)
def __setslice__(self,i,j,x):
self[i:j:] = x
def __setitem__(self,i,x):
d = self.__getitem__(i,0)
x = asarray(x)
try:
# check frames aligned - size 1 dimensions are broadcast
# The frames are check by aligning their right ends (rather
# than from their left ends)
for k in range(-len(x.shape),0):
if x.shape[k] != 1 and x.shape[k] != d.shape[k]:
raise ValueError, "frames not aligned at source index %d" % k
except IndexError:
raise ValueError, "Source rank too large"
x = ProdArray(x).resize(d.shape).asarray().flat
idx = d.getIndirectIndex().flat
for k in range(len(idx)):
d.array[idx[k]] = x[k]
def resize(self, dims):
idx = []
n = len(self.shape)
r = len(dims)
x = self.__getitem__((Ellipsis,)+(NewAxis,)*(r-n),0)
s = x.shape
for i in range(len(dims)):
if s[i] == 1:
idx.append(projectedSlice(dims[i],0))
elif s[i] >= dims[i]:
idx.append(slice(0,dims[i],1))
else:
nc = ((dims[i]-1)/s[i]+1)
idx.append(concatenate((arange(s[i]),)*nc)[0:dims[i]])
return x.__getitem__(idx,0)
def getIndirectIndex(self):
# generate an indirect index vector
return asarray(reduce(add.outer,self.index,0)+self.offset)
def asarray(self, type=None):
# take() does not work for scalars or rank-0 indexes
return asarray(_indexArrayCopy(self.array,[self.getIndirectIndex()]),type)
# I don't know why I need this since __array__ is defined, but
# arithmetic doesn't work without it. I don't understand the
# coersion rules
def __coerce__(self, x):
return self.asarray(), x
_______________
MATRIX-SIG - SIG on Matrix Math for Python
send messages to: matrix-sig@python.org
administrivia to: matrix-sig-request@python.org
_______________