[MATRIX-SIG] Prototype array with gather/scatter behavior

Chris Chase chase@att.com
Wed, 6 Aug 97 13:09:49 EDT

I updated a module I was playing around with last year that implements
what I call general product indexing.  The module is ProdArray.py.
ProdArray.py depends on the Numeric module.

It implements an array object that allows subscripts that are scalars,
slices, integer sequences (e.g. arrays), NewAxis or Ellipsis.  This
provides for general scatter/gather behavior.

"get" subscripting always produces a reference to the original data
unless a scalar (rank-0) is the result.

"set" subscripting works as you would expect it.  However, when using
a ProdArray instance where data elements are referenced more than
once, than the behavior of how items are set is not guaranteed, i.e.,

a = ProdArray([1,2])[[0,0,1,1],]
a[:] = [5,6,7,8]

the current implementation results in the original data being set to 6
and 8.

The code is not efficient, particularly when setting values via
subscripting (just a big loop).  There are probably bugs.  But I
thought this would be a useful prototype.  But changing the current
Numeric module to have this behavior would require a lot of recoding.

Forgive the programming style and lack of extensive documentation.
See the docstring for the ProdArray class (of special interest are the
resize and reshape methods).


-- ProdArray.py included below --
from Numeric import *
import types
import operator

version = "1.0"

_arrayType = type(array(0))

# documentation notwithstanding, Numeric.array() does not accept
# None as a value for typecode.
def _array(a, typecode=None):
    if typecode == None:
	return array(a)
	return array(a, typecode)

def _projectSlice(s, n, stride=1):
    # ProdArray helper function for converting slice objects into
    # projectedSlice objects plus offset.

    # project a slice into length=n, stride=stride start=0 sequence.
    # This does the clipping of the slice as done in traditional python way.

    if n == 0:
	return 0, projectedSlice(0,1)
    # 	if s != types.SliceType:
    # 	    raise TypeError, 'Slice required'
    step = s.step
    if step == None: step = 1
    if step > 0: dir = 1
    else: dir = -1
    start = s.start
    if start == None:
	if dir > 0: start = 0
	else: start = n-1
    if start < 0: start = start + n
    stop = s.stop
    if stop == None:
	if dir > 0: stop = n
	else: stop = -1
	if stop < 0: stop = stop + n
    # do clipping of slice to dimension modulo dir.

    # I don't like this but it is the current implementation.
    # It would be much more useful in numerical processing to
    # generate an exception for slices outside the valid
    # range as is done when indexing with scalars and take().

    if dir > 0:
	if start < 0:
	    start = start % dir
	    if start > 0: start = dir - start
	stop = min(n, stop)
	if start >= n:
	    start = (start - (n-1)) % dir
	    if start < 0: start = dir - start
	    start = (n-1) + start
	stop = max(-1, stop)

    len = max((stop-(start-step)-dir)/step, 0)
    #     if len == 0: stop = start
    #     else: stop = start+(len-1)*step+dir
    return start*stride, projectedSlice(len,step*stride)

def _substituteEllipsis(index_in, rank):
    # number of actual indexes present
    nind = sum(map(lambda i: i != NewAxis and i != Ellipsis, index_in))
    # remove Ellipsis replacing with slice ":"
    z = map(lambda i: i == Ellipsis, index_in)
    ie = nonzero(z)
    if len(ie) > 1:
	raise IndexError, "At most one Ellipsis allowed in indexing."
    if len(ie) == 1:
	ie = ie[0]
	nskip = rank-nind
	index = list(index_in[0:ie])+[slice(None)]*nskip+list(index_in[ie+1:])
	return index
	if nind != rank:
	    raise IndexError, "Incorrect number of indexes."
	return index_in

def _indexArrayCopy(a,index):
    # a helper function that combines NumPy array indexing behavior
    # and take() for a generalized indexing scheme.
    # Used for indexing array indexes in ProdArray.__getitem__()

    # Indexes a as an array with a sequence of indexes producing a copy
    # index is a sequence of: integer sequence, scalar, NewAxis, slice, Ellipsis

    # All indexes perform as with normal indexing, except sequence
    # indexes have the behavior of take()
    b = asarray(a)
    r = len(b.shape)
    index = _substituteEllipsis(index, r)
    axis = r-1
    all = (slice(None),)*r
    # apply indexes in reverse order
    for i in range(len(index)-1,-1,-1):
	idx = index[i]
	a_idx = asarray(idx)
	if len(a_idx.shape) == 0:
	    if idx == NewAxis:
		b = b[all[0:axis+1]+(idx,Ellipsis)]
		# slices and scalar/rank-0 indexes
		b = b[all[0:axis]+(a_idx[0],Ellipsis)]
	    # sequence indexes
	    b = take(b, a_idx, axis)
	if idx != NewAxis:
	    axis = axis-1
    return array(b)

class _projectedSlice:
    # a simplified slice-like object for by ProdArray representation.
    # just a stride and a length.  Offsets for _projectedSlice's are
    # always assumed to be zero since ProdArray keeps track of a
    # single offset for the representation.

    def __init__(self, length, stride=1):
	self.stride = stride
	self.shape = [length]
	self.length = length
	self.__array__ = self.asarray

    def asarray(self,type='i'):
	if self.stride != 0:
	    return arange(0,self.length*self.stride,self.stride,type)
	    return zeros([self.length],type)
    def __len__(self):
	return self.length

    def __getslice__(self, start, stop):
	return self[start:stop:]
    def __getitem__(self, i):
	# return a list containing offset and optionally a 
	# _projectedSlice or a contiguous array
	# check i for slice
	if type(i) == types.SliceType:
	    return _projectSlice(i,self.length,self.stride)
	if type(i) == _projectedSliceType:
	    if i.stride == 0:
		# convention: 0 stride means replication of first element
		return 0, projectedSlice(i.length,0)
		return 0, projectedSlice(min([(self.length-1)/abs(i.stride)+1,i.length]),i.stride*self.stride)
	b = asarray(i)

	# check i for scalar/rank-0 which result in a scalar return,
	# i.e. just an offset.
	if len(b.shape) == 0:
	    b = b[0]
	    if b < 0: b=b+self.length
	    if b < 0 or b >= self.length:
		raise IndexError, "Subscript out of range"
	    return b*self.stride
	    # not a scalar
	    # python behavior
	    b = b + less(b,0)*self.length
	    # error on out of range
	    if logical_or.reduce(ravel(logical_or(less(b,0),greater_equal(b,self.length)))):
		raise IndexError, "Subscript out of range"
	    return [0, b*self.stride]

    def __repr__(self):
	return ("projectedSlice(%d,%d)" % (self.length,self.stride))

    def __str__(self):
	return self.__repr__()
_projectedSliceType = type(projectedSlice(1))
def _indirectIndexFromDims(dims):
    # just a helper function
    if len(dims) == 0: return []
    strides = [1]
    # last dimension varies fastest
    for n in range(len(dims)-1,0,-1):
    index = map(_projectedSlice,dims,strides)
    return index

# indexing a ProdArray returns a ProdArray containing a reference to
# the original data unless a rank-0 array results in which case
# indexing returns a scalar.
# copy=true - always produces a copy of data

# copy=false - will not produce a copy if data is a contiguous array,
# instead uses a reference to data

class ProdArray:
    """Prototype array class providing complete product indexing
    behavior.  Indexes can be scalars, slices, integer sequences,
    NewAxis or Ellipsis.  Subscripting a ProdArray produces a
    reference to the original data.  Both "get" and "set" type
    subscripting is supported.

    Public Instances:
    shape - array shape

    Additional methods:

    asarray() - returns a copy of object as a Numeric array object.

    getIndirectIndex() - returns an array that can be used as an indirect
    index.  Subscripting data with this index produces the same data
    as self.   For example,
    c=take(a,i) # same as b.asarray().
    'c' contains a copy of the same array represented by 'b'.  The usefulness
    would be in using 'i' to index some different data with the same size as

    resize(dims) - resize self to new shape given by 'dims'.  Each
    axis in the shape of self is expanded (via circular repetition of
    the data) or truncated.  New axes are added to the right end of
    the original shape.  The resize representation is very efficient.
    Any original size 1 axis or new axis is represented by a special
    _projectedSlice instance with zero stride.  

    reshape(dims,copy=0) - reshapes self to the new shape given by
    'dims'.  Has same restrictions as reshape() from Numeric module.
    If 'copy' is false a reference to the original data is returned.
    Otherwise a Numeric array object is returned with a copy of the

    def __init__(self, data, index=None, offset=0, copy=1):
	# need to do argument checking
	# only integer
	self.offset = offset
	self.array = asarray(data)
	if index != None:
	    # only a sequence of _projectedSlice or Array.
	    # don't need to check that the index vectors are valid
	    # since any indexing or asarray() accesses will generate
	    # an error
	    self.index = index
	    # get shape before flattening
	    self.index = _indirectIndexFromDims(self.array.shape)
	if copy or not self.array.iscontiguous():
	    self.array = array(data).flat
	    self.array = self.array.flat
	# Interface for Numeric array objects: __array__ contains a
	# function that returns the object as a Numeric array
	self.__array__ = self.asarray
	self.shape = []
	for x in self.index:
	    l = map(None,x.shape)
	    self.shape = self.shape+l

    def __repr__(self):
	return 'ProdArray(%s,%s,%s)' % (repr(self.array),repr(self.index),repr(self.offset))
    def __getslice__(self, start, stop):
	return self[start:stop:]

    def __len__(self):
	if len(self.shape) == 0: return 0
	return self.shape[0]

    def __getitem__(self, index_in, as_scalar=1):
	if not operator.isSequenceType(index_in):
	    # scalar or slice by itself
	    index = (index_in, Ellipsis)
	    index = index_in
	r = len(self.shape)
	# this won't allow rank-0 arrays to be subscripted
	index = _substituteEllipsis(index, r)
	v2 = []
	i = 0 # index
	j = 0 # self.index
	offset = self.offset
	while i < len(index):
	    if index[i] == NewAxis:
		i = i+1
	    elif type(self.index[j]) == _arrayType:
		tidx = []
		k = 0
		while k < len(self.index[j].shape):
		    if index[i] != NewAxis:
			k = k+1
		    i = i+1
		# subscript the array
		j = j+1
		# index a slice
		tidx = self.index[j][index[i]]
		    offset = offset+tidx[0]
		    offset = offset+tidx
		# check for scalar result
		i = i+1
		j = j+1
	if v2 or not as_scalar:
	    return ProdArray(self.array, index=v2, offset=offset, copy=0)
	return self.array[offset]
    def reshape(self,shape,copy=0):
	if copy:
	    return reshape(self)
	    index = reshape(self.getIndirectIndex().flat,shape)
	    return ProdArray(self.array,index,copy=0)
    def __setslice__(self,i,j,x):
	self[i:j:] = x

    def __setitem__(self,i,x):
	d = self.__getitem__(i,0)
	x = asarray(x)
	    # check frames aligned - size 1 dimensions are broadcast
	    # The frames are check by aligning their right ends (rather
	    # than from their left ends)
	    for k in range(-len(x.shape),0):
		if x.shape[k] != 1 and x.shape[k] != d.shape[k]:
		    raise ValueError, "frames not aligned at source index %d" % k
	except IndexError:
	    raise ValueError, "Source rank too large"
	x = ProdArray(x).resize(d.shape).asarray().flat
	idx = d.getIndirectIndex().flat
	for k in range(len(idx)):
	    d.array[idx[k]] = x[k]

    def resize(self, dims):
	idx = []
	n = len(self.shape)
	r = len(dims)
	x = self.__getitem__((Ellipsis,)+(NewAxis,)*(r-n),0)
	s = x.shape
	for i in range(len(dims)):
	    if s[i] == 1:
	    elif s[i] >= dims[i]:
		nc = ((dims[i]-1)/s[i]+1)
	return x.__getitem__(idx,0)
    def getIndirectIndex(self):
	# generate an indirect index vector
	return asarray(reduce(add.outer,self.index,0)+self.offset)
    def asarray(self, type=None):
	# take() does not work for scalars or rank-0 indexes
	return asarray(_indexArrayCopy(self.array,[self.getIndirectIndex()]),type)

    # I don't know why I need this since __array__ is defined, but
    # arithmetic doesn't work without it.  I don't understand the
    # coersion rules

    def __coerce__(self, x):
	return self.asarray(), x

MATRIX-SIG  - SIG on Matrix Math for Python

send messages to: matrix-sig@python.org
administrivia to: matrix-sig-request@python.org