[Numpy-discussion] Zeros in strides

Sasha ndarray at mac.com
Fri Feb 3 13:43:01 EST 2006


On 2/2/06, Travis Oliphant <oliphant.travis at ieee.org> wrote:
> Sasha wrote:
>
> >Sure.  I've started working on a "proof of concept" patch and will post it soon.
> >
> Great.

The attached patch allows numpy to create memory-saving zero-stride arrays.

Here is a sample session:

>>> from numpy import *
>>> x = ndarray([5], strides=0)
>>> x
array([12998768, 12998768, 12998768, 12998768, 12998768])
>>> x[0] = 0
>>> x
array([0, 0, 0, 0, 0])
>>> x.strides = 4
Traceback (most recent call last):
  File "<stdin>", line 1, in ?
ValueError: strides is not compatible with available memory
>>> x.strides
(0,)
>>> x.data
Traceback (most recent call last):
  File "<stdin>", line 1, in ?
AttributeError: cannot get single-segment buffer for discontiguous array
>>> exp(x)
array([ 1.,  1.,  1.,  1.,  1.])
# Only a single-element buffer is required for a zero-stride array:
>>> y = ones(1)
>>> z = ndarray([10], strides=0, buffer=y)
>>> z
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])


I probably missed some places where the buffer size is computed as a
product of dimensions, but it should not be hard to review the code
for those if we agree that having zero-stride arrays is a good idea.

Note that I did not attempt to change any behaviors; the only change
is that zero-stride arrays do not use more memory than they need.
-------------- next part --------------
Index: numpy/core/src/arrayobject.c
===================================================================
--- numpy/core/src/arrayobject.c	(revision 2055)
+++ numpy/core/src/arrayobject.c	(working copy)
@@ -3517,8 +3517,7 @@
 
  For axes with a positive stride this function checks for a walk
  beyond the right end of the buffer, for axes with a negative stride,
- it checks for a walk beyond the left end of the buffer.  Zero strides
- are disallowed.
+ it checks for a walk beyond the left end of the buffer.
 */
 /*OBJECT_API*/
 static Bool
@@ -3532,27 +3531,17 @@
 	
 	for (i=0; i<nd; i++) {
 		intp stride = newstrides[i];
-		if (stride > 0) {
+		if (stride >= 0) {
 			/* The last stride does not need to be fully inside
 			   the buffer, only its first elsize bytes */
 			if (offset + stride*(dims[i]-1)+elsize > numbytes) {
 				return FALSE;
 			}
 		}
-		else if (stride < 0) {
+		else {
 			if (offset + stride*dims[i] < 0) {
 				return FALSE;
 			}
-		} else {
-			/* XXX: Zero strides may be useful, but currently 
-			   XXX: allowing them would lead to strange results,
-			   XXX: for example :
-			   XXX: >>> x = arange(5)
-			   XXX: >>> x.strides = 0
-			   XXX: >>> x += 1
-			   XXX: >>> x
-			   XXX: array([5, 5, 5, 5, 5])  */
-			return FALSE;
 		}
 	}
 	return TRUE;
@@ -3602,6 +3591,33 @@
 	}
 	return itemsize;
 }
+/* computes the buffer size needed to accommodate dims and strides */
+static intp
+_array_buffer_size(int nd, intp *dims, intp *strides,  intp itemsize)
+{
+	intp bufsize = 0, size;
+	int i;
+	for (i = 0; i < nd; ++i) {
+		if (dims[i] < 0) {
+			PyErr_Format(PyExc_ValueError, 
+				     "negative dimension (%d) for axis %d",
+				     dims[i], i);
+			return -1;
+		}
+		if (strides[i] < 0) {
+			PyErr_Format(PyExc_ValueError, 
+				     "negative stride (%d) for axis %d",
+				     strides[i], i);
+			return -1;
+		}
+		if (dims[i] == 0)
+			continue;
+		size = (dims[i] - 1)*strides[i] + itemsize;
+		if (size > bufsize)
+			bufsize = size;
+	}
+	return bufsize;
+}
 
 /*OBJECT_API
  Generic new array creation routine.
@@ -3768,13 +3784,8 @@
 						 flags, &(self->flags));
 		}
 		else {
-			if (data == NULL) {
-				PyErr_SetString(PyExc_ValueError, 
-						"if 'strides' is given in " \
-						"array creation, data must " \
-						"be given too");
-				goto fail;
-			} 
+			sd = _array_buffer_size(nd, dims, strides, sd);
+			if (sd < 0) goto fail;
 			memcpy(self->strides, strides, sizeof(intp)*nd);
 		}
 	}       	
@@ -4092,7 +4103,7 @@
                 if (dims.len == 1 && dims.ptr[0] == -1) {
                         dims.ptr[offset] = buffer.len / itemsize;
                 }
-                else if (buffer.len < itemsize*                 \
+                else if (strides.ptr == NULL && buffer.len < itemsize* \
                          PyArray_MultiplyList(dims.ptr, dims.len)) {
                         PyErr_SetString(PyExc_TypeError, 
                                         "buffer is too small for "      \
@@ -4242,9 +4253,9 @@
 		if (PyArray_Check(new->base)) 
 			new = (PyArrayObject *)new->base;
 	}
-	numbytes = PyArray_MultiplyList(new->dimensions, 
-					new->nd)*new->descr->elsize;
-	
+	numbytes = _array_buffer_size(new->nd, new->dimensions, new->strides, 
+				      new->descr->elsize);
+	if (numbytes < 0) goto fail;
 	if (!PyArray_CheckStrides(self->descr->elsize, self->nd, numbytes,
 				  self->data - new->data,
 				  self->dimensions, newstrides.ptr)) {


More information about the NumPy-Discussion mailing list