[Numpy-discussion] NumPy re-factoring project

Thu Jun 10 20:27:18 EDT 2010

Den 11.06.2010 00:57, skrev David Cournapeau:
> Do you have the code for this ? That's something I wanted to do, but
> never took the time to do. Faster generic iterator would be nice, but
> very hard to do in general.
>
>    

/* this computes the start adress for every vector along a dimension
(axis) of an ndarray */

typedef PyArrayObject ndarray;

inline char *datapointer(ndarray *a, npy_intp *indices)
{
     char *data = a->data;
     int i;
     npy_intp j;
     for (i=0; i < a->nd; i++) {
         j = indices[i];
         data += j * a->strides[i];
     }
     return data;
}

static double ** _get_axis_pointer(npy_intp *indices, int axis,
                            ndarray *a, double **axis_ptr_array, int dim)
{
     /* recursive axis pointer search for 4 dimensions or more */
     npy_intp i;
     double *axis_ptr;
     if (dim == a->nd) {
         /* done recursing dimensions,
             compute address of this axis */
         axis_ptr = (double *) datapointer(a, indices);
         *axis_ptr_array = axis_ptr;
         return (axis_ptr_array + 1);
     } else if (dim == axis) {
         /* use first element only */
         indices[dim] = 0;
         axis_ptr_array = _get_axis_pointer(indices, axis, a, 
axis_ptr_array, dim+1);
         return axis_ptr_array;
     } else {
         /* iterate range of indices */
         npy_intp length = a->dimensions[dim];
         for (i=0; i < length; i++) {
             indices[dim] = i;
             axis_ptr_array = _get_axis_pointer(indices, axis, a, 
axis_ptr_array, dim+1);
         }
         return axis_ptr_array;
     }
}

static double **get_axis_pointers(ndarray *a, int axis, npy_intp *count)
{
     npy_intp indices[NPY_MAXDIMS];
     double **out, **tmp;
     npy_intp i, j;
     j = 1;
     for (i=0; i < a->nd; i++) {
         if (i != axis)
             j *= a->dimensions[i];
     }
     *count = j;

     out = (double **) malloc(*count * sizeof(double*));
     if (out == NULL) {
         *count = 0;
         return NULL;
     }
     tmp = out;
     switch (a->nd) {
         /* for one dimension we just return the data pointer */
         case 1:
             *tmp = (double *)a->data;
             break;
         /* specialized for two dimensions */
         case 2:
             switch (axis) {
                 case 0:
                     for (i=0; i<a->dimensions[1]; i++)
                         *tmp++ = (double *)(a->data + i*a->strides[1]);
                     break;

                 case 1:
                     for (i=0; i<a->dimensions[0]; i++)
                         *tmp++ = (double *)(a->data + i*a->strides[0]);
                     break;
             }
             break;
         /* specialized for three dimensions */
         case 3:
             switch (axis) {
                 case 0:
                     for (i=0; i<a->dimensions[1]; i++)
                         for (j=0; j<a->dimensions[2]; j++)
                             *tmp++ = (double *)(a->data + 
i*a->strides[1] + j*a->strides[2]);
                     break;
                 case 1:
                     for (i=0; i<a->dimensions[0]; i++)
                         for (j=0; j<a->dimensions[2]; j++)
                             *tmp++ = (double *)(a->data + 
i*a->strides[0] + j*a->strides[2]);
                     break;

                 case 2:
                     for (i=0; i<a->dimensions[0]; i++)
                         for (j=0; j<a->dimensions[1]; j++)
                             *tmp++ = (double *)(a->data + 
i*a->strides[0] + j*a->strides[1]);

             }
             break;
         /* four or more dimensions: use recursion */
         default:
             for (i=0; i<a->nd; i++) indices[i] = 0;
             _get_axis_pointer(indices, axis, a, out, 0);

     }
done:
     return out;
}

>> Another thing I did when reimplementing lfilter was "copy-in copy-out"
>> for strided arrays.
>>      
> What is copy-in copy out ? I am not familiar with this term ?
>
>    

Strided memory access is slow. So it often helps to make a temporary 
copy that are contiguous.

static void copy_in(double *restrict y, double *restrict x, npy_intp n, 
npy_intp s)
{
     npy_intp i;
     char *tmp;
     if (s == sizeof(double))
         memcpy(y, x, n*sizeof(double));
     else {
         tmp = (char *)x;
         for (i=0; i<n; i++) {
             *y++ = *x;
             tmp += s;
             x = (double *)tmp;
         }
     }
}

static void copy_out(double *restrict y, double *restrict x, npy_intp n, 
npy_intp s)
{
     npy_intp i;
     char *tmp;
     if (s == sizeof(double))
         memcpy(y, x, n*sizeof(double));
     else {
         tmp = (char *)y;
         for (i=0; i<n; i++) {
             *y = *x++;
             tmp += s;
             y = (double *)tmp;
         }
     }
}

Sturla

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/numpy-discussion/attachments/20100611/117f85b5/attachment.html>