[pypy-commit] pypy numpy-multidim-shards: broadcast passes tests, code needs review

mattip noreply at buildbot.pypy.org
Sun Nov 20 00:37:44 CET 2011


Author: mattip
Branch: numpy-multidim-shards
Changeset: r49560:03025883c2b1
Date: 2011-11-20 01:28 +0200
http://bitbucket.org/pypy/pypy/changeset/03025883c2b1/

Log:	broadcast passes tests, code needs review

diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -8,8 +8,8 @@
 from pypy.tool.sourcetools import func_with_new_name
 from pypy.rlib.rstring import StringBuilder
 
-numpy_driver = jit.JitDriver(greens = ['signature'],
-                             reds = ['result_size', 'i', 'ri', 'self',
+numpy_driver = jit.JitDriver(greens=['signature'],
+                             reds=['result_size', 'i', 'ri', 'self',
                                      'result'])
 all_driver = jit.JitDriver(greens=['signature'], reds=['i', 'self', 'dtype'])
 any_driver = jit.JitDriver(greens=['signature'], reds=['i', 'self', 'dtype'])
@@ -39,13 +39,21 @@
         shape.append(size)
         batch = new_batch
 
-class BroadcastDescription(object):
-    def __init__(self, shape, indices1, indices2):
-        self.shape = shape
-        self.indices1 = indices1
-        self.indices2 = indices2
+#class BroadcastDescription(object):
+#    def __init__(self, shape, indices1, indices2):
+#        self.shape = shape
+#        self.indices1 = indices1
+#        self.indices2 = indices2
+
 
 def shape_agreement(space, shape1, shape2):
+    ret = _shape_agreement(shape1, shape2)
+    if len(ret) < max(len(shape1), len(shape2)):
+        raise OperationError(space.w_ValueError, space.wrap(
+            "shape mismatch: objects cannot be broadcast to a single shape"))
+    return ret
+
+def _shape_agreement(shape1, shape2):
     """ Checks agreement about two shapes with respect to broadcasting. Returns
     the resulting shape.
     """
@@ -79,8 +87,9 @@
             endshape[i] = left
             indices2[i + rshift] = False
         else:
-            raise OperationError(space.w_ValueError, space.wrap(
-                "frames are not aligned"))
+            return []
+            #raise OperationError(space.w_ValueError, space.wrap(
+            #    "frames are not aligned"))
     for i in range(m - n):
         adjustment = True
         endshape[i] = remainder[i]
@@ -91,7 +100,6 @@
     #if not adjustment:
     #    return None
     return endshape
-    return BroadcastDescription(endshape, indices1, indices2)
 
 def descr_new_array(space, w_subtype, w_item_or_iterable, w_dtype=None,
                     w_order=NoneNotWrapped):
@@ -130,7 +138,7 @@
     for i in range(len(elems_w)):
         w_elem = elems_w[i]
         dtype.setitem_w(space, arr.storage, arr_iter.offset, w_elem)
-        arr_iter = arr_iter.next()
+        arr_iter.next()
     return arr
 
 class BaseIterator(object):
@@ -146,11 +154,11 @@
 class ArrayIterator(BaseIterator):
     def __init__(self, size):
         self.offset = 0
-        self.size   = size
+        self.size = size
 
     def next(self):
         self.offset += 1
-        return self
+        #return self
 
     def done(self):
         return self.offset >= self.size
@@ -179,7 +187,7 @@
                 self.offset -= self.arr.backshards[i]
         else:
             self._done = True
-        return self
+        #return self
 
     def done(self):
         return self._done
@@ -187,7 +195,47 @@
     def get_offset(self):
         return self.offset
 
-class ResizingIterator(object):
+class BroadcastIterator(BaseIterator):
+    '''Like a view iterator, but walks every index of res_shape while
+       reading from arr: dimensions that arr lacks get a shard and a
+       backshard of 0, and get_offset() folds the offset with "% size"
+    '''
+    def __init__(self, arr, res_shape):
+        self.indices = [0] * len(res_shape)
+        self.offset  = arr.start
+        self.shards  = [s for s in arr.shards]  # copy: the insert() below must not mutate arr.shards
+        self.backshards = [s for s in arr.backshards]  # copy: the insert() below must not mutate arr.backshards
+        self.shape_len = len(res_shape)
+        self.res_shape = res_shape
+        for i in range(self.shape_len - len(arr.shape)):  # one pass per leading dimension arr lacks
+            self.shards.insert(0, 0)
+            self.backshards.insert(0, 0)
+        self._done = False
+        self.size = sum(arr.shape)  # NOTE(review): sum, not product, of the dimensions -- confirm intent
+        self.arr = arr
+
+    @jit.unroll_safe
+    def next(self):
+        shape_len = jit.promote(self.shape_len)
+        for i in range(shape_len - 1, -1, -1):  # last dimension first
+            if self.indices[i] < self.res_shape[i] - 1:
+                self.indices[i] += 1
+                self.offset += self.shards[i]
+                break
+            else:
+                self.indices[i] = 0  # this dimension wrapped: rewind it and carry into dimension i - 1
+                self.offset -= self.backshards[i]
+        else:  # no break: every index wrapped, so res_shape is exhausted
+            self._done = True
+        #return self
+
+    def done(self):
+        return self._done
+
+    def get_offset(self):
+        return self.offset % self.size  # the raw self.offset attribute is not folded
+
+
+class _ResizingIterator(object):
     def __init__(self, iter, shape, orig_indices):
         self.shape = shape
         self.indices = [0] * len(shape)
@@ -207,7 +255,7 @@
                 self.indices[i] = 0
         else:
             self._done = True
-        return self
+        #return self
 
     def get_offset(self):
         return self.iter.get_offset()
@@ -223,7 +271,7 @@
     def next(self):
         self.left.next()
         self.right.next()
-        return self
+        #return self
 
     def done(self):
         return self.left.done() or self.right.done()
@@ -239,7 +287,7 @@
 
     def next(self):
         self.child.next()
-        return self
+        #return self
 
     def done(self):
         return self.child.done()
@@ -249,7 +297,8 @@
 
 class ConstantIterator(BaseIterator):
     def next(self):
-        return self
+        pass
+        #return self
 
     def done(self):
         return False
@@ -356,7 +405,7 @@
 
     def _reduce_argmax_argmin_impl(op_name):
         reduce_driver = jit.JitDriver(greens=['signature'],
-                         reds = ['i', 'result', 'self', 'cur_best', 'dtype'])
+                         reds=['i', 'result', 'self', 'cur_best', 'dtype'])
         def loop(self):
             i = self.start_iter(self.shape)
             result = i.get_offset()
@@ -372,7 +421,7 @@
                 if dtype.ne(new_best, cur_best):
                     result = i.get_offset()
                     cur_best = new_best
-                i = i.next()
+                i.next()
             return result
         def impl(self, space):
             size = self.find_size()
@@ -390,7 +439,7 @@
             all_driver.jit_merge_point(signature=self.signature, self=self, dtype=dtype, i=i)
             if not dtype.bool(self.eval(i)):
                 return False
-            i = i.next()
+            i.next()
         return True
     def descr_all(self, space):
         return space.wrap(self._all())
@@ -403,7 +452,7 @@
                                        dtype=dtype, i=i)
             if dtype.bool(self.eval(i)):
                 return True
-            i = i.next()
+            i.next()
         return False
     def descr_any(self, space):
         return space.wrap(self._any())
@@ -507,9 +556,10 @@
                 view.to_str(space, comma, builder, indent=indent + ' ', use_ellipsis=use_ellipsis)
                 i += 1
         elif ndims == 1:
-            #This should not directly access the start,shards: what happens if order changes?
             spacer = ',' * comma + ' '
             item = self.start
+            #An iterator would be a nicer way to walk along the 1d array, but how do
+            # I reset it if printing ellipsis? iterators have no "set_offset()"
             i = 0
             if use_ellipsis:
                 for i in range(3):
@@ -521,6 +571,7 @@
                     item += self.shards[0]
                 #Add a comma only if comma is False - this prevents adding two commas
                 builder.append(spacer + '...' + ',' * (1 - comma))
+                #Ugly, but can this be done with an iterator?
                 item = self.start + self.backshards[0] - 2 * self.shards[0]
                 i = self.shape[0] - 3
             while i < self.shape[0]:
@@ -779,8 +830,8 @@
                                          result_size=result_size, i=i, ri=ri,
                                          self=self, result=result)
             result.dtype.setitem(result.storage, ri.offset, self.eval(i))
-            i = i.next()
-            ri = ri.next()
+            i.next()
+            ri.next()
         return result
 
     def force_if_needed(self):
@@ -852,23 +903,22 @@
         self.left = left
         self.right = right
         self.calc_dtype = calc_dtype
+        self.size = 1
+        for s in self.shape:
+            self.size *= s
 
     def _del_sources(self):
         self.left = None
         self.right = None
 
     def _find_size(self):
-        try:
-            return self.left.find_size()
-        except ValueError:
-            pass
-        return self.right.find_size()
+        return self.size
 
     def start_iter(self, res_shape=None):
         if self.forced_result is not None:
             return self.forced_result.start_iter(res_shape)
         if res_shape is None:
-            res_shape = self.shape # we still force the shape on children
+            res_shape = self.shape  # we still force the shape on children
         return Call2Iterator(self.left.start_iter(res_shape),
                              self.right.start_iter(res_shape))
 
@@ -906,7 +956,7 @@
         return self.parent.getitem(item)
 
     def eval(self, iter):
-        assert isinstance(iter, ViewIterator)
+        assert isinstance(iter, (ViewIterator, BroadcastIterator))
         return self.parent.getitem(iter.offset)
 
     @unwrap_spec(item=int)
@@ -961,12 +1011,19 @@
                                          source_iter=source_iter)
             self.setitem(res_iter.offset, source.eval(source_iter).convert_to(
                 self.find_dtype()))
-            source_iter = source_iter.next()
-            res_iter = res_iter.next()
+            source_iter.next()
+            res_iter.next()
 
     def start_iter(self, res_shape=None):
         if res_shape is not None and res_shape != self.shape:
-            raise NotImplementedError # xxx
+            # I would prefer to throw the exception using a space,
+            # but do not have access to one here. Is there a way
+            # to get access to one and pass it into shape_agreement?
+            res_shape = _shape_agreement(self.shape, res_shape)
+            if len(res_shape) < len(self.shape):
+                raise ValueError("shape mismatch: objects cannot" + \
+                                 " be broadcast to a single shape")
+            return BroadcastIterator(self, res_shape)
             #return ResizingIterator(ViewIterator(self), res_shape, orig_indices)
         return ViewIterator(self)
 
@@ -1003,7 +1060,7 @@
         return self.dtype.getitem(self.storage, item)
 
     def eval(self, iter):
-        assert isinstance(iter, ArrayIterator)
+        assert isinstance(iter, (ArrayIterator, BroadcastIterator))
         return self.dtype.getitem(self.storage, iter.offset)
 
     def descr_len(self, space):
@@ -1023,7 +1080,14 @@
     def start_iter(self, res_shape=None):
         if self.order == 'C':
             if res_shape is not None and res_shape != self.shape:
-                raise NotImplementedError # xxx
+                res_shape = _shape_agreement(self.shape, res_shape)
+                # I would prefer to throw the exception using a space,
+                # but do not have access to one here. Is there a way
+                # to get access to one and pass it into shape_agreement?
+                if len(res_shape) < len(self.shape):
+                    raise ValueError("shape mismatch: objects cannot " + \
+                       "be broadcast to a single shape")
+                return BroadcastIterator(self, res_shape)
             return ArrayIterator(self.size)
         raise NotImplementedError  # use ViewIterator simply, test it
 
diff --git a/pypy/module/micronumpy/test/test_base.py b/pypy/module/micronumpy/test/test_base.py
--- a/pypy/module/micronumpy/test/test_base.py
+++ b/pypy/module/micronumpy/test/test_base.py
@@ -31,12 +31,12 @@
 
     def test_slice_signature(self, space):
         ar = NDimArray(10, [10], dtype=space.fromcache(interp_dtype.W_Float64Dtype))
-        v1 = ar.descr_getitem(space, space.wrap(slice(1, 5, 1)))
+        v1 = ar.descr_getitem(space, space.wrap(slice(1, 3, 1)))
         v2 = ar.descr_getitem(space, space.wrap(slice(4, 6, 1)))
         assert v1.signature is v2.signature
 
-        v3 = ar.descr_add(space, v1)
-        v4 = ar.descr_add(space, v2)
+        v3 = v2.descr_add(space, v1)
+        v4 = v1.descr_add(space, v2)
         assert v3.signature is v4.signature
 
 class TestUfuncCoerscion(object):
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -475,7 +475,7 @@
 
     def test_mod(self):
         from numpypy import array
-        a = array(range(1,6))
+        a = array(range(1, 6))
         b = a % a
         for i in range(5):
             assert b[i] == 0
@@ -855,18 +855,19 @@
         from numpypy import array
         a = array([[1, 2], [3, 4], [5, 6]])
         b = array([5, 6])
-        #print a + b
-        c = ((a + b) == [[1+5, 2+6], [3+5, 4+6], [5+5, 6+6]])
-        print c
-        print c.all()
+        c = ((a + b) == [[1 + 5, 2 + 6], [3 + 5, 4 + 6], [5 + 5, 6 + 6]])
         assert c.all()
 
     def test_broadcast_setslice(self):
-        import numpypy
-        a = numpypy.zeros((100, 100))
-        b = numpypy.ones(100)
+        from numpypy import zeros, ones, array
+        a = zeros((100, 100))
+        b = ones(100)
         a[:, :] = b
         assert a[13, 15] == 1
+        a = zeros((3, 1, 3))
+        b = array(((10, 11, 12), (20, 21, 22), (30, 31, 32)))
+        c = ((a + b) == [b, b, b])
+        assert c.all()
 
 class AppTestSupport(object):
     def setup_class(cls):


More information about the pypy-commit mailing list