Masked arrays to Compressed arrays

Hi numpy users I am using masked arrays (MA) for gridded ocean forecast fields (temperature, salinity, sea level and so on). These fields always have the same masked elements (land). And in some cases memory usage is a real issue. I have therefore made a class which stores masked arrays in a more memory efficient way than the standard MA (I am calling it Compressed arrays). Furthermore in some of my applications I perform arithmetics which only needs to update the unmasked values in the array and do not need to know anything per se about the mask. I have therefore tried to put logic into my class which takes advantage of this knowledge and applies the arithmetic operators efficiently only on the unmasked values. MA stores as far as I know two arrays: one containing the data values and one containing the mask. Both have the same shape. My class stores the mask (or rather a flattened version of it and shape info) and a 1D data array which only contains values at unmasked elements. The class is not entirely finished but I would like to hear your constructive criticism of it. So to summarize: 1. Uses less memory except when temporarily converting to masked array (for example to perform arithmetics - hints on avoiding this would be greatly appreciated). 2. Has more efficient arithmetics in some cases. 3. Is less efficient in other cases. Here is the class (I know that the inline arithmetic operator overrides are wrong). I hope you have some good suggestions. I have not subclassed MA but that might be an alternative solution: """ Compressed array. A compressed array is an alternative to numpy masked arrays designed to reduce memory consumption. Jesper Larsen, 2010 """ # External imports import numpy as np def _compress(arr): """Compresses array.""" mask = np.ma.getmaskarray(arr) arrout = np.ma.compressed(arr) return arrout, mask def _decompress(arr, mask_flat, shape): """Decompresses array.""" arr_out = np.ma.masked_all(mask_flat.shape, dtype=arr.dtype) arr_out[~mask_flat] = arr[:] arr_out = arr_out.reshape(shape) return arr_out class carray(object): """Compressed array.""" def __init__(self, array, mask=None, same_masks=False): """\ Compressed array. Can be initialized with either a masked array instance or a flat numpy array and corresponding mask. If same_masks is set to True it is assumed that arithmetic operations on two such carrays with identical shapes have identical mask and thus optimized arithmetics can be used for some operations. """ if isinstance(array, np.ma.masked_array): # Initialize from masked array self.carray, mask = _compress(array) self.shape = mask.shape self._mask_flat = mask.ravel() elif isinstance(array, np.ndarray): # Construct a compressed array from raw input. if mask is None: raise TypeError('Mask must be present') self.carray = array self.shape = mask.shape self._mask_flat = mask.ravel() else: raise TypeError('Invalid input type') self.same_masks = True def __getattr__(self, name): """Override attribute access.""" if name == 'masked_array': return _decompress(self.carray, self._mask_flat, self.shape) if name == 'mask': return self._mask_flat.reshape(self.shape) else: # Find attribute in array data structure return getattr(self.masked_array, name) raise AttributeError(name) def __setattr__(self, name, value): """Override attribute access.""" if name == 'masked_array': self.carray, mask = _compress(value) self.shape = mask.shape self._mask_flat = mask.ravel() else: object.__setattr__(self, name, value) def _optimized_arithmetics(self, method, *args): """\ Generic method for efficiently performing arithmetics on compressed arrays. """ if len(args) > 0 and isinstance(args[0], carray): other = args[0] else: other = None if other is None: np_method = getattr(self.carray, method) carr = np_method(*args) return carray(carr, self.mask) elif self is other: np_method = getattr(self.carray, method) carr = np_method(other.carray, *args[1:]) return carray(carr, self.mask) elif self.same_masks and other.same_masks and \ self.shape == other.shape: np_method = getattr(self.carray, method) carr = np_method(other.carray, *args[1:]) return carray(carr, self.mask) else: ma_method = getattr(self.masked_array, method) return carray(ma_method(*args)) def _optimized_arithmetic_methods(self, method, *args, **kwargs): """\ Generic method for effiently applying arithmetic methods on compressed arrays. """ if len(args) == 0 and 'axis' not in kwargs: np_method = getattr(self.carray, method) carr = np_method(*args, **kwargs) if isinstance(carr, np.ndarray): return carray(carr, self.mask) else: return carr else: ma_method = getattr(self.masked_array, method) return carray(ma_method(*args, **kwargs)) """Expose access to the "masked_array" container at the top level""" def __repr__(self): return 'carray(\n' + self.masked_array.__repr__() + ')\n' def __str__(self): return self.masked_array.__str__() def __len__(self): return self.masked_array.__len__() def __getitem__(self, index): return self.masked_array.__getitem__(index) def __setitem__(self, index, value): return self.masked_array.__setitem__(index, value) def __delitem__(self, index): return self.masked_array.__delitem__(index) def __iter__(self): return self.masked_array.__iter__() def __contains__(self, item): return self.masked_array.__contains(item) """Optimized arithmetics""" def __add__(self, *args): return self._optimized_arithmetics('__add__', *args) def __sub__(self, *args): return self._optimized_arithmetics('__sub__', *args) def __mul__(self, *args): return self._optimized_arithmetics('__mul__', *args) def __floordiv__(self, *args): return self._optimized_arithmetics('__floordiv__', *args) def __mod__(self, *args): return self._optimized_arithmetics('__mod__', *args) def __divmod__(self, *args): return self._optimized_arithmetics('__divmod__', *args) def __pow__(self, *args): return self._optimized_arithmetics('__pow__', *args) def __lshift__(self, *args): return self._optimized_arithmetics('__lshift__', *args) def __rshift__(self, *args): return self._optimized_arithmetics('__rshift__', *args) def __and__(self, *args): return self._optimized_arithmetics('__and__', *args) def __xor__(self, *args): return self._optimized_arithmetics('__xor__', *args) def __or__(self, *args): return self._optimized_arithmetics('__or__', *args) def __div__(self, *args): return self._optimized_arithmetics('__div__', *args) def __truediv__(self, *args): return self._optimized_arithmetics('__truediv__', *args) def __radd__(self, *args): return self._optimized_arithmetics('__radd__', *args) def __rsub__(self, *args): return self._optimized_arithmetics('__rsub__', *args) def __rmul__(self, *args): return self._optimized_arithmetics('__rmul__', *args) def __rdiv__(self, *args): return self._optimized_arithmetics('__rdiv__', *args) def __rtruediv__(self, *args): return self._optimized_arithmetics('__rtruediv__', *args) def __rfloordiv__(self, *args): return self._optimized_arithmetics('__rfloordiv__', *args) def __rmod__(self, *args): return self._optimized_arithmetics('__rmod__', *args) def __rdivmod__(self, *args): return self._optimized_arithmetics('__rdivmod__', *args) def __rpow__(self, *args): return self._optimized_arithmetics('__rpow__', *args) def __rlshift__(self, *args): return self._optimized_arithmetics('__rlshift__', *args) def __rrshift__(self, *args): return self._optimized_arithmetics('__rrshift__', *args) def __rand__(self, *args): return self._optimized_arithmetics('__rand__', *args) def __rxor__(self, *args): return self._optimized_arithmetics('__rxor__', *args) def __ror__(self, *args): return self._optimized_arithmetics('__ror__', *args) def __iadd__(self, *args): return self._optimized_arithmetics('__iadd__', *args) def __isub__(self, *args): return self._optimized_arithmetics('__isub__', *args) def __imul__(self, *args): return self._optimized_arithmetics('__imul__', *args) def __idiv__(self, *args): return self._optimized_arithmetics('__idiv__', *args) def __itruediv__(self, *args): return self._optimized_arithmetics('__itruediv__', *args) def __ifloordiv__(self, *args): return self._optimized_arithmetics('__ifloordiv__', *args) def __imod__(self, *args): return self._optimized_arithmetics('__imod__', *args) def __ipow__(self, *args): return self._optimized_arithmetics('__ipow__', *args) def __ilshift__(self, *args): return self._optimized_arithmetics('__ilshift__', *args) def __irshift__(self, *args): return self._optimized_arithmetics('__irshift__', *args) def __iand__(self, *args): return self._optimized_arithmetics('__iand__', *args) def __ixor__(self, *args): return self._optimized_arithmetics('__ixor__', *args) def __ior__(self, *args): return self._optimized_arithmetics('__ior__', *args) def __neg__(self): return self._optimized_arithmetics('__neg__') def __pos__(self): return self._optimized_arithmetics('__pos__') def __abs__(self): return self._optimized_arithmetics('__abs__') def __invert__(self): return self._optimized_arithmetics('__invert__') def __complex__(self): return self._optimized_arithmetics('__complex__') def __int__(self): return self._optimized_arithmetics('__int__') def __long__(self): return self._optimized_arithmetics('__long__') def __float__(self): return self._optimized_arithmetics('__float__') def __oct__(self): return self._optimized_arithmetics('__oct__') def __hex__(self): return self._optimized_arithmetics('__hex__') def __index__(self): return self.masked_array.__index__(self) # Optimized methods (only sum implemented for now) # We can optimize some methods when they operate on the entire # flattened array. Otherwise we delegate to the masked array def sum(self, *args, **kwargs): return self._optimized_arithmetic_methods('sum', *args, **kwargs)
participants (1)
-
Jesper Larsen