[Scipy-svn] r2442 - trunk/Lib/sandbox/timeseries

scipy-svn at scipy.org scipy-svn at scipy.org
Wed Dec 20 12:11:29 EST 2006


Author: mattknox_ca
Date: 2006-12-20 11:11:26 -0600 (Wed, 20 Dec 2006)
New Revision: 2442

Modified:
   trunk/Lib/sandbox/timeseries/timeseries.py
Log:
re-write of timeseries class to be a subclass of Masked Array instead of shifting array

Modified: trunk/Lib/sandbox/timeseries/timeseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/timeseries.py	2006-12-20 17:10:19 UTC (rev 2441)
+++ trunk/Lib/sandbox/timeseries/timeseries.py	2006-12-20 17:11:26 UTC (rev 2442)
@@ -2,37 +2,136 @@
 from numpy import ma
 
 import corelib
-import shiftingarray as sa
-from shiftingarray import doFunc, doFunc_oneseries
 import cseries
 import tsdate
-import copy
+import copy as copytools
 
-class TimeSeries(sa.ShiftingArray):
-    def __init__(self, values=[], dtype=None, freq=None, observed='END', startIndex=None, mask=ma.nomask):
+
+def ts_compatible(a, b):
+    if a.freq != b.freq:
+        raise ValueError("Both TimeSeries must have same freq!")
+    elif a.start_date() != b.start_date():
+        raise ValueError("Both TimeSeries must have same start_date!")
+    elif a.shape != b.shape:
+        raise ValueError("Both TimeSeries must be of the same size!")
+
+
+class ts_unary_operation:
+    def __init__ (self, abfunc):
+        self.f = abfunc
+        self.__doc__ = getattr(abfunc, "__doc__", str(abfunc))
+
+    def __call__ (self, a, *args, **kwargs):
+        "Execute the call behavior."
+        if isinstance(a, TimeSeries):
+            return TimeSeries(self.f(a, *args, **kwargs), freq=a.freq, observed=a.observed, start_date=a.start_date())
+        else:
+            return self.f(a, *args, **kwargs)
+        
+        
+class ts_binary_operation:
+    def __init__ (self, abfunc):
+        self.f = abfunc
+        self.__doc__ = getattr(abfunc, "__doc__", str(abfunc))
+
+    def __call__ (self, a, b, *args, **kwargs):
+        "Execute the call behavior."
+
+        if isinstance(a, TimeSeries) and isinstance(b, TimeSeries):
+            ts_compatible(a, b)
+            return TimeSeries(self.f(a, b, *args, **kwargs), freq=a.freq, observed=a.observed, start_date=a.start_date())
+        elif isinstance(a, TimeSeries):
+            return TimeSeries(self.f(a, b, *args, **kwargs), freq=a.freq, observed=a.observed, start_date=a.start_date())
+        elif isinstance(b, TimeSeries):
+            return TimeSeries(self.f(a, b, *args, **kwargs), freq=b.freq, observed=b.observed, start_date=b.start_date())
+        else:
+            return self.f(a, b, *args, **kwargs)
+            
+            
+
+class TimeSeries(ma.MaskedArray):
+
+    __array_priority__ = 10.2
+
+    def __init__(self, data, dtype=None, freq=None, start_date=None, observed=None, copy=True, order=False, mask=ma.nomask, fill_value=None):
     
-        if freq is None: raise ValueError("freq not specified")
+        if isinstance(data, TimeSeries):
+            if freq is None: freq = data.freq
+            if start_date is None: start_date = data.start_date()
+            if observed is None: observed = data.observed
+        else:
+            if observed is None: observed = 'END'
         
-        if dtype is None: dtype = values.dtype
+        self.freq = corelib.fmtFreq(freq)
 
-        super(TimeSeries, self).__init__(values, dtype, startIndex, mask)
-        self.freq = corelib.fmtFreq(freq)
+        if isinstance(start_date, tsdate.Date):
+            if start_date.freq != self.freq: raise ValueError("frequency of start_date must match frequency of series")
+            else: self.__start_date = start_date
+        else:
+            self.__start_date = tsdate.Date(freq=self.freq, value=start_date)
+
         self.observed = corelib.fmtObserv(observed)
-        self.dtype = dtype
+
+        self.tstype = None
+
+        if corelib.isDateType(dtype) or (isinstance(data, TimeSeries) and corelib.isDateType(data.tstype)):
+            self.tstype = dtype
+            dtype = numpy.int_
+
+        super(TimeSeries, self).__init__(data=data, dtype=dtype, copy=copy, order=order, mask=mask, fill_value=fill_value)
         
+        if self.tstype is None: self.tstype = self.dtype
+
+
     def __getitem__(self, key):
-        if isinstance(key,tsdate.Date):
-            if self.freq != key.freq:
-                raise "series of frequency "+str(self.freq)+" given date expression of type "+str(key.freq)
-            else:
-                key = int(key)
-        return super(TimeSeries, self).__getitem__(key)
+        return super(TimeSeries, self).__getitem__(self.__prepKey(key))
         
     def __setitem__(self, key, value):
+        super(TimeSeries, self).__setitem__(self.__prepKey(key), value)
+
+    def __prepKey(self, key):
+    
         if isinstance(key, tsdate.Date):
-            key = int(key)
-        super(TimeSeries, self).__setitem__(key, value)
+            key = int(key - self.start_date())
+            if key < 0: raise ValueError("Date out of bounds")
+            else: return key
 
+        elif isinstance(key, TimeSeries):
+            if corelib.isDateType(key.tstype):
+                if key.tstype.freq != self.freq:
+                    raise ValueError("series of frequency "+str(self.freq)+" given date expression of type "+str(key.tstype.freq))
+
+                if key.mask is ma.nomask: key = numpy.asarray(key) - int(self.start_date())
+                else: key = numpy.asarray(key[key.mask == False]) - int(self.start_date())
+                
+                if len(numpy.where(key < 0)[0]) > 0: raise ValueError("Indices out of bounds")
+                
+                return key
+                
+            else:
+
+                # frequency, size, and start_date of key must all match self
+                # when the data type is not a date
+                ts_compatible(key, self)
+
+                if key.tstype is numpy.bool_:
+                    key = key.filled(False)
+                elif numpy.ravel(key.mask).any():
+                    raise ValueError("masked values cannot be used as indices!")
+
+                return numpy.asarray(key)
+        
+        elif isinstance(key, ma.MaskedArray):
+
+            if key.dtype is numpy.bool_:
+                key = key.filled(False)
+            elif numpy.ravel(key.mask).any():
+                raise ValueError("masked values cannot be used as indices!")
+
+            return numpy.asarray(key)
+        
+        else: return key
+
     
     def convert(self, freq, func='auto', position='END', interp=None):
         """
@@ -67,19 +166,18 @@
             if func == 'auto':
                 func = corelib.obsDict[self.observed]
 
-            firstIndex = corelib.first_unmasked(self.data)
-            if firstIndex is None:
-                return TimeSeries([], dtype=self.dtype, freq=toFreq, observed=self.observed)
+            if self.size == 0:
+                return TimeSeries(self, freq=toFreq, start_date=tsdate.dateOf(self.start_date(), toFreq))
 
-            startIndexAdj = self.firstValue()
 
-            lastIndex = corelib.last_unmasked(self.data)
+            tempData = self.filled()
 
-            tempData = copy.deepcopy(self.data[firstIndex:lastIndex+1])
-            tempMask = tempData.mask
-            tempData = tempData.filled()
+            if self.mask is ma.nomask:
+                tempMask = numpy.empty(tempData.shape, dtype=numpy.bool_)
+                tempMask[:] = False
+            else: tempMask = self.mask
 
-            cRetVal = cseries.reindex(tempData, fromFreq, toFreq, position, startIndexAdj, tempMask)
+            cRetVal = cseries.reindex(tempData, fromFreq, toFreq, position, int(self.start_date()), tempMask)
 
             _values = cRetVal['values']
             _mask = cRetVal['mask']
@@ -91,142 +189,193 @@
             if func is not None and tempData.ndim == 2:
                 tempData = corelib.apply_along_axis(func, 1, tempData)
                 
-            startIndex = cseries.convert(startIndexAdj, fromFreq, toFreq)
+            startIndex = cseries.convert(int(self.start_date()), fromFreq, toFreq)
+    
+            newStart = tsdate.dateOf(self.start_date(),toFreq, "BEFORE")
+            newEnd = tsdate.dateOf(self.end_date(),toFreq, "AFTER")
 
-            return TimeSeries(tempData, dtype=self.data.dtype, freq=toFreq, observed=self.observed, startIndex=startIndex)
+            return adjust_endpoints(TimeSeries(tempData, freq=toFreq, observed=self.observed, start_date=startIndex), start_date=newStart, end_date=newEnd)
             
         else:
-            return copy.deepcopy(self)
+            return copytools.deepcopy(self)
 
 
+    def adjust_endpoints(self, start_date=None, end_date=None):
+        self.__init__(adjust_endpoints(self, start_date=start_date, end_date=end_date))
+
         
     def __str__(self):
         retVal = ""
-        if self.firstValue() is not None:
-            for i in range(self.firstValue(),self.lastValue()+1):
-                index = str(tsdate.Date(freq=self.freq,value=i))
+
+        if self.shape[0] > 0:
+            for i in range(self.shape[0]):
+                index = str(self.start_date() + i)
                 index = index + (" " * (6-len(index)))
-                retVal += index + "---> " + str(super(TimeSeries, self).__getitem__(i)) + "\n"
+                retVal += index + " --> " + str(self[i])+"\n"
             return retVal
         else:
             return "<no data>"
             
             
-    def firstValue(self, asDate=False):
-        value = super(TimeSeries, self).firstValue()
+    def first_value(self, asDate=False):
+        firstIndex = corelib.first_unmasked(self)
         if asDate:
-            return tsdate.Date(freq=self.freq, value=value)
+            return self.start_date() + firstIndex
         else:
-            return value
+            return firstIndex
         
-    def lastValue(self, asDate=False):
-        value = super(TimeSeries, self).lastValue()
+    def last_value(self, asDate=False):
+        lastIndex = corelib.last_unmasked(self)
         if asDate:
-            return tsdate.Date(freq=self.freq, value=value)
+            return self.start_date() + lastIndex
         else:
-            return value
+            return lastIndex
+            
+    def start_date(self): return self.__start_date
+    def end_date(self): return self.__start_date + (self.shape[0] - 1)
+            
+    def date_to_index(self, date):
+        if date.freq != self.freq: raise ValueError("date.freq != self.freq")
+        return date - self.start_date()
+            
+            
+    # built-in methods
 
-    ### DATA 
-    
-    def __add__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed)
-        
-    def __radd__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed)
-        
-    def __sub__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__sub__(other), self.freq, self.observed)
-        
-    def __rsub__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__rsub__(other), self.freq, self.observed)
-        
-    def __mul__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__mul__(other), self.freq, self.observed)
-        
-    def __rmul__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__rmul__(other), self.freq, self.observed)
-        
-    def __div__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__div__(other), self.freq, self.observed)
-        
-    def __rdiv__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__rdiv__(other), self.freq, self.observed)
-        
-    def __pow__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__pow__(other), self.freq, self.observed)
-        
-    ### IN PLACE
-    
+    def __and__(self, other): return bitwise_and(self, other)
+    def __or__(self, other): return bitwise_or(self, other)
+    def __xor__(self, other): return bitwise_xor(self, other)
+    __rand__ = __and__
+    __ror__ = __or__
+    __rxor__ = __xor__
+    def __abs__(self): return absolute(self)
+    def __neg__(self): return negative(self)
+    def __pos__(self): return TimeSeries(self)
+    def __add__(self, other): return add(self, other)
+    __radd__ = __add__
+    def __mod__ (self, other): return remainder(self, other)
+    def __rmod__ (self, other): return remainder(other, self)
+    def __lshift__ (self, n): return left_shift(self, n)
+    def __rshift__ (self, n): return right_shift(self, n)
+    def __sub__(self, other): return subtract(self, other)
+    def __rsub__(self, other): return subtract(other, self)
+    def __mul__(self, other): return multiply(self, other)
+    __rmul__ = __mul__
+    def __div__(self, other): return divide(self, other)
+    def __rdiv__(self, other): return divide(other, self)
+    def __truediv__(self, other): return true_divide(self, other)
+    def __rtruediv__(self, other): return true_divide(other, self)
+    def __floordiv__(self, other): return floor_divide(self, other)
+    def __rfloordiv__(self, other): return floor_divide(other, self)
+    def __pow__(self, other, third=None): return power(self, other, third)
+    def __sqrt__(self): return sqrt(self)
+
     def __iadd__(self, other):
-        validOpInputs(self, other)
-        self = SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed)
-        return self
-    
-    def __isub__(self, other):
-        validOpInputs(self, other)
-        self = SAtoTS(super(TimeSeries, self).__sub__(other), self.freq, self.observed)
-        return self
-    
+        return self + other
+
     def __imul__(self, other):
-        validOpInputs(self, other)
-        self = SAtoTS(super(TimeSeries, self).__mul__(other), self.freq, self.observed)
-        return self
-    
-    def __idiv__(self, other):
-        validOpInputs(self, other)
-        self = SAtoTS(super(TimeSeries, self).__div__(other), self.freq, self.observed)
-        return self
-        
-    # this overrides & and should only be used by boolean series
-    def __and__(self, other):
-        validOpInputs(self, other)
         return self * other
 
-    # this overrides | and should only be used by boolean series
-    def __or__(self, other):
-        validOpInputs(self, other)
-        return ~(~self & ~other)
-            
-    # this overrides ~ and should only be used by boolean series
-    # it is our "not" operator
-    def __invert__(self):
-        return self == False
-    
-    ### COMPARISON
-    
-    def __eq__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__eq__(other), self.freq, self.observed)
-        
-    def __le__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__le__(other), self.freq, self.observed)
-        
-    def __lt__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__lt__(other), self.freq, self.observed)
-        
-    def __ge__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__ge__(other), self.freq, self.observed)
-        
-    def __gt__(self, other):
-        validOpInputs(self, other)
-        return SAtoTS(super(TimeSeries, self).__gt__(other), self.freq, self.observed)
+    def __isub__(self, other):
+        return self - other
 
+    def __idiv__(self, other):
+        return self / other
+
+    def __eq__(self, other): return equal(self,other)
+    def __ne__(self, other): return not_equal(self,other)
+    def __lt__(self, other): return less(self,other)
+    def __le__(self, other): return less_equal(self,other)
+    def __gt__(self, other): return greater(self,other)
+    def __ge__(self, other): return greater_equal(self,other)
+
+    def astype (self, tc):
+        "return self as array of given type."
+        d = self._data.astype(tc)
+        return datawrap(ma.array(d, mask=self._mask), self)
+
+    def filled (self, fill_value=None, ts=False):
+        d = super(TimeSeries, self).filled(fill_value)
+        if ts: return datawrap(d, self)
+        else: return d
+
+
+def datawrap(data, ts): return TimeSeries(data, freq=ts.freq, observed=ts.observed, start_date=ts.start_date())
+
+## wrappers for numpy.ma funcs
+
+sqrt = ts_unary_operation(ma.sqrt)
+log = ts_unary_operation(ma.log)
+log10 = ts_unary_operation(ma.log10)
+exp = ts_unary_operation(ma.exp)
+sin = ts_unary_operation(ma.sin)
+cos = ts_unary_operation(ma.cos)
+tan = ts_unary_operation(ma.tan)
+arcsin = ts_unary_operation(ma.arcsin)
+arccos = ts_unary_operation(ma.arccos)
+arctan = ts_unary_operation(ma.arctan)
+power = ts_binary_operation(ma.power)
+
+arcsinh = ts_unary_operation(ma.arcsinh)
+arccosh = ts_unary_operation(ma.arccosh)
+arctanh = ts_unary_operation(ma.arctanh)
+sinh = ts_unary_operation(ma.sinh)
+cosh = ts_unary_operation(ma.cosh)
+tanh = ts_unary_operation(ma.tanh)
+absolute = ts_unary_operation(ma.absolute)
+fabs = ts_unary_operation(ma.fabs)
+negative = ts_unary_operation(ma.negative)
+
+def nonzero(a): return datawrap(ma.nonzero(a), a)
+def zeros(shape, dtype=float, freq=None, start_date=None, observed=None):
+    return TimeSeries(ma.zeros(shape, dtype), freq=freq, start_date=start_date, observed=observed)
+def ones(shape, dtype=float, freq=None, start_date=None, observed=None):
+    return TimeSeries(ma.ones(shape, dtype), freq=freq, start_date=start_date, observed=observed)
+
+count = ma.count
+sum = ma.sum
+product = ma.product
+average = ma.average
+
+
+
+around = ts_unary_operation(ma.around)
+floor = ts_unary_operation(ma.floor)
+ceil = ts_unary_operation(ma.ceil)
+logical_not = ts_unary_operation(ma.logical_not)
+
+add = ts_binary_operation(ma.add)
+subtract = ts_binary_operation(ma.subtract)
+
+multiply = ts_binary_operation(ma.multiply)
+divide = ts_binary_operation(ma.divide)
+true_divide = ts_binary_operation(ma.true_divide)
+floor_divide = ts_binary_operation(ma.floor_divide)
+remainder = ts_binary_operation(ma.remainder)
+fmod = ts_binary_operation(ma.fmod)
+hypot = ts_binary_operation(ma.hypot)
+arctan2 = ts_binary_operation(ma.arctan2)
+equal = ts_binary_operation(ma.equal)
+not_equal = ts_binary_operation(ma.not_equal)
+less_equal = ts_binary_operation(ma.less_equal)
+greater_equal = ts_binary_operation(ma.greater_equal)
+less = ts_binary_operation(ma.less)
+greater = ts_binary_operation(ma.greater)
+logical_and = ts_binary_operation(ma.logical_and)
+logical_or = ts_binary_operation(ma.logical_or)
+logical_xor = ts_binary_operation(ma.logical_xor)
+bitwise_and = ts_binary_operation(ma.bitwise_and)
+bitwise_or = ts_binary_operation(ma.bitwise_or)
+bitwise_xor = ts_binary_operation(ma.bitwise_xor)
+
+def left_shift (a, n): return datawrap(ma.left_shift(a, n), a)
+def right_shift (a, n): return datawrap(ma.right_shift(a, n), a)
+
+# time series specific functions
+
 def tser(start, end):
     if start.freq != end.freq:
         raise ValueError("start and end dates must have same frequency!")
-    return TimeSeries(numpy.arange(int(start), int(end)+1), dtype=corelib.freqTypeMapping[start.freq], freq=start.freq, observed='END', startIndex=int(start))
+    return TimeSeries(numpy.arange(int(start), int(end)+1), dtype=corelib.freqTypeMapping[start.freq], freq=start.freq, start_date=start)
 
 def year(dateSer):
     return __getDateInfo(dateSer,'Y')
@@ -244,61 +393,50 @@
     return __getDateInfo(dateSer,'W')
 
 def __getDateInfo(dateSer,infoCode):
-    newData = ma.array(cseries.getDateInfo(dateSer.data.filled(), dateSer.dtype.freq, infoCode))
-    newData[dateSer.data.mask] = ma.masked
-    newSer = copy.deepcopy(dateSer)
-    newSer.data = newData
-    newSer.dtype = numpy.int_
-    return newSer
+    newData = ma.array(cseries.getDateInfo(dateSer.filled(), dateSer.tstype.freq, infoCode))
+    if dateSer.mask is not ma.nomask:
+        newData[dateSer.mask] = ma.masked
+    return datawrap(newData, dateSer)
 
-        
-def validOpInputs(ser1, ser2):
-    if isinstance(ser1, TimeSeries) and isinstance(ser2, TimeSeries) and ser1.freq != ser2.freq:
-        raise "operation cannot be performed on series with different frequencies ("+str(ser1.freq) + " and " + str(ser2.freq)+")"
 
+def adjust_endpoints(a, start_date=None, end_date=None):
+    """adjust_endpoints(a, start_date=None, end_date=None) returns a new
+    TimeSeries going from start_date to end_date"""
     
-def SAtoTS(values, freq, observed, dtype=None):
-    if dtype is None: _dtype = values.dtype
-    else: _dtype = dtype
-    return TimeSeries(values.data, dtype=_dtype, freq=freq, observed=observed, startIndex=values.indexZeroRepresents)
+    if start_date is None: start_date = a.start_date()
+    if end_date is None: end_date = a.end_date()
 
-
-# math functions (two series)
-def add(ser1, ser2, fill_value=ma.masked):
-    return apply_func_twoseries(ma.add, ser1, ser2, fill_value)
-
-def multiply(ser1, ser2, fill_value=ma.masked):
-    return apply_func_twoseries(ma.multiply, ser1, ser2, fill_value)
-
-def divide(ser1, ser2, fill_value=ma.masked):
-    return apply_func_twoseries(ma.divide, ser1, ser2, fill_value)
+    tmpShape = list(a.shape)
+    tmpShape[0] = max(end_date - start_date + 1, 0)
+    tmpShape = tuple(tmpShape)
     
-def subtract(ser1, ser2, fill_value=ma.masked):
-    return apply_func_twoseries(ma.subtract, ser1, ser2, fill_value)
+    tmpSer = TimeSeries(ma.resize(a, tmpShape), freq=a.freq, observed=a.observed, start_date=start_date)
     
-# math functions (one series, return series)
-def sqrt(ser):
-    return apply_func_oneseries(ma.sqrt, ser)
+    setStart, setEnd = max(start_date, a.start_date()), min(end_date, a.end_date())
+    setLen = setEnd - setStart
     
-# math functions (one series, return scalar)
-def sum(ser):
-    return ma.sum(ser.data)
+    tmpSer[:] = ma.masked
+    
+    if setLen >= 0:
+        tmpSer[tmpSer.date_to_index(setStart):tmpSer.date_to_index(setEnd)+1] = a[a.date_to_index(setStart):a.date_to_index(setEnd)+1]
+            
+    return tmpSer
 
-def product(ser):
-    return ma.product(ser.data)
+
+def aligned(*series, **kwargs):
     
-def average(ser):
-    return ma.average(ser.data)
+    if len(series) < 2:
+        return series
+        
+    freq = series[0].freq
     
-def where(condition, x, y):
-    tempResult = ma.where(condition.data, x, y)
-    return TimeSeries(tempResult, dtype=numpy.bool_, freq=condition.freq, observed=condition.observed, startIndex=condition.indexZeroRepresents)
-
-# generic functions
-def apply_func_twoseries(func, ser1, ser2, fill_value=ma.masked):
-    validOpInputs(ser1, ser2)
-    return SAtoTS(doFunc(ser1, ser2, func, fill_value=fill_value), ser1.freq, ser1.observed)
+    if len(set([x.freq for x in series])) > 1: raise ValueError("All series must have same frequency!")
     
-def apply_func_oneseries(func, ser):
-    return SAtoTS(doFunc_oneseries(ser, func),ser.freq, ser.observed)
+    if 'start_date' in kwargs: start_date = kwargs['start_date']
+    else: start_date = min([x.start_date() for x in series])
     
+    if 'end_date' in kwargs: end_date = kwargs['end_date']
+    else: end_date = max([x.end_date() for x in series])
+    
+    return [adjust_endpoints(x, start_date=start_date, end_date=end_date) for x in series]
+    
\ No newline at end of file




More information about the Scipy-svn mailing list