[Scipy-svn] r2442 - trunk/Lib/sandbox/timeseries
scipy-svn at scipy.org
scipy-svn at scipy.org
Wed Dec 20 12:11:29 EST 2006
Author: mattknox_ca
Date: 2006-12-20 11:11:26 -0600 (Wed, 20 Dec 2006)
New Revision: 2442
Modified:
trunk/Lib/sandbox/timeseries/timeseries.py
Log:
re-write of timeseries class to be a subclass of Masked Array instead of shifting array
Modified: trunk/Lib/sandbox/timeseries/timeseries.py
===================================================================
--- trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-20 17:10:19 UTC (rev 2441)
+++ trunk/Lib/sandbox/timeseries/timeseries.py 2006-12-20 17:11:26 UTC (rev 2442)
@@ -2,37 +2,136 @@
from numpy import ma
import corelib
-import shiftingarray as sa
-from shiftingarray import doFunc, doFunc_oneseries
import cseries
import tsdate
-import copy
+import copy as copytools
-class TimeSeries(sa.ShiftingArray):
- def __init__(self, values=[], dtype=None, freq=None, observed='END', startIndex=None, mask=ma.nomask):
+
+def ts_compatible(a, b):
+ if a.freq != b.freq:
+ raise ValueError("Both TimeSeries must have same freq!")
+ elif a.start_date() != b.start_date():
+ raise ValueError("Both TimeSeries must have same start_date!")
+ elif a.shape != b.shape:
+ raise ValueError("Both TimeSeries must be of the same size!")
+
+
+class ts_unary_operation:
+ def __init__ (self, abfunc):
+ self.f = abfunc
+ self.__doc__ = getattr(abfunc, "__doc__", str(abfunc))
+
+ def __call__ (self, a, *args, **kwargs):
+ "Execute the call behavior."
+ if isinstance(a, TimeSeries):
+ return TimeSeries(self.f(a, *args, **kwargs), freq=a.freq, observed=a.observed, start_date=a.start_date())
+ else:
+ return self.f(a, *args, **kwargs)
+
+
+class ts_binary_operation:
+ def __init__ (self, abfunc):
+ self.f = abfunc
+ self.__doc__ = getattr(abfunc, "__doc__", str(abfunc))
+
+ def __call__ (self, a, b, *args, **kwargs):
+ "Execute the call behavior."
+
+ if isinstance(a, TimeSeries) and isinstance(b, TimeSeries):
+ ts_compatible(a, b)
+ return TimeSeries(self.f(a, b, *args, **kwargs), freq=a.freq, observed=a.observed, start_date=a.start_date())
+ elif isinstance(a, TimeSeries):
+ return TimeSeries(self.f(a, b, *args, **kwargs), freq=a.freq, observed=a.observed, start_date=a.start_date())
+ elif isinstance(b, TimeSeries):
+ return TimeSeries(self.f(a, b, *args, **kwargs), freq=b.freq, observed=b.observed, start_date=b.start_date())
+ else:
+ return self.f(a, b, *args, **kwargs)
+
+
+
+class TimeSeries(ma.MaskedArray):
+
+ __array_priority__ = 10.2
+
+ def __init__(self, data, dtype=None, freq=None, start_date=None, observed=None, copy=True, order=False, mask=ma.nomask, fill_value=None):
- if freq is None: raise ValueError("freq not specified")
+ if isinstance(data, TimeSeries):
+ if freq is None: freq = data.freq
+ if start_date is None: start_date = data.start_date()
+ if observed is None: observed = data.observed
+ else:
+ if observed is None: observed = 'END'
- if dtype is None: dtype = values.dtype
+ self.freq = corelib.fmtFreq(freq)
- super(TimeSeries, self).__init__(values, dtype, startIndex, mask)
- self.freq = corelib.fmtFreq(freq)
+ if isinstance(start_date, tsdate.Date):
+ if start_date.freq != self.freq: raise ValueError("frequency of start_date must match frequency of series")
+ else: self.__start_date = start_date
+ else:
+ self.__start_date = tsdate.Date(freq=self.freq, value=start_date)
+
self.observed = corelib.fmtObserv(observed)
- self.dtype = dtype
+
+ self.tstype = None
+
+ if corelib.isDateType(dtype) or (isinstance(data, TimeSeries) and corelib.isDateType(data.tstype)):
+ self.tstype = dtype
+ dtype = numpy.int_
+
+ super(TimeSeries, self).__init__(data=data, dtype=dtype, copy=copy, order=order, mask=mask, fill_value=fill_value)
+ if self.tstype is None: self.tstype = self.dtype
+
+
def __getitem__(self, key):
- if isinstance(key,tsdate.Date):
- if self.freq != key.freq:
- raise "series of frequency "+str(self.freq)+" given date expression of type "+str(key.freq)
- else:
- key = int(key)
- return super(TimeSeries, self).__getitem__(key)
+ return super(TimeSeries, self).__getitem__(self.__prepKey(key))
def __setitem__(self, key, value):
+ super(TimeSeries, self).__setitem__(self.__prepKey(key), value)
+
+ def __prepKey(self, key):
+
if isinstance(key, tsdate.Date):
- key = int(key)
- super(TimeSeries, self).__setitem__(key, value)
+ key = int(key - self.start_date())
+ if key < 0: raise ValueError("Date out of bounds")
+ else: return key
+ elif isinstance(key, TimeSeries):
+ if corelib.isDateType(key.tstype):
+ if key.tstype.freq != self.freq:
+ raise ValueError("series of frequency "+str(self.freq)+" given date expression of type "+str(key.tstype.freq))
+
+ if key.mask is ma.nomask: key = numpy.asarray(key) - int(self.start_date())
+ else: key = numpy.asarray(key[key.mask == False]) - int(self.start_date())
+
+ if len(numpy.where(key < 0)[0]) > 0: raise ValueError("Indices out of bounds")
+
+ return key
+
+ else:
+
+ # frequency, size, and start_date of key must all match self
+ # when the data type is not a date
+ ts_compatible(key, self)
+
+ if key.tstype is numpy.bool_:
+ key = key.filled(False)
+ elif numpy.ravel(key.mask).any():
+ raise ValueError("masked values cannot be used as indices!")
+
+ return numpy.asarray(key)
+
+ elif isinstance(key, ma.MaskedArray):
+
+ if key.dtype is numpy.bool_:
+ key = key.filled(False)
+ elif numpy.ravel(key.mask).any():
+ raise ValueError("masked values cannot be used as indices!")
+
+ return numpy.asarray(key)
+
+ else: return key
+
def convert(self, freq, func='auto', position='END', interp=None):
"""
@@ -67,19 +166,18 @@
if func == 'auto':
func = corelib.obsDict[self.observed]
- firstIndex = corelib.first_unmasked(self.data)
- if firstIndex is None:
- return TimeSeries([], dtype=self.dtype, freq=toFreq, observed=self.observed)
+ if self.size == 0:
+ return TimeSeries(self, freq=toFreq, start_date=tsdate.dateOf(self.start_date(), toFreq))
- startIndexAdj = self.firstValue()
- lastIndex = corelib.last_unmasked(self.data)
+ tempData = self.filled()
- tempData = copy.deepcopy(self.data[firstIndex:lastIndex+1])
- tempMask = tempData.mask
- tempData = tempData.filled()
+ if self.mask is ma.nomask:
+ tempMask = numpy.empty(tempData.shape, dtype=numpy.bool_)
+ tempMask[:] = False
+ else: tempMask = self.mask
- cRetVal = cseries.reindex(tempData, fromFreq, toFreq, position, startIndexAdj, tempMask)
+ cRetVal = cseries.reindex(tempData, fromFreq, toFreq, position, int(self.start_date()), tempMask)
_values = cRetVal['values']
_mask = cRetVal['mask']
@@ -91,142 +189,193 @@
if func is not None and tempData.ndim == 2:
tempData = corelib.apply_along_axis(func, 1, tempData)
- startIndex = cseries.convert(startIndexAdj, fromFreq, toFreq)
+ startIndex = cseries.convert(int(self.start_date()), fromFreq, toFreq)
+
+ newStart = tsdate.dateOf(self.start_date(),toFreq, "BEFORE")
+ newEnd = tsdate.dateOf(self.end_date(),toFreq, "AFTER")
- return TimeSeries(tempData, dtype=self.data.dtype, freq=toFreq, observed=self.observed, startIndex=startIndex)
+ return adjust_endpoints(TimeSeries(tempData, freq=toFreq, observed=self.observed, start_date=startIndex), start_date=newStart, end_date=newEnd)
else:
- return copy.deepcopy(self)
+ return copytools.deepcopy(self)
+ def adjust_endpoints(self, start_date=None, end_date=None):
+ self.__init__(adjust_endpoints(self, start_date=start_date, end_date=end_date))
+
def __str__(self):
retVal = ""
- if self.firstValue() is not None:
- for i in range(self.firstValue(),self.lastValue()+1):
- index = str(tsdate.Date(freq=self.freq,value=i))
+
+ if self.shape[0] > 0:
+ for i in range(self.shape[0]):
+ index = str(self.start_date() + i)
index = index + (" " * (6-len(index)))
- retVal += index + "---> " + str(super(TimeSeries, self).__getitem__(i)) + "\n"
+ retVal += index + " --> " + str(self[i])+"\n"
return retVal
else:
return "<no data>"
- def firstValue(self, asDate=False):
- value = super(TimeSeries, self).firstValue()
+ def first_value(self, asDate=False):
+ firstIndex = corelib.first_unmasked(self)
if asDate:
- return tsdate.Date(freq=self.freq, value=value)
+ return self.start_date() + firstIndex
else:
- return value
+ return firstIndex
- def lastValue(self, asDate=False):
- value = super(TimeSeries, self).lastValue()
+ def last_value(self, asDate=False):
+ lastIndex = corelib.last_unmasked(self)
if asDate:
- return tsdate.Date(freq=self.freq, value=value)
+ return self.start_date() + lastIndex
else:
- return value
+ return lastIndex
+
+ def start_date(self): return self.__start_date
+ def end_date(self): return self.__start_date + (self.shape[0] - 1)
+
+ def date_to_index(self, date):
+ if date.freq != self.freq: raise ValueError("date.freq != self.freq")
+ return date - self.start_date()
+
+
+ # built-in methods
- ### DATA
-
- def __add__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed)
-
- def __radd__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed)
-
- def __sub__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__sub__(other), self.freq, self.observed)
-
- def __rsub__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__rsub__(other), self.freq, self.observed)
-
- def __mul__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__mul__(other), self.freq, self.observed)
-
- def __rmul__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__rmul__(other), self.freq, self.observed)
-
- def __div__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__div__(other), self.freq, self.observed)
-
- def __rdiv__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__rdiv__(other), self.freq, self.observed)
-
- def __pow__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__pow__(other), self.freq, self.observed)
-
- ### IN PLACE
-
+ def __and__(self, other): return bitwise_and(self, other)
+ def __or__(self, other): return bitwise_or(self, other)
+ def __xor__(self, other): return bitwise_xor(self, other)
+ __rand__ = __and__
+ __ror__ = __or__
+ __rxor__ = __xor__
+ def __abs__(self): return absolute(self)
+ def __neg__(self): return negative(self)
+ def __pos__(self): return TimeSeries(self)
+ def __add__(self, other): return add(self, other)
+ __radd__ = __add__
+ def __mod__ (self, other): return remainder(self, other)
+ def __rmod__ (self, other): return remainder(other, self)
+ def __lshift__ (self, n): return left_shift(self, n)
+ def __rshift__ (self, n): return right_shift(self, n)
+ def __sub__(self, other): return subtract(self, other)
+ def __rsub__(self, other): return subtract(other, self)
+ def __mul__(self, other): return multiply(self, other)
+ __rmul__ = __mul__
+ def __div__(self, other): return divide(self, other)
+ def __rdiv__(self, other): return divide(other, self)
+ def __truediv__(self, other): return true_divide(self, other)
+ def __rtruediv__(self, other): return true_divide(other, self)
+ def __floordiv__(self, other): return floor_divide(self, other)
+ def __rfloordiv__(self, other): return floor_divide(other, self)
+ def __pow__(self, other, third=None): return power(self, other, third)
+ def __sqrt__(self): return sqrt(self)
+
def __iadd__(self, other):
- validOpInputs(self, other)
- self = SAtoTS(super(TimeSeries, self).__add__(other), self.freq, self.observed)
- return self
-
- def __isub__(self, other):
- validOpInputs(self, other)
- self = SAtoTS(super(TimeSeries, self).__sub__(other), self.freq, self.observed)
- return self
-
+ return self + other
+
def __imul__(self, other):
- validOpInputs(self, other)
- self = SAtoTS(super(TimeSeries, self).__mul__(other), self.freq, self.observed)
- return self
-
- def __idiv__(self, other):
- validOpInputs(self, other)
- self = SAtoTS(super(TimeSeries, self).__div__(other), self.freq, self.observed)
- return self
-
- # this overrides & and should only be used by boolean series
- def __and__(self, other):
- validOpInputs(self, other)
return self * other
- # this overrides | and should only be used by boolean series
- def __or__(self, other):
- validOpInputs(self, other)
- return ~(~self & ~other)
-
- # this overrides ~ and should only be used by boolean series
- # it is our "not" operator
- def __invert__(self):
- return self == False
-
- ### COMPARISON
-
- def __eq__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__eq__(other), self.freq, self.observed)
-
- def __le__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__le__(other), self.freq, self.observed)
-
- def __lt__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__lt__(other), self.freq, self.observed)
-
- def __ge__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__ge__(other), self.freq, self.observed)
-
- def __gt__(self, other):
- validOpInputs(self, other)
- return SAtoTS(super(TimeSeries, self).__gt__(other), self.freq, self.observed)
+ def __isub__(self, other):
+ return self - other
+ def __idiv__(self, other):
+ return self / other
+
+ def __eq__(self, other): return equal(self,other)
+ def __ne__(self, other): return not_equal(self,other)
+ def __lt__(self, other): return less(self,other)
+ def __le__(self, other): return less_equal(self,other)
+ def __gt__(self, other): return greater(self,other)
+ def __ge__(self, other): return greater_equal(self,other)
+
+ def astype (self, tc):
+ "return self as array of given type."
+ d = self._data.astype(tc)
+ return datawrap(ma.array(d, mask=self._mask), self)
+
+ def filled (self, fill_value=None, ts=False):
+ d = super(TimeSeries, self).filled(fill_value)
+ if ts: return datawrap(d, self)
+ else: return d
+
+
+def datawrap(data, ts): return TimeSeries(data, freq=ts.freq, observed=ts.observed, start_date=ts.start_date())
+
+## wrappers for numpy.ma funcs
+
+sqrt = ts_unary_operation(ma.sqrt)
+log = ts_unary_operation(ma.log)
+log10 = ts_unary_operation(ma.log10)
+exp = ts_unary_operation(ma.exp)
+sin = ts_unary_operation(ma.sin)
+cos = ts_unary_operation(ma.cos)
+tan = ts_unary_operation(ma.tan)
+arcsin = ts_unary_operation(ma.arcsin)
+arccos = ts_unary_operation(ma.arccos)
+arctan = ts_unary_operation(ma.arctan)
+power = ts_binary_operation(ma.power)
+
+arcsinh = ts_unary_operation(ma.arcsinh)
+arccosh = ts_unary_operation(ma.arccosh)
+arctanh = ts_unary_operation(ma.arctanh)
+sinh = ts_unary_operation(ma.sinh)
+cosh = ts_unary_operation(ma.cosh)
+tanh = ts_unary_operation(ma.tanh)
+absolute = ts_unary_operation(ma.absolute)
+fabs = ts_unary_operation(ma.fabs)
+negative = ts_unary_operation(ma.negative)
+
+def nonzero(a): return datawrap(ma.nonzero(a), a)
+def zeros(shape, dtype=float, freq=None, start_date=None, observed=None):
+ return TimeSeries(ma.zeros(shape, dtype), freq=freq, start_date=start_date, observed=observed)
+def ones(shape, dtype=float, freq=None, start_date=None, observed=None):
+ return TimeSeries(ma.ones(shape, dtype), freq=freq, start_date=start_date, observed=observed)
+
+count = ma.count
+sum = ma.sum
+product = ma.product
+average = ma.average
+
+
+
+around = ts_unary_operation(ma.around)
+floor = ts_unary_operation(ma.floor)
+ceil = ts_unary_operation(ma.ceil)
+logical_not = ts_unary_operation(ma.logical_not)
+
+add = ts_binary_operation(ma.add)
+subtract = ts_binary_operation(ma.subtract)
+
+multiply = ts_binary_operation(ma.multiply)
+divide = ts_binary_operation(ma.divide)
+true_divide = ts_binary_operation(ma.true_divide)
+floor_divide = ts_binary_operation(ma.floor_divide)
+remainder = ts_binary_operation(ma.remainder)
+fmod = ts_binary_operation(ma.fmod)
+hypot = ts_binary_operation(ma.hypot)
+arctan2 = ts_binary_operation(ma.arctan2)
+equal = ts_binary_operation(ma.equal)
+not_equal = ts_binary_operation(ma.not_equal)
+less_equal = ts_binary_operation(ma.less_equal)
+greater_equal = ts_binary_operation(ma.greater_equal)
+less = ts_binary_operation(ma.less)
+greater = ts_binary_operation(ma.greater)
+logical_and = ts_binary_operation(ma.logical_and)
+logical_or = ts_binary_operation(ma.logical_or)
+logical_xor = ts_binary_operation(ma.logical_xor)
+bitwise_and = ts_binary_operation(ma.bitwise_and)
+bitwise_or = ts_binary_operation(ma.bitwise_or)
+bitwise_xor = ts_binary_operation(ma.bitwise_xor)
+
+def left_shift (a, n): return datawrap(ma.left_shift(a, n), a)
+def right_shift (a, n): return datawrap(ma.right_shift(a, n), a)
+
+# time series specific functions
+
def tser(start, end):
if start.freq != end.freq:
raise ValueError("start and end dates must have same frequency!")
- return TimeSeries(numpy.arange(int(start), int(end)+1), dtype=corelib.freqTypeMapping[start.freq], freq=start.freq, observed='END', startIndex=int(start))
+ return TimeSeries(numpy.arange(int(start), int(end)+1), dtype=corelib.freqTypeMapping[start.freq], freq=start.freq, start_date=start)
def year(dateSer):
return __getDateInfo(dateSer,'Y')
@@ -244,61 +393,50 @@
return __getDateInfo(dateSer,'W')
def __getDateInfo(dateSer,infoCode):
- newData = ma.array(cseries.getDateInfo(dateSer.data.filled(), dateSer.dtype.freq, infoCode))
- newData[dateSer.data.mask] = ma.masked
- newSer = copy.deepcopy(dateSer)
- newSer.data = newData
- newSer.dtype = numpy.int_
- return newSer
+ newData = ma.array(cseries.getDateInfo(dateSer.filled(), dateSer.tstype.freq, infoCode))
+ if dateSer.mask is not ma.nomask:
+ newData[dateSer.mask] = ma.masked
+ return datawrap(newData, dateSer)
-
-def validOpInputs(ser1, ser2):
- if isinstance(ser1, TimeSeries) and isinstance(ser2, TimeSeries) and ser1.freq != ser2.freq:
- raise "operation cannot be performed on series with different frequencies ("+str(ser1.freq) + " and " + str(ser2.freq)+")"
+def adjust_endpoints(a, start_date=None, end_date=None):
+ """adjust_endpoints(a, start_date=None, end_date=None) returns a new
+ TimeSeries going from start_date to end_date"""
-def SAtoTS(values, freq, observed, dtype=None):
- if dtype is None: _dtype = values.dtype
- else: _dtype = dtype
- return TimeSeries(values.data, dtype=_dtype, freq=freq, observed=observed, startIndex=values.indexZeroRepresents)
+ if start_date is None: start_date = a.start_date()
+ if end_date is None: end_date = a.end_date()
-
-# math functions (two series)
-def add(ser1, ser2, fill_value=ma.masked):
- return apply_func_twoseries(ma.add, ser1, ser2, fill_value)
-
-def multiply(ser1, ser2, fill_value=ma.masked):
- return apply_func_twoseries(ma.multiply, ser1, ser2, fill_value)
-
-def divide(ser1, ser2, fill_value=ma.masked):
- return apply_func_twoseries(ma.divide, ser1, ser2, fill_value)
+ tmpShape = list(a.shape)
+ tmpShape[0] = max(end_date - start_date + 1, 0)
+ tmpShape = tuple(tmpShape)
-def subtract(ser1, ser2, fill_value=ma.masked):
- return apply_func_twoseries(ma.subtract, ser1, ser2, fill_value)
+ tmpSer = TimeSeries(ma.resize(a, tmpShape), freq=a.freq, observed=a.observed, start_date=start_date)
-# math functions (one series, return series)
-def sqrt(ser):
- return apply_func_oneseries(ma.sqrt, ser)
+ setStart, setEnd = max(start_date, a.start_date()), min(end_date, a.end_date())
+ setLen = setEnd - setStart
-# math functions (one series, return scalar)
-def sum(ser):
- return ma.sum(ser.data)
+ tmpSer[:] = ma.masked
+
+ if setLen >= 0:
+ tmpSer[tmpSer.date_to_index(setStart):tmpSer.date_to_index(setEnd)+1] = a[a.date_to_index(setStart):a.date_to_index(setEnd)+1]
+
+ return tmpSer
-def product(ser):
- return ma.product(ser.data)
+
+def aligned(*series, **kwargs):
-def average(ser):
- return ma.average(ser.data)
+ if len(series) < 2:
+ return series
+
+ freq = series[0].freq
-def where(condition, x, y):
- tempResult = ma.where(condition.data, x, y)
- return TimeSeries(tempResult, dtype=numpy.bool_, freq=condition.freq, observed=condition.observed, startIndex=condition.indexZeroRepresents)
-
-# generic functions
-def apply_func_twoseries(func, ser1, ser2, fill_value=ma.masked):
- validOpInputs(ser1, ser2)
- return SAtoTS(doFunc(ser1, ser2, func, fill_value=fill_value), ser1.freq, ser1.observed)
+ if len(set([x.freq for x in series])) > 1: raise ValueError("All series must have same frequency!")
-def apply_func_oneseries(func, ser):
- return SAtoTS(doFunc_oneseries(ser, func),ser.freq, ser.observed)
+ if 'start_date' in kwargs: start_date = kwargs['start_date']
+ else: start_date = min([x.start_date() for x in series])
+ if 'end_date' in kwargs: end_date = kwargs['end_date']
+ else: end_date = max([x.end_date() for x in series])
+
+ return [adjust_endpoints(x, start_date=start_date, end_date=end_date) for x in series]
+
\ No newline at end of file
More information about the Scipy-svn
mailing list