[Python-checkins] python/nondist/sandbox/datetime datetime.py,1.132,1.133 doc.txt,1.76,1.77 test_datetime.py,1.91,1.92

tim_one@users.sourceforge.net tim_one@users.sourceforge.net
Wed, 01 Jan 2003 12:52:15 -0800


Update of /cvsroot/python/python/nondist/sandbox/datetime
In directory sc8-pr-cvs1:/tmp/cvs-serv5679

Modified Files:
	datetime.py doc.txt test_datetime.py 
Log Message:
A quicker astimezone() implementation, rehabilitating an earlier
suggestion from Guido, along with a formal correctness proof of the
trickiest bit.  The intricacy of the proof reveals how delicate this
is, but also how robust the conclusion:  correctness doesn't rely on
dst() returning +- one hour (not all real time zones do!), it only
relies on:

1. That dst() returns a (any) non-zero value if and only if daylight
   time is in effect.

and

2. That the tzinfo subclass implements a consistent notion of time zone.

The meaning of "consistent" was a hidden assumption, which is now an
explicit requirement in the docs.  Alas, it's an unverifiable (by the
datetime implementation) requirement, but so it goes.


Index: datetime.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/datetime/datetime.py,v
retrieving revision 1.132
retrieving revision 1.133
diff -C2 -d -r1.132 -r1.133
*** datetime.py	1 Jan 2003 04:12:00 -0000	1.132
--- datetime.py	1 Jan 2003 20:52:10 -0000	1.133
***************
*** 1620,1628 ****
      def astimezone(self, tz):
          _check_tzinfo_arg(tz)
!         # This is somewhat convoluted because we can only call
!         # tzinfo.utcoffset(dt) when dt.tzinfo is tzinfo.  It's more
!         # convoluted due to DST headaches (redundant spellings and
!         # "missing" hours in local time -- see the tests for details).
!         other = self.replace(tzinfo=tz) # this does no conversion
  
          # Don't call utcoffset unless necessary.  First check trivial cases.
--- 1620,1624 ----
      def astimezone(self, tz):
          _check_tzinfo_arg(tz)
!         other = self.replace(tzinfo=tz)
  
          # Don't call utcoffset unless necessary.  First check trivial cases.
***************
*** 1639,1699 ****
              return other
  
!         total_added_to_other = otoff - myoff
!         other += total_added_to_other
!         # If tz is a fixed-offset class, we're done, but we can't know
!         # whether it is.  If it's a DST-aware class, and we're not near a
!         # DST boundary, we're also done.  If we crossed a DST boundary,
!         # the offset will be different now, and that's our only clue.
!         # Unfortunately, we can be in trouble even if we didn't cross a
!         # DST boundary, if we landed on one of the DST "problem hours".
!         newoff = other.utcoffset()
!         if newoff is None:
!             self._inconsistent_utcoffset_error()
!         if newoff != otoff:
!             delta = newoff - otoff
!             total_added_to_other += delta
!             other += delta
              otoff = other.utcoffset()
              if otoff is None:
                  self._inconsistent_utcoffset_error()
  
!         # If this is the first hour of DST, it may be a local time that
!         # doesn't make sense on the local clock, in which case the naive
!         # hour before it (in standard time) is equivalent and does make
!         # sense on the local clock.  So force that.
!         alt = other - _HOUR
!         altoff = alt.utcoffset()
!         if altoff is None:
!             self._inconsistent_utcoffset_error()
!         # Are alt and other really the same time?  They are iff
!         # alt - altoff == other - otoff, iff
!         # (other - _HOUR) - altoff = other - otoff, iff
!         # otoff - altoff == _HOUR
!         # Note that the Python comparison "alt == other" would return false,
!         # though, because they have same tzinfo member, and utcoffset() is
!         # ignored when comparing times w/ the same tzinfo.
!         diff = otoff - altoff
! 
!         # Enable the assert if you're dubious; it's expensive.
!         ##assert ((diff == _HOUR) ==
!         ##        (alt.replace(tzinfo=None) - alt.utcoffset() ==
!         ##         other.replace(tzinfo=None) - other.utcoffset()))
!         if diff == _HOUR:
!             return alt      # use the local time that makes sense
! 
!         # There's still a problem with the unspellable (in local time)
!         # hour after DST ends.  other's local time now is
!         # self + total_added_to_other, so self == other iff
!         # self - myoff = other - otoff, iff
!         # self - myoff = self + total_added_to_other - otoff, iff
!         # total_added_to_other == otoff - myoff
!         ##assert (self == other) == (total_added_to_other == otoff - myoff)
!         if total_added_to_other == otoff - myoff:
              return other
-         # Else there's no way to spell self in zone other.tz.
          raise ValueError("astimezone():  the source datetimetz can't be "
                           "expressed in the target timezone's local time")
  
- 
      def isoformat(self, sep='T'):
          s = super(datetimetz, self).isoformat(sep)
--- 1635,1670 ----
              return other
  
!         # See the long comment block at the end of this file for an
!         # explanation of this algorithm.  That it always works requires a
!         # pretty intricate proof.
!         otdst = other.dst()
!         if otdst is None:
!             otdst = 0
!         total_added_to_other = otoff - otdst - myoff
!         if total_added_to_other:
!             other += total_added_to_other
              otoff = other.utcoffset()
              if otoff is None:
                  self._inconsistent_utcoffset_error()
+         # The distance now from self to other is
+         # self - other == naive(self) - myoff - (naive(other) - otoff) ==
+         # naive(self) - myoff -
+         #             ((naive(self) + total_added_to_other - otoff) ==
+         # - myoff - total_added_to_other + otoff
+         delta = otoff - myoff - total_added_to_other
+         ##assert (other == self) == (not delta) # expensive
+         if not delta:
+             return other
  
!         # Must have crossed a DST switch point.
!         total_added_to_other += delta
!         other += delta
!         otoff = other.utcoffset()
!         ##assert (other == self) == (otoff - myoff == total_added_to_other)
!         if otoff - myoff == total_added_to_other:
              return other
          raise ValueError("astimezone():  the source datetimetz can't be "
                           "expressed in the target timezone's local time")
  
      def isoformat(self, sep='T'):
          s = super(datetimetz, self).isoformat(sep)
***************
*** 1908,1911 ****
--- 1879,1993 ----
  del pickle
  
+ """
+ Some time zone algebra.  For a datetimetz x, let
+     x.n = x stripped of its timezone -- its naive time.
+     x.o = x.utcoffset(), and assuming that doesn't raise an exception or
+           return None
+     x.d = x.dst(), and assuming that doesn't raise an exception or
+           return None
+     x.s = x's standard offset, x.o - x.d
+ 
+ Now some derived rules, where k is a duration (timedelta).
+ 
+ 1. x.o = x.s + x.d
+    This follows from the definition of x.s.
+ 
+ 2. If x and y have the same tzinfo member, x.s == y.s.
+    This is actually a requirement, an assumption we need to make about
+    sane tzinfo classes.
+ 
+ 3. The naive UTC time corresponding to x is x.n - x.o.
+    This is again a requirement for a sane tzinfo class.
+ 
+ 4. (x+k).s = x.s
+    This follows from #2, and that datimetimetz+timedelta preserves tzinfo.
+ 
+ 5. (y+k).n = y.n + k
+    Again follows from how arithmetic is defined.
+ 
+ Now we can explain x.astimezone(tz).  Let's assume it's an interesting case
+ (meaning that the various tzinfo methods exist, and don't blow up or return
+ None when called).
+ 
+ The function wants to return a datetimetz y with timezone tz, equivalent to x.
+ 
+ By #3, we want
+ 
+     y.n - y.o = x.n - x.o                       [1]
+ 
+ The algorithm starts by attaching tz to x.n, and calling that y.  So
+ x.n = y.n at the start.  Then it wants to add a duration k to y, so that [1]
+ becomes true; in effect, we want to solve [2] for k:
+ 
+    (y+k).n - (y+k).o = x.n - x.o                [2]
+ 
+ By #1, this is the same as
+ 
+    (y+k).n - ((y+k).s + (y+k).d) = x.n - x.o    [3]
+ 
+ By #5, (y+k).n = y.n + k, which equals x.n + k because x.n=y.n at the start.
+ Substituting that into [3],
+ 
+    x.n + k - (y+k).s - (y+k).d = x.n - x.o; the x.n terms cancel, leaving
+    k - (y+k).s - (y+k).d = - x.o; rearranging,
+    k = (y+k).s - x.o - (y+k).d; by #4, (y+k).s == y.s, so
+    k = y.s - x.o - (y+k).d; then by #1, y.s = y.o - y.d, so
+    k = y.o - y.d - x.o - (y+k).d
+ 
+ On the RHS, (y+k).d can't be computed directly, but all the rest can be, and
+ we approximate k by ignoring the (y+k).d term at first.  Note that k can't
+ be very large, since all offset-returning methods return a duration of
+ magnitude less than 24 hours.  For that reason, if y is firmly in std time,
+ (y+k).d must be 0, so ignoring it has no consequence then.
+ 
+ In any case, the new value is
+ 
+     z = y + y.o - y.d - x.o
+ 
+ If
+     z.n - z.o = x.n - x.o                       [4]
+ 
+ then, we have an equivalent time, and are almost done.  The insecurity here is
+ at the start of daylight time.  Picture US Eastern for concreteness.  The wall
+ time jumps from 1:59 to 3:00, and wall hours of the form 2:MM don't make good
+ sense then.  A sensible Eastern tzinfo class will consider such a time to be
+ EDT (because it's "after 2"), which is a redundant spelling of 1:MM EST on the
+ day DST starts.  We want to return the 1:MM EST spelling because that's
+ the only spelling that makes sense on the local wall clock.
+ 
+ Claim:  When [4] is true, we have "the right" spelling in this endcase.  No
+ further adjustment is necessary.
+ 
+ Proof:  The right spelling has z.d = 0, and the wrong spelling has z.d != 0
+ (for US Eastern, the wrong spelling has z.d = 60 minutes, but we can't assume
+ that all time zones work this way -- we can assume a time zone is in daylight
+ time iff dst() doesn't return 0).  By [4], and recalling that z.o = z.s + z.d,
+ 
+     z.n - z.s - z.d = x.n - x.o                 [5]
+ 
+ Also
+ 
+     z.n = (y + y.o - y.d - x.o).n by the construction of z, which equals
+           y.n + y.o - y.d - x.o by #5.
+ 
+ Plugging that into [5],
+ 
+     y.n + y.o - y.d - x.o - z.s - z.d = x.n - x.o; cancelling the x.o terms,
+     y.n + y.o - y.d - z.s - z.d = x.n; but x.n = y.n too, so they also cancel,
+     y.o - y.d - z.s - z.d = 0; then y.o = y.s + y.d, so
+     y.s + y.d - y.d - z.s - z.d = 0; then the y.d terms cancel,
+     y.s - z.s - z.d = 0; but y and z are in the same timezone, so by #2
+                          y.s = z.s, and they also cancel, leaving
+     - z.d = 0; or,
+     z.d = 0
+ 
+ Therefore z is the standard-time spelling, and there's nothing left to do in
+ this case.
+ 
+ Note that we actually proved something stronger:  when [4] is true, it must
+ also be true that z.dst() returns 0.
+ 
+ XXX Flesh out the rest of the algorithm.
+ """
  def _test():
      import test_datetime

Index: doc.txt
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/datetime/doc.txt,v
retrieving revision 1.76
retrieving revision 1.77
diff -C2 -d -r1.76 -r1.77
*** doc.txt	31 Dec 2002 15:56:31 -0000	1.76
--- doc.txt	1 Jan 2003 20:52:11 -0000	1.77
***************
*** 815,818 ****
--- 815,830 ----
      should be set.
  
+     An instance tz of a tzinfo subclass that models both standard and
+     daylight times must be consistent in this sense:
+ 
+         tz.utcoffset(dt) - tz.dst(dt)
+ 
+     must return the same result for every datetimetz dt with dt.tzinfo=tz.
+     For sane tzinfo subclasses, this expression yields the time zone's
+     "standard offset", which should not depend on the specific date and
+     time passed.  The implementation of datetimetz.astimezone() relies on
+     this, but cannot detect violations; it's the programmer's
+     responsibility to ensure it.
+ 
  These methods are called by a datetimetz or timetz object, in response to
  their methods of the same names.  A datetimetz object passes itself as the
***************
*** 1160,1164 ****
      XXX classes, one hour per year has two spellings in local time, and
      XXX another hour has no spelling in local time.
!     
  
    - timetuple()
--- 1172,1176 ----
      XXX classes, one hour per year has two spellings in local time, and
      XXX another hour has no spelling in local time.
! 
  
    - timetuple()

Index: test_datetime.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/datetime/test_datetime.py,v
retrieving revision 1.91
retrieving revision 1.92
diff -C2 -d -r1.91 -r1.92
*** test_datetime.py	1 Jan 2003 04:12:00 -0000	1.91
--- test_datetime.py	1 Jan 2003 20:52:12 -0000	1.92
***************
*** 2718,2721 ****
--- 2718,2746 ----
          # self.convert_between_tz_and_utc(Central, Eastern)  # can't work
  
+     def test_tricky(self):
+         # 22:00 on day before daylight starts.
+         fourback = self.dston - timedelta(hours=4)
+         ninewest = FixedOffset(-9*60, "-0900", 0)
+         fourback = fourback.astimezone(ninewest)
+         # 22:00-0900 is 7:00 UTC == 2:00 EST == 3:00 DST.  Since it's "after
+         # 2", we should get the 3 spelling.
+         # If we plug 22:00 the day before into Eastern, it "looks like std
+         # time", so its offset is returned as -5, and -5 - -9 = 4.  Adding 4
+         # to 22:00 lands on 2:00, which makes no sense in local time (the
+         # local clock jumps from 1 to 3).  The point here is to make sure we
+         # get the 3 spelling.
+         expected = self.dston.replace(hour=3)
+         got = fourback.astimezone(Eastern).astimezone(None)
+         self.assertEqual(expected, got)
+ 
+         # Similar, but map to 6:00 UTC == 1:00 EST == 2:00 DST.  In that
+         # case we want the 1:00 spelling.
+         sixutc = self.dston.replace(hour=6).astimezone(utc_real)
+         # Now 6:00 "looks like daylight", so the offset wrt Eastern is -4,
+         # and adding -4-0 == -4 gives the 2:00 spelling.  We want the 1:00 EST
+         # spelling.
+         expected = self.dston.replace(hour=1)
+         got = sixutc.astimezone(Eastern).astimezone(None)
+         self.assertEqual(expected, got)
  
  def test_suite():