Various strings to dates.

John Roth newsgroups at jhrothjr.com
Sat Jan 24 01:18:23 CET 2004


"Amy G" <amy-g-art at cox.net> wrote in message
news:4GfQb.16187$AA6.14368 at fed1read03...
> I have seen something about this before on this forum, but my Google search
> didn't come up with the answer I am looking for.
>
> I have a list of tuples.  Each tuple is in the following format:
>
> ("data", "moredata", "evenmoredata", "date string")
>
> The date string is my concern.  This is the date stamp from an email.
> The problem is that I have a whole bunch of variations when it comes to
the
> format that the date string is in.  For example I could have the following
> two tuples:
>
> ("data", "moredata", "evenmoredata", "Fri, 23 Jan 2004 00:06:15")
> ("data", "moredata", "evenmoredata", "Thursday, 22 January 2004 03:15:06")
>
> I know there is some way to use the date string from each of these to get
a
> date usable by python, but I cannot figure it out.
> I was trying to use time.strptime but have been unsuccessful thus far.
>
> Any help is appreciated.

This is what I use to parse dates of unknown provenance.
It's laughably overengineered, and I don't include the day
of the week or the time. Given your examples, though,
those should be easy enough to deal with.

HTH
John Roth

class DateContainer(object):
    """Hold a calendar date that can be set from free-form or ISO text.

    Two properties are exposed:

    * ``stringValue`` -- the "user friendly" text as assigned; assignment
      validates it and fills in ``year``, ``month``, ``day`` and ``bc``.
    * ``value`` -- the date rendered as ``YYYY-MM-DD``, followed by a space
      and the epoch when the epoch is ``BC``/``BCE``.  Assigning to it
      validates the ISO text and then also refreshes ``stringValue``.

    ``bc`` is stored as ``""`` for AD/CE dates.

    NOTE(review): ``dateTimeParse`` is not defined in this snippet; it is
    presumably a tokenizer returning a mutable list of string parts --
    confirm against its definition.
    """

    # Recognized epoch markers (compared in upper case).
    _EPOCHS = ("AD", "BC", "CE", "BCE")
    # Three-letter month abbreviations; position + 1 is the month number.
    _MONTHS = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
               'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')

    _typeDict = {"stringValue": "String", "value": "String"}
    _stringValue = ""
    _value = ""
    year = 1
    month = 1
    day = 1
    bc = ""

    def _checkUserFriendlyDate(self, date):
        """Parse a user friendly date and store its components on self.

        The rules for a user friendly date are:

        1. The year must be at least three digits, including leading
           zeroes if necessary. Day and numeric month may be no longer
           than 2 digits.
        2. The month may be alphabetic or numeric. If it's alphabetic,
           it must be at least three letters long.
        3. The epoch may be ad, bc, bce or ce. If omitted, it's assumed
           to be ad.
        4. After removing the year, epoch and an alphabetic month, the
           remaining single piece is the day, or the piece that is
           greater than 12.
        5. If two pieces remain, the first is the month, the second is
           the day. Both are between 1 and 12, inclusive.

        Returns True on success.
        Raises ValueError when the text does not follow the rules.
        """
        partList = dateTimeParse(date)
        if not (2 < len(partList) < 5):
            raise ValueError("incorrect part list: %s" % (partList,))
        bc = self._findBC(partList)
        # Exactly year, month and day must remain once the epoch is gone.
        # Raise rather than return a message: the property setter ignores
        # the return value, so a returned string would accept bad input.
        if len(partList) > 3:
            raise ValueError("too many components in date: '%s'" % date)
        if len(partList) < 3:
            raise ValueError("too few components in date: '%s'" % date)
        year = self._findYear(partList)
        month = self._findAlphaMonth(partList)
        if month != 0:
            # Alphabetic month found: the single leftover piece is the day.
            day = partList[0]
        else:
            day = self._findDay(partList)
            if day:
                # A piece > 12 can only be the day; the other is the month.
                month = partList[0]
            else:
                # Ambiguous numeric pair: month first, then day (rule 5).
                month, day = partList
        year = self._checkNum(year, 4712)
        day = self._checkNum(day, 31)
        month = self._checkNum(month, 12)
        if bc in ("AD", "CE"):
            bc = ""
        self.year, self.month, self.day, self.bc = year, month, day, bc
        return True

    def _checkNum(self, num, limit, minimum=1):
        """Convert num to int and ensure minimum <= result <= limit.

        Raises ValueError when num is not an integer or is out of range.
        """
        result = int(num)
        if result < minimum or result > limit:
            raise ValueError("number '%s' out of range %s..%s"
                             % (num, minimum, limit))
        return result

    def _findBC(self, partList):
        """Remove the first epoch marker from partList and return it.

        The marker is returned in upper case; "" is returned (and
        partList left untouched) when no marker is present.
        """
        for index, word in enumerate(partList):
            epoch = word.upper()
            if epoch in self._EPOCHS:
                del partList[index]
                return epoch
        return ""

    def _findYear(self, partList):
        """Remove and return the first all-digit piece longer than 2 chars.

        Raises ValueError when partList holds no such piece.
        """
        for index, word in enumerate(partList):
            if len(word) > 2 and word.isdigit():
                del partList[index]
                return word
        raise ValueError("no year found in: %s" % (partList,))

    def _findAlphaMonth(self, partList):
        """Remove the first alphabetic piece and return its month number.

        Only the first three letters are compared, case-insensitively.
        Returns 0 when partList has no alphabetic piece.
        Raises ValueError when the alphabetic piece is not a month name.
        """
        for index, word in enumerate(partList):
            if not word.isalpha():
                continue
            abbrev = word[:3].upper()
            if abbrev not in self._MONTHS:
                raise ValueError("unrecognized month name: '%s'" % word)
            del partList[index]
            return self._MONTHS.index(abbrev) + 1
        return 0

    def _findDay(self, partList):
        """Remove and return the first numeric piece greater than 12.

        Such a piece cannot be a month, so it must be the day.  Returns ""
        (and leaves partList untouched) when there is none.
        """
        for index, word in enumerate(partList):
            if word.isdigit() and int(word) > 12:
                del partList[index]
                return word
        return ""

    def _getStringValue(self):
        """Return the user friendly text most recently stored."""
        return self._stringValue

    def _setStringValue(self, value):
        """Validate value as a user friendly date, then remember it."""
        self._checkUserFriendlyDate(value)
        self._stringValue = value

    stringValue = property(_getStringValue, _setStringValue,
                           doc="User Friendly Date")

    def _getValue(self):
        """Return the date as 'YYYY-MM-DD', plus ' <epoch>' if bc is set."""
        isoDate = "%04u-%02u-%02u %s" % (self.year, self.month, self.day,
                                         self.bc)
        # strip() drops the trailing space left by an empty epoch.
        return isoDate.strip()

    def _checkISODate(self, value):
        """Validate 'YYYY-MM-DD[ epoch]' text and store its components.

        Raises ValueError when a number is malformed or out of range, or
        when the optional epoch suffix is not one of AD, BC, CE, BCE.
        """
        year = self._checkNum(value[:4], 4712)
        month = self._checkNum(value[5:7], 12)
        day = self._checkNum(value[8:10], 31)
        # Default to "no epoch" so bc is always bound for plain dates.
        bc = ""
        if len(value) > 10:
            bc = value[11:].strip().upper()
            if bc and bc not in self._EPOCHS:
                raise ValueError("unknown epoch in date: '%s'" % value)
        if bc in ("AD", "CE"):
            bc = ""
        self.year, self.month, self.day, self.bc = year, month, day, bc
        return

    # Public spelling kept for callers that used the original name.
    checkISODate = _checkISODate

    def _setValue(self, value):
        """Validate ISO text, store it, and refresh stringValue from it."""
        self._checkISODate(value)
        # Assigning through the property re-validates the canonical text.
        self.stringValue = self._getValue()
        return None

    value = property(_getValue, _setValue,
                     doc="ISO Standard Format Date")
>
>





More information about the Python-list mailing list