[Python-checkins] gh-80010: Expand fromisoformat to include most of ISO-8601 (#92177)

pganssle webhook-mailer at python.org
Thu May 5 20:31:28 EDT 2022


https://github.com/python/cpython/commit/1303f8c927227b72d9ee9eae890be4692b4d4592
commit: 1303f8c927227b72d9ee9eae890be4692b4d4592
branch: main
author: Paul Ganssle <1377457+pganssle at users.noreply.github.com>
committer: pganssle <1377457+pganssle at users.noreply.github.com>
date: 2022-05-05T18:31:24-06:00
summary:

gh-80010: Expand fromisoformat to include most of ISO-8601 (#92177)

This expands `fromisoformat` to cover most of the common uses of ISO 8601. We may expand the scope more in the future.

files:
A Misc/NEWS.d/next/Library/2022-05-03-12-11-27.gh-issue-80010.yG54RE.rst
M Doc/library/datetime.rst
M Doc/whatsnew/3.11.rst
M Lib/datetime.py
M Lib/test/datetimetester.py
M Modules/_datetimemodule.c

diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst
index ca17dc880cfb3..e0b28d7cb978d 100644
--- a/Doc/library/datetime.rst
+++ b/Doc/library/datetime.rst
@@ -526,18 +526,20 @@ Other constructors, all class methods:
 
 .. classmethod:: date.fromisoformat(date_string)
 
-   Return a :class:`date` corresponding to a *date_string* given in the format
-   ``YYYY-MM-DD``::
+   Return a :class:`date` corresponding to a *date_string* given in any valid
+   ISO 8601 format, except ordinal dates (e.g. ``YYYY-DDD``)::
 
       >>> from datetime import date
       >>> date.fromisoformat('2019-12-04')
       datetime.date(2019, 12, 4)
-
-   This is the inverse of :meth:`date.isoformat`. It only supports the format
-   ``YYYY-MM-DD``.
+      >>> date.fromisoformat('20191204')
+      datetime.date(2019, 12, 4)
+      >>> date.fromisoformat('2021-W01-1')
+      datetime.date(2021, 1, 4)
 
    .. versionadded:: 3.7
-
+   .. versionchanged:: 3.11
+      Previously, this method only supported the format ``YYYY-MM-DD``.
 
 .. classmethod:: date.fromisocalendar(year, week, day)
 
@@ -710,8 +712,6 @@ Instance methods:
        >>> date(2002, 12, 4).isoformat()
        '2002-12-04'
 
-   This is the inverse of :meth:`date.fromisoformat`.
-
 .. method:: date.__str__()
 
    For a date *d*, ``str(d)`` is equivalent to ``d.isoformat()``.
@@ -994,31 +994,29 @@ Other constructors, all class methods:
 
 .. classmethod:: datetime.fromisoformat(date_string)
 
-   Return a :class:`.datetime` corresponding to a *date_string* in one of the
-   formats emitted by :meth:`date.isoformat` and :meth:`datetime.isoformat`.
-
-   Specifically, this function supports strings in the format:
+   Return a :class:`.datetime` corresponding to a *date_string* in any valid
+   ISO 8601 format, with the following exceptions:
 
-   .. code-block:: none
-
-      YYYY-MM-DD[*HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]]
-
-   where ``*`` can match any single character.
-
-   .. caution::
-
-     This does *not* support parsing arbitrary ISO 8601 strings - it is only intended
-     as the inverse operation of :meth:`datetime.isoformat`. A more full-featured
-     ISO 8601 parser, ``dateutil.parser.isoparse`` is available in the third-party package
-     `dateutil <https://dateutil.readthedocs.io/en/stable/parser.html#dateutil.parser.isoparse>`__.
+   1. Time zone offsets may have fractional seconds.
+   2. The ``T`` separator may be replaced by any single Unicode character.
+   3. Ordinal dates are not currently supported.
+   4. Fractional hours and minutes are not supported.
 
    Examples::
 
        >>> from datetime import datetime
        >>> datetime.fromisoformat('2011-11-04')
        datetime.datetime(2011, 11, 4, 0, 0)
+       >>> datetime.fromisoformat('20111104')
+       datetime.datetime(2011, 11, 4, 0, 0)
        >>> datetime.fromisoformat('2011-11-04T00:05:23')
        datetime.datetime(2011, 11, 4, 0, 5, 23)
+       >>> datetime.fromisoformat('2011-11-04T00:05:23Z')
+       datetime.datetime(2011, 11, 4, 0, 5, 23, tzinfo=datetime.timezone.utc)
+       >>> datetime.fromisoformat('20111104T000523')
+       datetime.datetime(2011, 11, 4, 0, 5, 23)
+       >>> datetime.fromisoformat('2011-W01-2T00:05:23.283')
+       datetime.datetime(2011, 1, 4, 0, 5, 23, 283000)
        >>> datetime.fromisoformat('2011-11-04 00:05:23.283')
        datetime.datetime(2011, 11, 4, 0, 5, 23, 283000)
        >>> datetime.fromisoformat('2011-11-04 00:05:23.283+00:00')
@@ -1028,6 +1026,10 @@ Other constructors, all class methods:
            tzinfo=datetime.timezone(datetime.timedelta(seconds=14400)))
 
    .. versionadded:: 3.7
+   .. versionchanged:: 3.11
+      Previously, this method only supported formats that could be emitted by
+      :meth:`date.isoformat()` or :meth:`datetime.isoformat()`.
+
 
 .. classmethod:: datetime.fromisocalendar(year, week, day)
 
@@ -1763,30 +1765,41 @@ Other constructor:
 
 .. classmethod:: time.fromisoformat(time_string)
 
-   Return a :class:`.time` corresponding to a *time_string* in one of the
-   formats emitted by :meth:`time.isoformat`. Specifically, this function supports
-   strings in the format:
-
-   .. code-block:: none
-
-      HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]
-
-   .. caution::
+   Return a :class:`.time` corresponding to a *time_string* in any valid
+   ISO 8601 format, with the following exceptions:
 
-     This does *not* support parsing arbitrary ISO 8601 strings. It is only
-     intended as the inverse operation of :meth:`time.isoformat`.
+   1. Time zone offsets may have fractional seconds.
+   2. The leading ``T``, normally required in cases where there may be ambiguity between
+      a date and a time, is not required.
+   3. Fractional seconds may have any number of digits (anything beyond 6 will
+      be truncated).
+   4. Fractional hours and minutes are not supported.
 
    Examples::
 
        >>> from datetime import time
        >>> time.fromisoformat('04:23:01')
        datetime.time(4, 23, 1)
+       >>> time.fromisoformat('T04:23:01')
+       datetime.time(4, 23, 1)
+       >>> time.fromisoformat('T042301')
+       datetime.time(4, 23, 1)
        >>> time.fromisoformat('04:23:01.000384')
        datetime.time(4, 23, 1, 384)
+       >>> time.fromisoformat('04:23:01,000384')
+       datetime.time(4, 23, 1, 384)
        >>> time.fromisoformat('04:23:01+04:00')
        datetime.time(4, 23, 1, tzinfo=datetime.timezone(datetime.timedelta(seconds=14400)))
+       >>> time.fromisoformat('04:23:01Z')
+       datetime.time(4, 23, 1, tzinfo=datetime.timezone.utc)
+       >>> time.fromisoformat('04:23:01+00:00')
+       datetime.time(4, 23, 1, tzinfo=datetime.timezone.utc)
+
 
    .. versionadded:: 3.7
+   .. versionchanged:: 3.11
+      Previously, this method only supported formats that could be emitted by
+      :meth:`time.isoformat()`.
 
 
 Instance methods:
diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
index 87dc5ddb4e611..efcfa17600879 100644
--- a/Doc/whatsnew/3.11.rst
+++ b/Doc/whatsnew/3.11.rst
@@ -425,6 +425,14 @@ asyncio
   existing stream-based connections to TLS. (Contributed by Ian Good in
   :issue:`34975`.)
 
+datetime
+--------
+
+* :meth:`datetime.date.fromisoformat`, :meth:`datetime.time.fromisoformat` and
+  :meth:`datetime.datetime.fromisoformat` can now be used to parse most ISO 8601
+  formats (barring only those that support fractional hours and minutes).
+  (Contributed by Paul Ganssle in :gh:`80010`.)
+
 fractions
 ---------
 
diff --git a/Lib/datetime.py b/Lib/datetime.py
index 7f79aa436eb5e..afbb6fed2ecb6 100644
--- a/Lib/datetime.py
+++ b/Lib/datetime.py
@@ -262,58 +262,150 @@ def _wrap_strftime(object, format, timetuple):
     return _time.strftime(newformat, timetuple)
 
 # Helpers for parsing the result of isoformat()
+def _is_ascii_digit(c):
+    return c in "0123456789"
+
+def _find_isoformat_datetime_separator(dtstr):
+    # See the comment in _datetimemodule.c:_find_isoformat_datetime_separator
+    len_dtstr = len(dtstr)
+    if len_dtstr == 7:
+        return 7
+
+    assert len_dtstr > 7
+    date_separator = "-"
+    week_indicator = "W"
+
+    if dtstr[4] == date_separator:
+        if dtstr[5] == week_indicator:
+            if len_dtstr < 8:
+                raise ValueError("Invalid ISO string")
+            if len_dtstr > 8 and dtstr[8] == date_separator:
+                if len_dtstr == 9:
+                    raise ValueError("Invalid ISO string")
+                if len_dtstr > 10 and _is_ascii_digit(dtstr[10]):
+                    # This is as far as we need to resolve the ambiguity for
+                    # the moment - if we have YYYY-Www-##, the separator is
+                    # either a hyphen at 8 or a number at 10.
+                    #
+                    # We'll assume it's a hyphen at 8 because it's way more
+                    # likely that someone will use a hyphen as a separator than
+                    # a number, but at this point it's really best effort
+                    # because this is an extension of the spec anyway.
+                    # TODO(pganssle): Document this
+                    return 8
+                return 10
+            else:
+                # YYYY-Www (8)
+                return 8
+        else:
+            # YYYY-MM-DD (10)
+            return 10
+    else:
+        if dtstr[4] == week_indicator:
+            # YYYYWww (7) or YYYYWwwd (8)
+            idx = 7
+            while idx < len_dtstr:
+                if not _is_ascii_digit(dtstr[idx]):
+                    break
+                idx += 1
+
+            if idx < 9:
+                return idx
+
+            if idx % 2 == 0:
+                # An even end index for the digit run means YYYYWww
+                return 7
+            else:
+                return 8
+        else:
+            # YYYYMMDD (8)
+            return 8
+
+
 def _parse_isoformat_date(dtstr):
-    # It is assumed that this function will only be called with a
-    # string of length exactly 10, and (though this is not used) ASCII-only
+    # It is assumed that this is an ASCII-only string of lengths 7, 8 or 10,
+    # see the comment on Modules/_datetimemodule.c:_find_isoformat_datetime_separator
+    assert len(dtstr) in (7, 8, 10)
     year = int(dtstr[0:4])
-    if dtstr[4] != '-':
-        raise ValueError('Invalid date separator: %s' % dtstr[4])
+    has_sep = dtstr[4] == '-'
+
+    pos = 4 + has_sep
+    if dtstr[pos:pos + 1] == "W":
+        # YYYY-?Www-?D?
+        pos += 1
+        weekno = int(dtstr[pos:pos + 2])
+        pos += 2
 
-    month = int(dtstr[5:7])
+        dayno = 1
+        if len(dtstr) > pos:
+            if (dtstr[pos:pos + 1] == '-') != has_sep:
+                raise ValueError("Inconsistent use of dash separator")
 
-    if dtstr[7] != '-':
-        raise ValueError('Invalid date separator')
+            pos += has_sep
 
-    day = int(dtstr[8:10])
+            dayno = int(dtstr[pos:pos + 1])
+
+        return list(_isoweek_to_gregorian(year, weekno, dayno))
+    else:
+        month = int(dtstr[pos:pos + 2])
+        pos += 2
+        if (dtstr[pos:pos + 1] == "-") != has_sep:
+            raise ValueError("Inconsistent use of dash separator")
+
+        pos += has_sep
+        day = int(dtstr[pos:pos + 2])
+
+        return [year, month, day]
+
+
+_FRACTION_CORRECTION = [100000, 10000, 1000, 100, 10]
 
-    return [year, month, day]
 
 def _parse_hh_mm_ss_ff(tstr):
-    # Parses things of the form HH[:MM[:SS[.fff[fff]]]]
+    # Parses things of the form HH[:?MM[:?SS[{.,}fff[fff]]]]
     len_str = len(tstr)
 
     time_comps = [0, 0, 0, 0]
     pos = 0
     for comp in range(0, 3):
         if (len_str - pos) < 2:
-            raise ValueError('Incomplete time component')
+            raise ValueError("Incomplete time component")
 
         time_comps[comp] = int(tstr[pos:pos+2])
 
         pos += 2
         next_char = tstr[pos:pos+1]
 
+        if comp == 0:
+            has_sep = next_char == ':'
+
         if not next_char or comp >= 2:
             break
 
-        if next_char != ':':
-            raise ValueError('Invalid time separator: %c' % next_char)
+        if has_sep and next_char != ':':
+            raise ValueError("Invalid time separator: %c" % next_char)
 
-        pos += 1
+        pos += has_sep
 
     if pos < len_str:
-        if tstr[pos] != '.':
-            raise ValueError('Invalid microsecond component')
+        if tstr[pos] not in '.,':
+            raise ValueError("Invalid microsecond component")
         else:
             pos += 1
 
             len_remainder = len_str - pos
-            if len_remainder not in (3, 6):
-                raise ValueError('Invalid microsecond component')
 
-            time_comps[3] = int(tstr[pos:])
-            if len_remainder == 3:
-                time_comps[3] *= 1000
+            if len_remainder >= 6:
+                to_parse = 6
+            else:
+                to_parse = len_remainder
+
+            time_comps[3] = int(tstr[pos:(pos+to_parse)])
+            if to_parse < 6:
+                time_comps[3] *= _FRACTION_CORRECTION[to_parse-1]
+            if (len_remainder > to_parse
+                    and not all(map(_is_ascii_digit, tstr[(pos+to_parse):]))):
+                raise ValueError("Non-digit values in unparsed fraction")
 
     return time_comps
 
@@ -321,27 +413,34 @@ def _parse_isoformat_time(tstr):
     # Format supported is HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]
     len_str = len(tstr)
     if len_str < 2:
-        raise ValueError('Isoformat time too short')
+        raise ValueError("Isoformat time too short")
 
-    # This is equivalent to re.search('[+-]', tstr), but faster
-    tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1)
+    # This is equivalent to re.search('[-+Z]', tstr), but faster
+    tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1 or tstr.find('Z') + 1)
     timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr
 
     time_comps = _parse_hh_mm_ss_ff(timestr)
 
     tzi = None
-    if tz_pos > 0:
+    if tz_pos == len_str and tstr[-1] == 'Z':
+        tzi = timezone.utc
+    elif tz_pos > 0:
         tzstr = tstr[tz_pos:]
 
         # Valid time zone strings are:
+        # HH                  len: 2
+        # HHMM                len: 4
         # HH:MM               len: 5
+        # HHMMSS              len: 6
+        # HHMMSS.f+           len: 7+
         # HH:MM:SS            len: 8
-        # HH:MM:SS.ffffff     len: 15
+        # HH:MM:SS.f+         len: 10+
 
-        if len(tzstr) not in (5, 8, 15):
-            raise ValueError('Malformed time zone string')
+        if len(tzstr) in (0, 1, 3):
+            raise ValueError("Malformed time zone string")
 
         tz_comps = _parse_hh_mm_ss_ff(tzstr)
+
         if all(x == 0 for x in tz_comps):
             tzi = timezone.utc
         else:
@@ -356,6 +455,38 @@ def _parse_isoformat_time(tstr):
 
     return time_comps
 
+# tuple[int, int, int] -> tuple[int, int, int] version of date.fromisocalendar
+def _isoweek_to_gregorian(year, week, day):
+    # Year is bounded this way because 9999-12-31 is (9999, 52, 5)
+    if not MINYEAR <= year <= MAXYEAR:
+        raise ValueError(f"Year is out of range: {year}")
+
+    if not 0 < week < 53:
+        out_of_range = True
+
+        if week == 53:
+            # ISO years have 53 weeks in them on years starting with a
+            # Thursday and leap years starting on a Wednesday
+            first_weekday = _ymd2ord(year, 1, 1) % 7
+            if (first_weekday == 4 or (first_weekday == 3 and
+                                       _is_leap(year))):
+                out_of_range = False
+
+        if out_of_range:
+            raise ValueError(f"Invalid week: {week}")
+
+    if not 0 < day < 8:
+        raise ValueError(f"Invalid weekday: {day} (range is [1, 7])")
+
+    # Now compute the offset from (Y, 1, 1) in days:
+    day_offset = (week - 1) * 7 + (day - 1)
+
+    # Calculate the ordinal day for monday, week 1
+    day_1 = _isoweek1monday(year)
+    ord_day = day_1 + day_offset
+
+    return _ord2ymd(ord_day)
+
 
 # Just raise TypeError if the arg isn't None or a string.
 def _check_tzname(name):
@@ -847,12 +978,14 @@ def fromordinal(cls, n):
 
     @classmethod
     def fromisoformat(cls, date_string):
-        """Construct a date from the output of date.isoformat()."""
+        """Construct a date from a string in ISO 8601 format."""
         if not isinstance(date_string, str):
             raise TypeError('fromisoformat: argument must be str')
 
+        if len(date_string) not in (7, 8, 10):
+            raise ValueError(f'Invalid isoformat string: {date_string!r}')
+
         try:
-            assert len(date_string) == 10
             return cls(*_parse_isoformat_date(date_string))
         except Exception:
             raise ValueError(f'Invalid isoformat string: {date_string!r}')
@@ -862,35 +995,7 @@ def fromisocalendar(cls, year, week, day):
         """Construct a date from the ISO year, week number and weekday.
 
         This is the inverse of the date.isocalendar() function"""
-        # Year is bounded this way because 9999-12-31 is (9999, 52, 5)
-        if not MINYEAR <= year <= MAXYEAR:
-            raise ValueError(f"Year is out of range: {year}")
-
-        if not 0 < week < 53:
-            out_of_range = True
-
-            if week == 53:
-                # ISO years have 53 weeks in them on years starting with a
-                # Thursday and leap years starting on a Wednesday
-                first_weekday = _ymd2ord(year, 1, 1) % 7
-                if (first_weekday == 4 or (first_weekday == 3 and
-                                           _is_leap(year))):
-                    out_of_range = False
-
-            if out_of_range:
-                raise ValueError(f"Invalid week: {week}")
-
-        if not 0 < day < 8:
-            raise ValueError(f"Invalid weekday: {day} (range is [1, 7])")
-
-        # Now compute the offset from (Y, 1, 1) in days:
-        day_offset = (week - 1) * 7 + (day - 1)
-
-        # Calculate the ordinal day for monday, week 1
-        day_1 = _isoweek1monday(year)
-        ord_day = day_1 + day_offset
-
-        return cls(*_ord2ymd(ord_day))
+        return cls(*_isoweek_to_gregorian(year, week, day))
 
     # Conversions to string
 
@@ -1427,10 +1532,15 @@ def isoformat(self, timespec='auto'):
 
     @classmethod
     def fromisoformat(cls, time_string):
-        """Construct a time from the output of isoformat()."""
+        """Construct a time from a string in one of the ISO 8601 formats."""
         if not isinstance(time_string, str):
             raise TypeError('fromisoformat: argument must be str')
 
+        # The spec actually requires that time-only ISO 8601 strings start with
+        # T, but the extended format allows this to be omitted as long as there
+        # is no ambiguity with date strings.
+        time_string = time_string.removeprefix('T')
+
         try:
             return cls(*_parse_isoformat_time(time_string))
         except Exception:
@@ -1711,24 +1821,30 @@ def combine(cls, date, time, tzinfo=True):
 
     @classmethod
     def fromisoformat(cls, date_string):
-        """Construct a datetime from the output of datetime.isoformat()."""
+        """Construct a datetime from a string in one of the ISO 8601 formats."""
         if not isinstance(date_string, str):
             raise TypeError('fromisoformat: argument must be str')
 
-        # Split this at the separator
-        dstr = date_string[0:10]
-        tstr = date_string[11:]
+        if len(date_string) < 7:
+            raise ValueError(f'Invalid isoformat string: {date_string!r}')
 
+        # Split this at the separator
         try:
+            separator_location = _find_isoformat_datetime_separator(date_string)
+            dstr = date_string[0:separator_location]
+            tstr = date_string[(separator_location+1):]
+
             date_components = _parse_isoformat_date(dstr)
         except ValueError:
-            raise ValueError(f'Invalid isoformat string: {date_string!r}')
+            raise ValueError(
+                f'Invalid isoformat string: {date_string!r}') from None
 
         if tstr:
             try:
                 time_components = _parse_isoformat_time(tstr)
             except ValueError:
-                raise ValueError(f'Invalid isoformat string: {date_string!r}')
+                raise ValueError(
+                    f'Invalid isoformat string: {date_string!r}') from None
         else:
             time_components = [0, 0, 0, 0, None]
 
@@ -2509,7 +2625,9 @@ def _name_from_offset(delta):
          _format_time, _format_offset, _index, _is_leap, _isoweek1monday, _math,
          _ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord,
          _divide_and_round, _parse_isoformat_date, _parse_isoformat_time,
-         _parse_hh_mm_ss_ff, _IsoCalendarDate)
+         _parse_hh_mm_ss_ff, _IsoCalendarDate, _isoweek_to_gregorian,
+         _find_isoformat_datetime_separator, _FRACTION_CORRECTION,
+         _is_ascii_digit)
     # XXX Since import * above excludes names that start with _,
     # docstring does not get overwritten. In the future, it may be
     # appropriate to maintain a single module level docstring and
diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py
index d85b5466f7fc2..0495362b3f369 100644
--- a/Lib/test/datetimetester.py
+++ b/Lib/test/datetimetester.py
@@ -7,6 +7,7 @@
 import bisect
 import copy
 import decimal
+import functools
 import sys
 import os
 import pickle
@@ -1840,6 +1841,41 @@ def test_fromisoformat(self):
 
                 self.assertEqual(dt, dt_rt)
 
+    def test_fromisoformat_date_examples(self):
+        examples = [
+            ('00010101', self.theclass(1, 1, 1)),
+            ('20000101', self.theclass(2000, 1, 1)),
+            ('20250102', self.theclass(2025, 1, 2)),
+            ('99991231', self.theclass(9999, 12, 31)),
+            ('0001-01-01', self.theclass(1, 1, 1)),
+            ('2000-01-01', self.theclass(2000, 1, 1)),
+            ('2025-01-02', self.theclass(2025, 1, 2)),
+            ('9999-12-31', self.theclass(9999, 12, 31)),
+            ('2025W01', self.theclass(2024, 12, 30)),
+            ('2025-W01', self.theclass(2024, 12, 30)),
+            ('2025W014', self.theclass(2025, 1, 2)),
+            ('2025-W01-4', self.theclass(2025, 1, 2)),
+            ('2026W01', self.theclass(2025, 12, 29)),
+            ('2026-W01', self.theclass(2025, 12, 29)),
+            ('2026W013', self.theclass(2025, 12, 31)),
+            ('2026-W01-3', self.theclass(2025, 12, 31)),
+            ('2022W52', self.theclass(2022, 12, 26)),
+            ('2022-W52', self.theclass(2022, 12, 26)),
+            ('2022W527', self.theclass(2023, 1, 1)),
+            ('2022-W52-7', self.theclass(2023, 1, 1)),
+            ('2015W534', self.theclass(2015, 12, 31)),      # Has week 53
+            ('2015-W53-4', self.theclass(2015, 12, 31)),    # Has week 53
+            ('2015-W53-5', self.theclass(2016, 1, 1)),
+            ('2020W531', self.theclass(2020, 12, 28)),      # Leap year
+            ('2020-W53-1', self.theclass(2020, 12, 28)),    # Leap year
+            ('2020-W53-6', self.theclass(2021, 1, 2)),
+        ]
+
+        for input_str, expected in examples:
+            with self.subTest(input_str=input_str):
+                actual = self.theclass.fromisoformat(input_str)
+                self.assertEqual(actual, expected)
+
     def test_fromisoformat_subclass(self):
         class DateSubclass(self.theclass):
             pass
@@ -1862,7 +1898,8 @@ def test_fromisoformat_fails(self):
             '2009-12-0a',       # Invalid character in day
             '2009-01-32',       # Invalid day
             '2009-02-29',       # Invalid leap day
-            '20090228',         # Valid ISO8601 output not from isoformat()
+            '2019-W53-1',       # No week 53 in 2019
+            '2020-W54-1',       # No week 54
             '2009\ud80002\ud80028',     # Separators are surrogate codepoints
         ]
 
@@ -3003,6 +3040,140 @@ def test_fromisoformat_timespecs(self):
                         dt_rt = self.theclass.fromisoformat(dtstr)
                         self.assertEqual(dt, dt_rt)
 
+    def test_fromisoformat_datetime_examples(self):
+        UTC = timezone.utc
+        BST = timezone(timedelta(hours=1), 'BST')
+        EST = timezone(timedelta(hours=-5), 'EST')
+        EDT = timezone(timedelta(hours=-4), 'EDT')
+        examples = [
+            ('2025-01-02', self.theclass(2025, 1, 2, 0, 0)),
+            ('2025-01-02T03', self.theclass(2025, 1, 2, 3, 0)),
+            ('2025-01-02T03:04', self.theclass(2025, 1, 2, 3, 4)),
+            ('2025-01-02T0304', self.theclass(2025, 1, 2, 3, 4)),
+            ('2025-01-02T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)),
+            ('2025-01-02T030405', self.theclass(2025, 1, 2, 3, 4, 5)),
+            ('2025-01-02T03:04:05.6',
+             self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
+            ('2025-01-02T03:04:05,6',
+             self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
+            ('2025-01-02T03:04:05.678',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
+            ('2025-01-02T03:04:05.678901',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
+            ('2025-01-02T03:04:05,678901',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
+            ('2025-01-02T030405.678901',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
+            ('2025-01-02T030405,678901',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
+            ('2025-01-02T03:04:05.6789010',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
+            ('2009-04-19T03:15:45.2345',
+             self.theclass(2009, 4, 19, 3, 15, 45, 234500)),
+            ('2009-04-19T03:15:45.1234567',
+             self.theclass(2009, 4, 19, 3, 15, 45, 123456)),
+            ('2025-01-02T03:04:05,678',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
+            ('20250102', self.theclass(2025, 1, 2, 0, 0)),
+            ('20250102T03', self.theclass(2025, 1, 2, 3, 0)),
+            ('20250102T03:04', self.theclass(2025, 1, 2, 3, 4)),
+            ('20250102T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)),
+            ('20250102T030405', self.theclass(2025, 1, 2, 3, 4, 5)),
+            ('20250102T03:04:05.6',
+             self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
+            ('20250102T03:04:05,6',
+             self.theclass(2025, 1, 2, 3, 4, 5, 600000)),
+            ('20250102T03:04:05.678',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
+            ('20250102T03:04:05,678',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678000)),
+            ('20250102T03:04:05.678901',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
+            ('20250102T030405.678901',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
+            ('20250102T030405,678901',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
+            ('20250102T030405.6789010',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
+            ('2022W01', self.theclass(2022, 1, 3)),
+            ('2022W52520', self.theclass(2022, 12, 26, 20, 0)),
+            ('2022W527520', self.theclass(2023, 1, 1, 20, 0)),
+            ('2026W01516', self.theclass(2025, 12, 29, 16, 0)),
+            ('2026W013516', self.theclass(2025, 12, 31, 16, 0)),
+            ('2025W01503', self.theclass(2024, 12, 30, 3, 0)),
+            ('2025W014503', self.theclass(2025, 1, 2, 3, 0)),
+            ('2025W01512', self.theclass(2024, 12, 30, 12, 0)),
+            ('2025W014512', self.theclass(2025, 1, 2, 12, 0)),
+            ('2025W014T121431', self.theclass(2025, 1, 2, 12, 14, 31)),
+            ('2026W013T162100', self.theclass(2025, 12, 31, 16, 21)),
+            ('2026W013 162100', self.theclass(2025, 12, 31, 16, 21)),
+            ('2022W527T202159', self.theclass(2023, 1, 1, 20, 21, 59)),
+            ('2022W527 202159', self.theclass(2023, 1, 1, 20, 21, 59)),
+            ('2025W014 121431', self.theclass(2025, 1, 2, 12, 14, 31)),
+            ('2025W014T030405', self.theclass(2025, 1, 2, 3, 4, 5)),
+            ('2025W014 030405', self.theclass(2025, 1, 2, 3, 4, 5)),
+            ('2020-W53-6T03:04:05', self.theclass(2021, 1, 2, 3, 4, 5)),
+            ('2020W537 03:04:05', self.theclass(2021, 1, 3, 3, 4, 5)),
+            ('2025-W01-4T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)),
+            ('2025-W01-4T03:04:05.678901',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678901)),
+            ('2025-W01-4T12:14:31', self.theclass(2025, 1, 2, 12, 14, 31)),
+            ('2025-W01-4T12:14:31.012345',
+             self.theclass(2025, 1, 2, 12, 14, 31, 12345)),
+            ('2026-W01-3T16:21:00', self.theclass(2025, 12, 31, 16, 21)),
+            ('2026-W01-3T16:21:00.000000', self.theclass(2025, 12, 31, 16, 21)),
+            ('2022-W52-7T20:21:59',
+             self.theclass(2023, 1, 1, 20, 21, 59)),
+            ('2022-W52-7T20:21:59.999999',
+             self.theclass(2023, 1, 1, 20, 21, 59, 999999)),
+            ('2025-W01003+00',
+             self.theclass(2024, 12, 30, 3, 0, tzinfo=UTC)),
+            ('2025-01-02T03:04:05+00',
+             self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)),
+            ('2025-01-02T03:04:05Z',
+             self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)),
+            ('2025-01-02003:04:05,6+00:00:00.00',
+             self.theclass(2025, 1, 2, 3, 4, 5, 600000, tzinfo=UTC)),
+            ('2000-01-01T00+21',
+             self.theclass(2000, 1, 1, 0, 0, tzinfo=timezone(timedelta(hours=21)))),
+            ('2025-01-02T03:05:06+0300',
+             self.theclass(2025, 1, 2, 3, 5, 6,
+                           tzinfo=timezone(timedelta(hours=3)))),
+            ('2025-01-02T03:05:06-0300',
+             self.theclass(2025, 1, 2, 3, 5, 6,
+                           tzinfo=timezone(timedelta(hours=-3)))),
+            ('2025-01-02T03:04:05+0000',
+             self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)),
+            ('2025-01-02T03:05:06+03',
+             self.theclass(2025, 1, 2, 3, 5, 6,
+                           tzinfo=timezone(timedelta(hours=3)))),
+            ('2025-01-02T03:05:06-03',
+             self.theclass(2025, 1, 2, 3, 5, 6,
+                           tzinfo=timezone(timedelta(hours=-3)))),
+            ('2020-01-01T03:05:07.123457-05:00',
+             self.theclass(2020, 1, 1, 3, 5, 7, 123457, tzinfo=EST)),
+            ('2020-01-01T03:05:07.123457-0500',
+             self.theclass(2020, 1, 1, 3, 5, 7, 123457, tzinfo=EST)),
+            ('2020-06-01T04:05:06.111111-04:00',
+             self.theclass(2020, 6, 1, 4, 5, 6, 111111, tzinfo=EDT)),
+            ('2020-06-01T04:05:06.111111-0400',
+             self.theclass(2020, 6, 1, 4, 5, 6, 111111, tzinfo=EDT)),
+            ('2021-10-31T01:30:00.000000+01:00',
+             self.theclass(2021, 10, 31, 1, 30, tzinfo=BST)),
+            ('2021-10-31T01:30:00.000000+0100',
+             self.theclass(2021, 10, 31, 1, 30, tzinfo=BST)),
+            ('2025-01-02T03:04:05,6+000000.00',
+             self.theclass(2025, 1, 2, 3, 4, 5, 600000, tzinfo=UTC)),
+            ('2025-01-02T03:04:05,678+00:00:10',
+             self.theclass(2025, 1, 2, 3, 4, 5, 678000,
+                           tzinfo=timezone(timedelta(seconds=10)))),
+        ]
+
+        for input_str, expected in examples:
+            with self.subTest(input_str=input_str):
+                actual = self.theclass.fromisoformat(input_str)
+                self.assertEqual(actual, expected)
+
     def test_fromisoformat_fails_datetime(self):
         # Test that fromisoformat() fails on invalid values
         bad_strs = [
@@ -3016,8 +3187,6 @@ def test_fromisoformat_fails_datetime(self):
             '2009-04-19T03;15:45',          # Bad first time separator
             '2009-04-19T03:15;45',          # Bad second time separator
             '2009-04-19T03:15:4500:00',     # Bad time zone separator
-            '2009-04-19T03:15:45.2345',     # Too many digits for milliseconds
-            '2009-04-19T03:15:45.1234567',  # Too many digits for microseconds
             '2009-04-19T03:15:45.123456+24:30',    # Invalid time zone offset
             '2009-04-19T03:15:45.123456-24:30',    # Invalid negative offset
             '2009-04-10ᛇᛇᛇᛇᛇ12:15',         # Too many unicode separators
@@ -3962,6 +4131,76 @@ def test_fromisoformat_timespecs(self):
                         t_rt = self.theclass.fromisoformat(tstr)
                         self.assertEqual(t, t_rt)
 
+    def test_fromisoformat_fractions(self):
+        strs = [
+            ('12:30:45.1', (12, 30, 45, 100000)),
+            ('12:30:45.12', (12, 30, 45, 120000)),
+            ('12:30:45.123', (12, 30, 45, 123000)),
+            ('12:30:45.1234', (12, 30, 45, 123400)),
+            ('12:30:45.12345', (12, 30, 45, 123450)),
+            ('12:30:45.123456', (12, 30, 45, 123456)),
+            ('12:30:45.1234567', (12, 30, 45, 123456)),
+            ('12:30:45.12345678', (12, 30, 45, 123456)),
+        ]
+
+        for time_str, time_comps in strs:
+            expected = self.theclass(*time_comps)
+            actual = self.theclass.fromisoformat(time_str)
+
+            self.assertEqual(actual, expected)
+
+    def test_fromisoformat_time_examples(self):
+        examples = [
+            ('0000', self.theclass(0, 0)),
+            ('00:00', self.theclass(0, 0)),
+            ('000000', self.theclass(0, 0)),
+            ('00:00:00', self.theclass(0, 0)),
+            ('000000.0', self.theclass(0, 0)),
+            ('00:00:00.0', self.theclass(0, 0)),
+            ('000000.000', self.theclass(0, 0)),
+            ('00:00:00.000', self.theclass(0, 0)),
+            ('000000.000000', self.theclass(0, 0)),
+            ('00:00:00.000000', self.theclass(0, 0)),
+            ('1200', self.theclass(12, 0)),
+            ('12:00', self.theclass(12, 0)),
+            ('120000', self.theclass(12, 0)),
+            ('12:00:00', self.theclass(12, 0)),
+            ('120000.0', self.theclass(12, 0)),
+            ('12:00:00.0', self.theclass(12, 0)),
+            ('120000.000', self.theclass(12, 0)),
+            ('12:00:00.000', self.theclass(12, 0)),
+            ('120000.000000', self.theclass(12, 0)),
+            ('12:00:00.000000', self.theclass(12, 0)),
+            ('2359', self.theclass(23, 59)),
+            ('23:59', self.theclass(23, 59)),
+            ('235959', self.theclass(23, 59, 59)),
+            ('23:59:59', self.theclass(23, 59, 59)),
+            ('235959.9', self.theclass(23, 59, 59, 900000)),
+            ('23:59:59.9', self.theclass(23, 59, 59, 900000)),
+            ('235959.999', self.theclass(23, 59, 59, 999000)),
+            ('23:59:59.999', self.theclass(23, 59, 59, 999000)),
+            ('235959.999999', self.theclass(23, 59, 59, 999999)),
+            ('23:59:59.999999', self.theclass(23, 59, 59, 999999)),
+            ('00:00:00Z', self.theclass(0, 0, tzinfo=timezone.utc)),
+            ('12:00:00+0000', self.theclass(12, 0, tzinfo=timezone.utc)),
+            ('12:00:00+00:00', self.theclass(12, 0, tzinfo=timezone.utc)),
+            ('00:00:00+05',
+             self.theclass(0, 0, tzinfo=timezone(timedelta(hours=5)))),
+            ('00:00:00+05:30',
+             self.theclass(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30)))),
+            ('12:00:00-05:00',
+             self.theclass(12, 0, tzinfo=timezone(timedelta(hours=-5)))),
+            ('12:00:00-0500',
+             self.theclass(12, 0, tzinfo=timezone(timedelta(hours=-5)))),
+            ('00:00:00,000-23:59:59.999999',
+             self.theclass(0, 0, tzinfo=timezone(-timedelta(hours=23, minutes=59, seconds=59, microseconds=999999)))),
+        ]
+
+        for input_str, expected in examples:
+            with self.subTest(input_str=input_str):
+                actual = self.theclass.fromisoformat(input_str)
+                self.assertEqual(actual, expected)
+
     def test_fromisoformat_fails(self):
         bad_strs = [
             '',                         # Empty string
@@ -3975,15 +4214,17 @@ def test_fromisoformat_fails(self):
             '1a:30:45.334034',          # Invalid character in hours
             '12:a0:45.334034',          # Invalid character in minutes
             '12:30:a5.334034',          # Invalid character in seconds
-            '12:30:45.1234',            # Too many digits for milliseconds
-            '12:30:45.1234567',         # Too many digits for microseconds
             '12:30:45.123456+24:30',    # Invalid time zone offset
             '12:30:45.123456-24:30',    # Invalid negative offset
             '12:30:45',                 # Uses full-width unicode colons
+            '12:30:45.123456a',         # Non-numeric data after 6 components
+            '12:30:45.123456789a',      # Non-numeric data after 9 components
             '12:30:45․123456',          # Uses \u2024 in place of decimal point
             '12:30:45a',                # Extra at tend of basic time
             '12:30:45.123a',            # Extra at end of millisecond time
             '12:30:45.123456a',         # Extra at end of microsecond time
+            '12:30:45.123456-',         # Extra at end of microsecond time
+            '12:30:45.123456+',         # Extra at end of microsecond time
             '12:30:45.123456+12:00:30a',    # Extra at end of full time
         ]
 
diff --git a/Misc/NEWS.d/next/Library/2022-05-03-12-11-27.gh-issue-80010.yG54RE.rst b/Misc/NEWS.d/next/Library/2022-05-03-12-11-27.gh-issue-80010.yG54RE.rst
new file mode 100644
index 0000000000000..bbcef471d2c0d
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-05-03-12-11-27.gh-issue-80010.yG54RE.rst
@@ -0,0 +1,3 @@
+Add support for generalized ISO 8601 parsing to
+:meth:`datetime.datetime.fromisoformat`, :meth:`datetime.date.fromisoformat`
+and :meth:`datetime.time.fromisoformat`. Patch by Paul Ganssle.
diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c
index 20cdb1822ab96..efb5278038f2f 100644
--- a/Modules/_datetimemodule.c
+++ b/Modules/_datetimemodule.c
@@ -395,6 +395,39 @@ iso_week1_monday(int year)
     return week1_monday;
 }
 
+static int
+iso_to_ymd(const int iso_year, const int iso_week, const int iso_day,
+           int *year, int *month, int *day) {
+    if (iso_week <= 0 || iso_week >= 53) {
+        int out_of_range = 1;
+        if (iso_week == 53) {
+            // An ISO year has 53 weeks if it starts on a Thursday, or if it
+            // is a leap year starting on a Wednesday
+            int first_weekday = weekday(iso_year, 1, 1);
+            if (first_weekday == 3 || (first_weekday == 2 && is_leap(iso_year))) {
+                out_of_range = 0;
+            }
+        }
+
+        if (out_of_range) {
+            return -2;
+        }
+    }
+
+    if (iso_day <= 0 || iso_day >= 8) {
+        return -3;
+    }
+
+    // Convert (Y, W, D) to (Y, M, D) in-place
+    int day_1 = iso_week1_monday(iso_year);
+
+    int day_offset = (iso_week - 1)*7 + iso_day - 1;
+
+    ord_to_ymd(day_1 + day_offset, year, month, day);
+    return 0;
+}
+
+
 /* ---------------------------------------------------------------------------
  * Range checkers.
  */
@@ -680,6 +713,11 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d)
  * String parsing utilities and helper functions
  */
 
+static unsigned char
+is_digit(const char c) {
+    return ((unsigned int)(c - '0')) < 10;
+}
+
 static const char *
 parse_digits(const char *ptr, int *var, size_t num_digits)
 {
@@ -696,14 +734,17 @@ parse_digits(const char *ptr, int *var, size_t num_digits)
 }
 
 static int
-parse_isoformat_date(const char *dtstr, int *year, int *month, int *day)
+parse_isoformat_date(const char *dtstr, const size_t len, int *year, int *month, int *day)
 {
     /* Parse the date components of the result of date.isoformat()
      *
      *  Return codes:
      *       0:  Success
      *      -1:  Failed to parse date component
-     *      -2:  Failed to parse dateseparator
+     *      -2:  Inconsistent date separator usage
+     *      -3:  Failed to parse ISO week.
+     *      -4:  Failed to parse ISO day.
+     *      -5, -6: Failure in iso_to_ymd
      */
     const char *p = dtstr;
     p = parse_digits(p, year, 4);
@@ -711,8 +752,42 @@ parse_isoformat_date(const char *dtstr, int *year, int *month, int *day)
         return -1;
     }
 
-    if (*(p++) != '-') {
-        return -2;
+    const unsigned char uses_separator = (*p == '-');
+    if (uses_separator) {
+        ++p;
+    }
+
+    if(*p == 'W') {
+        // This is an isocalendar-style date string
+        p++;
+        int iso_week = 0;
+        int iso_day = 0;
+
+        p = parse_digits(p, &iso_week, 2);
+        if (NULL == p) {
+            return -3;
+        }
+
+        assert(p > dtstr);
+        if ((size_t)(p - dtstr) < len) {
+            if (uses_separator && *(p++) != '-') {
+                return -2;
+            }
+
+            p = parse_digits(p, &iso_day, 1);
+            if (NULL == p) {
+                return -4;
+            }
+        } else {
+            iso_day = 1;
+        }
+
+        int rv = iso_to_ymd(*year, iso_week, iso_day, year, month, day);
+        if (rv) {
+            return -3 + rv;
+        } else {
+            return 0;
+        }
     }
 
     p = parse_digits(p, month, 2);
@@ -720,15 +795,13 @@ parse_isoformat_date(const char *dtstr, int *year, int *month, int *day)
         return -1;
     }
 
-    if (*(p++) != '-') {
+    if (uses_separator && *(p++) != '-') {
         return -2;
     }
-
     p = parse_digits(p, day, 2);
     if (p == NULL) {
         return -1;
     }
-
     return 0;
 }
 
@@ -736,11 +809,14 @@ static int
 parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour,
                   int *minute, int *second, int *microsecond)
 {
+    *hour = *minute = *second = *microsecond = 0;
     const char *p = tstr;
     const char *p_end = tstr_end;
     int *vals[3] = {hour, minute, second};
+    // This is initialized to satisfy an erroneous compiler warning.
+    unsigned char has_separator = 1;
 
-    // Parse [HH[:MM[:SS]]]
+    // Parse [HH[:?MM[:?SS]]]
     for (size_t i = 0; i < 3; ++i) {
         p = parse_digits(p, vals[i], 2);
         if (NULL == p) {
@@ -748,33 +824,47 @@ parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour,
         }
 
         char c = *(p++);
+        if (i == 0) {
+            has_separator = (c == ':');
+        }
+
         if (p >= p_end) {
             return c != '\0';
         }
-        else if (c == ':') {
+        else if (has_separator && (c == ':')) {
             continue;
         }
-        else if (c == '.') {
+        else if (c == '.' || c == ',') {
             break;
-        }
-        else {
+        } else if (!has_separator) {
+            --p;
+        } else {
             return -4;  // Malformed time separator
         }
     }
 
-    // Parse .fff[fff]
+    // Parse fractional components
     size_t len_remains = p_end - p;
-    if (!(len_remains == 6 || len_remains == 3)) {
-        return -3;
+    size_t to_parse = len_remains;
+    if (len_remains >= 6) {
+        to_parse = 6;
     }
 
-    p = parse_digits(p, microsecond, len_remains);
+    p = parse_digits(p, microsecond, to_parse);
     if (NULL == p) {
         return -3;
     }
 
-    if (len_remains == 3) {
-        *microsecond *= 1000;
+    static int correction[] = {
+        100000, 10000, 1000, 100, 10
+    };
+
+    if (to_parse < 6) {
+        *microsecond *= correction[to_parse-1];
+    }
+
+    while (is_digit(*p)){
+        ++p; // skip truncated digits
     }
 
     // Return 1 if it's not the end of the string
@@ -800,7 +890,7 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute,
 
     const char *tzinfo_pos = p;
     do {
-        if (*tzinfo_pos == '+' || *tzinfo_pos == '-') {
+        if (*tzinfo_pos == 'Z' || *tzinfo_pos == '+' || *tzinfo_pos == '-') {
             break;
         }
     } while (++tzinfo_pos < p_end);
@@ -822,14 +912,16 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute,
         }
     }
 
-    // Parse time zone component
-    // Valid formats are:
-    //    - +HH:MM           (len  6)
-    //    - +HH:MM:SS        (len  9)
-    //    - +HH:MM:SS.ffffff (len 16)
-    size_t tzlen = p_end - tzinfo_pos;
-    if (!(tzlen == 6 || tzlen == 9 || tzlen == 16)) {
-        return -5;
+    // Special case UTC / Zulu time.
+    if (*tzinfo_pos == 'Z') {
+        *tzoffset = 0;
+        *tzmicrosecond = 0;
+
+        if (*(tzinfo_pos + 1) != '\0') {
+            return -5;
+        } else {
+            return 1;
+        }
     }
 
     int tzsign = (*tzinfo_pos == '-') ? -1 : 1;
@@ -2983,8 +3075,8 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr)
     int year = 0, month = 0, day = 0;
 
     int rv;
-    if (len == 10) {
-        rv = parse_isoformat_date(dt_ptr, &year, &month, &day);
+    if (len == 7 || len == 8 || len == 10) {
+        rv = parse_isoformat_date(dt_ptr, len, &year, &month, &day);
     }
     else {
         rv = -1;
@@ -3027,37 +3119,21 @@ date_fromisocalendar(PyObject *cls, PyObject *args, PyObject *kw)
         return NULL;
     }
 
-    if (week <= 0 || week >= 53) {
-        int out_of_range = 1;
-        if (week == 53) {
-            // ISO years have 53 weeks in it on years starting with a Thursday
-            // and on leap years starting on Wednesday
-            int first_weekday = weekday(year, 1, 1);
-            if (first_weekday == 3 || (first_weekday == 2 && is_leap(year))) {
-                out_of_range = 0;
-            }
-        }
+    int month;
+    int rv = iso_to_ymd(year, week, day, &year, &month, &day);
 
-        if (out_of_range) {
-            PyErr_Format(PyExc_ValueError, "Invalid week: %d", week);
-            return NULL;
-        }
+
+    if (rv == -2) {
+        PyErr_Format(PyExc_ValueError, "Invalid week: %d", week);
+        return NULL;
     }
 
-    if (day <= 0 || day >= 8) {
+    if (rv == -3) {
         PyErr_Format(PyExc_ValueError, "Invalid day: %d (range is [1, 7])",
                      day);
         return NULL;
     }
 
-    // Convert (Y, W, D) to (Y, M, D) in-place
-    int day_1 = iso_week1_monday(year);
-
-    int month = week;
-    int day_offset = (month - 1)*7 + day - 1;
-
-    ord_to_ymd(day_1 + day_offset, &year, &month, &day);
-
     return new_date_subclass_ex(year, month, day, cls);
 }
 
@@ -3489,7 +3565,7 @@ static PyMethodDef date_methods[] = {
 
      {"fromisoformat", (PyCFunction)date_fromisoformat,  METH_O |
                                                          METH_CLASS,
-      PyDoc_STR("str -> Construct a date from the output of date.isoformat()")},
+      PyDoc_STR("str -> Construct a date from a string in ISO 8601 format.")},
 
      {"fromisocalendar", _PyCFunction_CAST(date_fromisocalendar),
       METH_VARARGS | METH_KEYWORDS | METH_CLASS,
@@ -4564,6 +4640,14 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) {
         goto invalid_string_error;
     }
 
+    // The spec actually requires that time-only ISO 8601 strings start with
+    // T, but the extended format allows this to be omitted as long as there
+    // is no ambiguity with date strings.
+    if (*p == 'T') {
+        ++p;
+        len -= 1;
+    }
+
     int hour = 0, minute = 0, second = 0, microsecond = 0;
     int tzoffset, tzimicrosecond = 0;
     int rv = parse_isoformat_time(p, len,
@@ -4671,7 +4755,7 @@ static PyMethodDef time_methods[] = {
      PyDoc_STR("Return time with new specified fields.")},
 
      {"fromisoformat", (PyCFunction)time_fromisoformat, METH_O | METH_CLASS,
-     PyDoc_STR("string -> time from time.isoformat() output")},
+     PyDoc_STR("string -> time from a string in ISO 8601 format")},
 
     {"__reduce_ex__", (PyCFunction)time_reduce_ex,        METH_VARARGS,
      PyDoc_STR("__reduce_ex__(proto) -> (cls, state)")},
@@ -5184,19 +5268,42 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw)
 static PyObject *
 _sanitize_isoformat_str(PyObject *dtstr)
 {
+    Py_ssize_t len = PyUnicode_GetLength(dtstr);
+    if (len < 7) {  // All valid ISO 8601 strings are at least 7 characters long
+        return NULL;
+    }
+
     // `fromisoformat` allows surrogate characters in exactly one position,
     // the separator; to allow datetime_fromisoformat to make the simplifying
     // assumption that all valid strings can be encoded in UTF-8, this function
     // replaces any surrogate character separators with `T`.
     //
     // The result of this, if not NULL, returns a new reference
-    Py_ssize_t len = PyUnicode_GetLength(dtstr);
-    if (len < 0) {
-        return NULL;
+    const void* const unicode_data = PyUnicode_DATA(dtstr);
+    const unsigned int kind = PyUnicode_KIND(dtstr);
+
+    // Depending on the format of the string, the separator can only ever be
+    // in positions 7, 8 or 10. We'll check each of these for a surrogate and
+    // if we find one, replace it with `T`. If there is more than one surrogate,
+    // we don't have to bother sanitizing it, because the function will later
+    // fail when we try to encode the string as ASCII.
+    static const size_t potential_separators[3] = {7, 8, 10};
+    size_t surrogate_separator = 0;
+    for(size_t idx = 0;
+         idx < sizeof(potential_separators) / sizeof(*potential_separators);
+         ++idx) {
+        size_t pos = potential_separators[idx];
+        if (pos > (size_t)len) {
+            break;
+        }
+
+        if(Py_UNICODE_IS_SURROGATE(PyUnicode_READ(kind, unicode_data, pos))) {
+            surrogate_separator = pos;
+            break;
+        }
     }
 
-    if (len <= 10 ||
-        !Py_UNICODE_IS_SURROGATE(PyUnicode_READ_CHAR(dtstr, 10))) {
+    if (surrogate_separator == 0) {
         Py_INCREF(dtstr);
         return dtstr;
     }
@@ -5206,7 +5313,7 @@ _sanitize_isoformat_str(PyObject *dtstr)
         return NULL;
     }
 
-    if (PyUnicode_WriteChar(str_out, 10, (Py_UCS4)'T')) {
+    if (PyUnicode_WriteChar(str_out, surrogate_separator, (Py_UCS4)'T')) {
         Py_DECREF(str_out);
         return NULL;
     }
@@ -5214,6 +5321,106 @@ _sanitize_isoformat_str(PyObject *dtstr)
     return str_out;
 }
 
+
+static Py_ssize_t
+_find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) {
+    // The valid date formats can all be distinguished by characters 4 and 5,
+    // and further narrowed down by the characters that follow them,
+    // which tells us where to look for the separator character.
+    // Format    |  As-rendered |   Position
+    // ---------------------------------------
+    // %Y-%m-%d  |  YYYY-MM-DD  |    10
+    // %Y%m%d    |  YYYYMMDD    |     8
+    // %Y-W%V    |  YYYY-Www    |     8
+    // %YW%V     |  YYYYWww     |     7
+    // %Y-W%V-%u |  YYYY-Www-d  |    10
+    // %YW%V%u   |  YYYYWwwd    |     8
+    // %Y-%j     |  YYYY-DDD    |     8
+    // %Y%j      |  YYYYDDD     |     7
+    //
+    // Note that because we allow *any* character for the separator, in the
+    // case where character 4 is W, it's not straightforward to determine where
+    // the separator is — in the case of YYYY-Www-d, you have actual ambiguity,
+    // e.g. 2020-W01-0000 could be YYYY-Www-D0HH or YYYY-Www-HHMM, when the
+    // separator character is a number in the former case or a hyphen in the
+    // latter case.
+    //
+    // The case of YYYYWww can be distinguished from YYYYWwwd by tracking ahead
+    // to either the end of the string or the first non-numeric character —
+    // since the time components all come in pairs YYYYWww#HH can be
+    // distinguished from YYYYWwwd#HH by the fact that there will always be an
+    // odd number of digits before the first non-digit character in the former
+    // case.
+    static const char date_separator = '-';
+    static const char week_indicator = 'W';
+
+    if (len == 7) {
+        return 7;
+    }
+
+    if (dtstr[4] == date_separator) {
+        // YYYY-???
+
+        if (dtstr[5] == week_indicator) {
+            // YYYY-W??
+
+            if (len < 8) {
+                return -1;
+            }
+
+            if (len > 8 && dtstr[8] == date_separator) {
+                // YYYY-Www-D (10) or YYYY-Www-HH (8)
+                if (len == 9) { return -1; }
+                if (len > 10 && is_digit(dtstr[10])) {
+                    // This is as far as we'll try to go to resolve the
+                    // ambiguity for the moment — if we have YYYY-Www-##, the
+                    // separator is either a hyphen at 8 or a number at 10.
+                    //
+                    // We'll assume it's a hyphen at 8 because it's way more
+                    // likely that someone will use a hyphen as a separator
+                    // than a number, but at this point it's really best effort
+                    // because this is an extension of the spec anyway.
+                    return 8;
+                }
+
+                return 10;
+            } else {
+                // YYYY-Www (8)
+                return 8;
+            }
+        } else {
+            // YYYY-MM-DD (10)
+            return 10;
+        }
+    } else {
+        // YYYY???
+        if (dtstr[4] == week_indicator) {
+            // YYYYWww (7) or YYYYWwwd (8)
+            size_t idx = 7;
+            for (; idx < (size_t)len; ++idx) {
+                // Keep going until we run out of digits.
+                if (!is_digit(dtstr[idx])) {
+                    break;
+                }
+            }
+
+            if (idx < 9) {
+                return idx;
+            }
+
+            if (idx % 2 == 0) {
+                // If the run of digits ends at an even index, it's YYYYWww
+                return 7;
+            } else {
+                return 8;
+            }
+        } else {
+            // YYYYMMDD (8)
+            return 8;
+        }
+    }
+}
+
 static PyObject *
 datetime_fromisoformat(PyObject *cls, PyObject *dtstr)
 {
@@ -5225,9 +5432,14 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr)
         return NULL;
     }
 
+    // We only need to sanitize this string if the separator is a surrogate
+    // character. In the situation where the separator location is ambiguous,
+    // we don't have to sanitize anything because that can only happen when
+    // the separator is either '-' or a number. This should mostly be a noop
+    // but it makes the reference counting easier if we still sanitize.
     PyObject *dtstr_clean = _sanitize_isoformat_str(dtstr);
     if (dtstr_clean == NULL) {
-        goto error;
+        goto invalid_string_error;
     }
 
     Py_ssize_t len;
@@ -5243,30 +5455,35 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr)
         }
     }
 
+    const Py_ssize_t separator_location = _find_isoformat_datetime_separator(
+            dt_ptr, len);
+
+
     const char *p = dt_ptr;
 
     int year = 0, month = 0, day = 0;
     int hour = 0, minute = 0, second = 0, microsecond = 0;
     int tzoffset = 0, tzusec = 0;
 
-    // date has a fixed length of 10
-    int rv = parse_isoformat_date(p, &year, &month, &day);
+    // date runs up to separator_location
+    int rv = parse_isoformat_date(p, separator_location, &year, &month, &day);
 
-    if (!rv && len > 10) {
+    if (!rv && len > separator_location) {
         // In UTF-8, the length of multi-byte characters is encoded in the MSB
-        if ((p[10] & 0x80) == 0) {
-            p += 11;
+        p += separator_location;
+        if ((p[0] & 0x80) == 0) {
+            p += 1;
         }
         else {
-            switch (p[10] & 0xf0) {
+            switch (p[0] & 0xf0) {
                 case 0xe0:
-                    p += 13;
+                    p += 3;
                     break;
                 case 0xf0:
-                    p += 14;
+                    p += 4;
                     break;
                 default:
-                    p += 12;
+                    p += 2;
                     break;
             }
         }
@@ -6327,7 +6544,7 @@ static PyMethodDef datetime_methods[] = {
 
     {"fromisoformat", (PyCFunction)datetime_fromisoformat,
      METH_O | METH_CLASS,
-     PyDoc_STR("string -> datetime from datetime.isoformat() output")},
+     PyDoc_STR("string -> datetime from a string in most ISO 8601 formats")},
 
     /* Instance methods: */
 



More information about the Python-checkins mailing list