[Python-checkins] Fix inconsistent return type for statistics median_grouped() gh-92531 (#92533)

rhettinger webhook-mailer at python.org
Mon May 9 03:09:04 EDT 2022


https://github.com/python/cpython/commit/e01eeb7b4b8d00b9f5c6acb48957f46ac4e252c0
commit: e01eeb7b4b8d00b9f5c6acb48957f46ac4e252c0
branch: main
author: Raymond Hettinger <rhettinger at users.noreply.github.com>
committer: rhettinger <rhettinger at users.noreply.github.com>
date: 2022-05-09T02:08:41-05:00
summary:

Fix inconsistent return type for statistics median_grouped()  gh-92531 (#92533)

files:
A Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst
M Lib/statistics.py
M Lib/test/test_statistics.py

diff --git a/Lib/statistics.py b/Lib/statistics.py
index 54f4e13265189..2d66b0522f19d 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -611,7 +611,7 @@ def median_high(data):
     return data[n // 2]
 
 
-def median_grouped(data, interval=1):
+def median_grouped(data, interval=1.0):
     """Estimates the median for numeric data binned around the midpoints
     of consecutive, fixed-width intervals.
 
@@ -650,35 +650,34 @@ def median_grouped(data, interval=1):
     by exact multiples of *interval*.  This is essential for getting a
     correct result.  The function does not check this precondition.
 
+    Inputs may be any numeric type that can be coerced to a float during
+    the interpolation step.
+
     """
     data = sorted(data)
     n = len(data)
-    if n == 0:
+    if not n:
         raise StatisticsError("no median for empty data")
-    elif n == 1:
-        return data[0]
 
     # Find the value at the midpoint. Remember this corresponds to the
     # midpoint of the class interval.
     x = data[n // 2]
 
-    # Generate a clear error message for non-numeric data
-    for obj in (x, interval):
-        if isinstance(obj, (str, bytes)):
-            raise TypeError(f'expected a number but got {obj!r}')
-
     # Using O(log n) bisection, find where all the x values occur in the data.
     # All x will lie within data[i:j].
     i = bisect_left(data, x)
     j = bisect_right(data, x, lo=i)
 
+    # Coerce to floats, raising a TypeError if not possible
+    try:
+        interval = float(interval)
+        x = float(x)
+    except ValueError:
+        raise TypeError(f'Value cannot be converted to a float')
+
     # Interpolate the median using the formula found at:
     # https://www.cuemath.com/data/median-of-grouped-data/
-    try:
-        L = x - interval / 2  # The lower limit of the median interval.
-    except TypeError:
-        # Coerce mixed types to float.
-        L = float(x) - float(interval) / 2
+    L = x - interval / 2.0    # Lower limit of the median interval
     cf = i                    # Cumulative frequency of the preceding interval
     f = j - i                 # Number of elements in the median internal
     return L + interval * (n / 2 - cf) / f
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index ed6021d60bde7..6de98241c294d 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -1742,6 +1742,12 @@ def test_repeated_single_value(self):
                 data = [x]*count
                 self.assertEqual(self.func(data), float(x))
 
+    def test_single_value(self):
+        # Override method from AverageMixin.
+        # Average of a single value is the value as a float.
+        for x in (23, 42.5, 1.3e15, Fraction(15, 19), Decimal('0.28')):
+            self.assertEqual(self.func([x]), float(x))
+
     def test_odd_fractions(self):
         # Test median_grouped works with an odd number of Fractions.
         F = Fraction
diff --git a/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst b/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst
new file mode 100644
index 0000000000000..574fa6c4d9799
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-05-09-01-27-25.gh-issue-92531.vV7S_O.rst
@@ -0,0 +1,3 @@
+The statistics.median_grouped() function now always returns a float.
+Formerly, it did not convert the input type for sequences of length
+one.



More information about the Python-checkins mailing list