[Python-checkins] bpo-44150: Support optional weights parameter for fmean() (GH-26175)

Thu May 20 23:22:39 EDT 2021

https://github.com/python/cpython/commit/be4dd7fcd93ed29d362c4bbcc48151bc619d6595
commit: be4dd7fcd93ed29d362c4bbcc48151bc619d6595
branch: main
author: Raymond Hettinger <rhettinger at users.noreply.github.com>
committer: rhettinger <rhettinger at users.noreply.github.com>
date: 2021-05-20T20:22:26-07:00
summary:

bpo-44150: Support optional weights parameter for fmean() (GH-26175)

files:
A Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst
M Doc/library/statistics.rst
M Lib/statistics.py
M Lib/test/test_statistics.py

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index a65c9840b8113a..fce4cffd8c69b1 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -43,7 +43,7 @@ or sample.
 
 =======================  ===============================================================
 :func:`mean`             Arithmetic mean ("average") of data.
-:func:`fmean`            Fast, floating point arithmetic mean.
+:func:`fmean`            Fast, floating point arithmetic mean, with optional weighting.
 :func:`geometric_mean`   Geometric mean of data.
 :func:`harmonic_mean`    Harmonic mean of data.
 :func:`median`           Median (middle value) of data.
@@ -128,7 +128,7 @@ However, for reading convenience, most of the examples show sorted sequences.
       ``mean(data)`` is equivalent to calculating the true population mean μ.
 
 
-.. function:: fmean(data)
+.. function:: fmean(data, weights=None)
 
    Convert *data* to floats and compute the arithmetic mean.
 
@@ -141,8 +141,25 @@ However, for reading convenience, most of the examples show sorted sequences.
       >>> fmean([3.5, 4.0, 5.25])
       4.25
 
+   Optional weighting is supported.  For example, a professor assigns a
+   grade for a course by weighting quizzes at 20%, homework at 20%, a
+   midterm exam at 30%, and a final exam at 30%:
+
+   .. doctest::
+
+      >>> grades = [85, 92, 83, 91]
+      >>> weights = [0.20, 0.20, 0.30, 0.30]
+      >>> fmean(grades, weights)
+      87.6
+
+   If *weights* is supplied, it must be the same length as the *data* or
+   a :exc:`ValueError` will be raised.
+
    .. versionadded:: 3.8
 
+   .. versionchanged:: 3.11
+      Added support for *weights*.
+
 
 .. function:: geometric_mean(data)
 
diff --git a/Lib/statistics.py b/Lib/statistics.py
index 5d38f855020f43..bd3813ce1a4f19 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -136,7 +136,7 @@
 from itertools import groupby, repeat
 from bisect import bisect_left, bisect_right
 from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
-from operator import itemgetter
+from operator import itemgetter, mul
 from collections import Counter, namedtuple
 
 # === Exceptions ===
@@ -345,7 +345,7 @@ def mean(data):
     return _convert(total / n, T)
 
 
-def fmean(data):
+def fmean(data, weights=None):
     """Convert data to floats and compute the arithmetic mean.
 
     This runs faster than the mean() function and it always returns a float.
@@ -363,13 +363,24 @@ def count(iterable):
             nonlocal n
             for n, x in enumerate(iterable, start=1):
                 yield x
-        total = fsum(count(data))
-    else:
+        data = count(data)
+    if weights is None:
         total = fsum(data)
-    try:
+        if not n:
+            raise StatisticsError('fmean requires at least one data point')
         return total / n
-    except ZeroDivisionError:
-        raise StatisticsError('fmean requires at least one data point') from None
+    try:
+        num_weights = len(weights)
+    except TypeError:
+        weights = list(weights)
+        num_weights = len(weights)
+    num = fsum(map(mul, data, weights))
+    if n != num_weights:
+        raise StatisticsError('data and weights must be the same length')
+    den = fsum(weights)
+    if not den:
+        raise StatisticsError('sum of weights must be non-zero')
+    return num / den
 
 
 def geometric_mean(data):
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 70d269dea732de..3e6e17afe1c1b6 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -1972,6 +1972,27 @@ def test_special_values(self):
         with self.assertRaises(ValueError):
             fmean([Inf, -Inf])
 
+    def test_weights(self):
+        fmean = statistics.fmean
+        StatisticsError = statistics.StatisticsError
+        self.assertEqual(
+            fmean([10, 10, 10, 50], [0.25] * 4),
+            fmean([10, 10, 10, 50]))
+        self.assertEqual(
+            fmean([10, 10, 20], [0.25, 0.25, 0.50]),
+            fmean([10, 10, 20, 20]))
+        self.assertEqual(                           # inputs are iterators
+            fmean(iter([10, 10, 20]), iter([0.25, 0.25, 0.50])),
+            fmean([10, 10, 20, 20]))
+        with self.assertRaises(StatisticsError):
+            fmean([10, 20, 30], [1, 2])             # unequal lengths
+        with self.assertRaises(StatisticsError):
+            fmean(iter([10, 20, 30]), iter([1, 2])) # unequal lengths
+        with self.assertRaises(StatisticsError):
+            fmean([10, 20], [-1, 1])                # sum of weights is zero
+        with self.assertRaises(StatisticsError):
+            fmean(iter([10, 20]), iter([-1, 1]))    # sum of weights is zero
+
 
 # === Tests for variances and standard deviations ===
 
diff --git a/Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst b/Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst
new file mode 100644
index 00000000000000..f4c2786d13b05e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-05-16-11-57-38.bpo-44150.xAhhik.rst
@@ -0,0 +1 @@
+Add optional *weights* argument to statistics.fmean().