[Python-checkins] cpython: Issue #25177: Fixed problem with the mean of very small and very large numbers.

steven.daprano python-checkins at python.org
Tue Dec 1 09:11:16 EST 2015


https://hg.python.org/cpython/rev/0eeb39fc8ff5
changeset:   99408:0eeb39fc8ff5
parent:      99405:734247d5d0f9
user:        Steven D'Aprano <steve+python at pearwood.info>
date:        Tue Dec 01 19:59:53 2015 +1100
summary:
  Issue #25177: Fixed problem with the mean of very small and very large numbers.

files:
  Lib/statistics.py           |  181 +++++++----
  Lib/test/test_statistics.py |  363 ++++++++++++++++++++---
  Misc/NEWS                   |    4 +
  3 files changed, 431 insertions(+), 117 deletions(-)


diff --git a/Lib/statistics.py b/Lib/statistics.py
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -104,6 +104,8 @@
 
 from fractions import Fraction
 from decimal import Decimal
+from itertools import groupby
+
 
 
 # === Exceptions ===
@@ -115,86 +117,102 @@
 # === Private utilities ===
 
 def _sum(data, start=0):
-    """_sum(data [, start]) -> value
+    """_sum(data [, start]) -> (type, sum, count)
 
-    Return a high-precision sum of the given numeric data. If optional
-    argument ``start`` is given, it is added to the total. If ``data`` is
-    empty, ``start`` (defaulting to 0) is returned.
+    Return a high-precision sum of the given numeric data as a fraction,
+    together with the type to be converted to and the count of items.
+
+    If optional argument ``start`` is given, it is added to the total.
+    If ``data`` is empty, the sum is ``start`` (default 0) and the count is 0.
 
 
     Examples
     --------
 
     >>> _sum([3, 2.25, 4.5, -0.5, 1.0], 0.75)
-    11.0
+    (<class 'float'>, Fraction(11, 1), 5)
 
     Some sources of round-off error will be avoided:
 
     >>> _sum([1e50, 1, -1e50] * 1000)  # Built-in sum returns zero.
-    1000.0
+    (<class 'float'>, Fraction(1000, 1), 3000)
 
     Fractions and Decimals are also supported:
 
     >>> from fractions import Fraction as F
     >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)])
-    Fraction(63, 20)
+    (<class 'fractions.Fraction'>, Fraction(63, 20), 4)
 
     >>> from decimal import Decimal as D
     >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")]
     >>> _sum(data)
-    Decimal('0.6963')
+    (<class 'decimal.Decimal'>, Fraction(6963, 10000), 4)
 
     Mixed types are currently treated as an error, except that int is
     allowed.
     """
-    # We fail as soon as we reach a value that is not an int or the type of
-    # the first value which is not an int. E.g. _sum([int, int, float, int])
-    # is okay, but sum([int, int, float, Fraction]) is not.
-    allowed_types = {int, type(start)}
+    count = 0
     n, d = _exact_ratio(start)
-    partials = {d: n}  # map {denominator: sum of numerators}
-    # Micro-optimizations.
-    exact_ratio = _exact_ratio
+    partials = {d: n}
     partials_get = partials.get
-    # Add numerators for each denominator.
-    for x in data:
-        _check_type(type(x), allowed_types)
-        n, d = exact_ratio(x)
-        partials[d] = partials_get(d, 0) + n
-    # Find the expected result type. If allowed_types has only one item, it
-    # will be int; if it has two, use the one which isn't int.
-    assert len(allowed_types) in (1, 2)
-    if len(allowed_types) == 1:
-        assert allowed_types.pop() is int
-        T = int
+    T = _coerce(int, type(start))
+    for typ, values in groupby(data, type):
+        T = _coerce(T, typ)  # or raise TypeError
+        for n,d in map(_exact_ratio, values):
+            count += 1
+            partials[d] = partials_get(d, 0) + n
+    if None in partials:
+        # The sum will be a NAN or INF. We can ignore all the finite
+        # partials, and just look at this special one.
+        total = partials[None]
+        assert not _isfinite(total)
     else:
-        T = (allowed_types - {int}).pop()
-    if None in partials:
-        assert issubclass(T, (float, Decimal))
-        assert not math.isfinite(partials[None])
-        return T(partials[None])
-    total = Fraction()
-    for d, n in sorted(partials.items()):
-        total += Fraction(n, d)
-    if issubclass(T, int):
-        assert total.denominator == 1
-        return T(total.numerator)
-    if issubclass(T, Decimal):
-        return T(total.numerator)/total.denominator
-    return T(total)
+        # Sum all the partial sums using builtin sum.
+        # FIXME is this faster if we sum them in order of the denominator?
+        total = sum(Fraction(n, d) for d, n in sorted(partials.items()))
+    return (T, total, count)
 
 
-def _check_type(T, allowed):
-    if T not in allowed:
-        if len(allowed) == 1:
-            allowed.add(T)
-        else:
-            types = ', '.join([t.__name__ for t in allowed] + [T.__name__])
-            raise TypeError("unsupported mixed types: %s" % types)
+def _isfinite(x):
+    try:
+        return x.is_finite()  # Likely a Decimal.
+    except AttributeError:
+        return math.isfinite(x)  # Coerces to float first.
+
+
+def _coerce(T, S):
+    """Coerce types T and S to a common type, or raise TypeError.
+
+    Coercion rules are currently an implementation detail. See the CoerceTest
+    test class in test_statistics for details.
+    """
+    # See http://bugs.python.org/issue24068.
+    assert T is not bool, "initial type T is bool"
+    # If the types are the same, no need to coerce anything. Put this
+    # first, so that the usual case (no coercion needed) happens as soon
+    # as possible.
+    if T is S:  return T
+    # Mixed int & other coerce to the other type.
+    if S is int or S is bool:  return T
+    if T is int:  return S
+    # If one is a (strict) subclass of the other, coerce to the subclass.
+    if issubclass(S, T):  return S
+    if issubclass(T, S):  return T
+    # Ints coerce to the other type.
+    if issubclass(T, int):  return S
+    if issubclass(S, int):  return T
+    # Mixed fraction & float coerces to float (or float subclass).
+    if issubclass(T, Fraction) and issubclass(S, float):
+        return S
+    if issubclass(T, float) and issubclass(S, Fraction):
+        return T
+    # Any other combination is disallowed.
+    msg = "don't know how to coerce %s and %s"
+    raise TypeError(msg % (T.__name__, S.__name__))
 
 
 def _exact_ratio(x):
-    """Convert Real number x exactly to (numerator, denominator) pair.
+    """Return Real number x as an exact (numerator, denominator) pair.
 
     >>> _exact_ratio(0.25)
     (1, 4)
@@ -202,29 +220,31 @@
     x is expected to be an int, Fraction, Decimal or float.
     """
     try:
+        # Optimise the common case of floats. We expect that the most often
+        # used numeric type will be builtin floats, so try to make this as
+        # fast as possible.
+        if type(x) is float:
+            return x.as_integer_ratio()
         try:
-            # int, Fraction
+            # x may be an int, Fraction, or Integral ABC.
             return (x.numerator, x.denominator)
         except AttributeError:
-            # float
             try:
+                # x may be a float subclass.
                 return x.as_integer_ratio()
             except AttributeError:
-                # Decimal
                 try:
+                    # x may be a Decimal.
                     return _decimal_to_ratio(x)
                 except AttributeError:
-                    msg = "can't convert type '{}' to numerator/denominator"
-                    raise TypeError(msg.format(type(x).__name__)) from None
+                    # Just give up?
+                    pass
     except (OverflowError, ValueError):
-        # INF or NAN
-        if __debug__:
-            # Decimal signalling NANs cannot be converted to float :-(
-            if isinstance(x, Decimal):
-                assert not x.is_finite()
-            else:
-                assert not math.isfinite(x)
+        # float NAN or INF.
+        assert not math.isfinite(x)
         return (x, None)
+    msg = "can't convert type '{}' to numerator/denominator"
+    raise TypeError(msg.format(type(x).__name__))
 
 
 # FIXME This is faster than Fraction.from_decimal, but still too slow.
@@ -239,7 +259,7 @@
     sign, digits, exp = d.as_tuple()
     if exp in ('F', 'n', 'N'):  # INF, NAN, sNAN
         assert not d.is_finite()
-        raise ValueError
+        return (d, None)
     num = 0
     for digit in digits:
         num = num*10 + digit
@@ -253,6 +273,24 @@
     return (num, den)
 
 
+def _convert(value, T):
+    """Convert value to given numeric type T."""
+    if type(value) is T:
+        # This covers the cases where T is Fraction, or where value is
+        # a NAN or INF (Decimal or float).
+        return value
+    if issubclass(T, int) and value.denominator != 1:
+        T = float
+    try:
+        # FIXME: what do we do if this overflows?
+        return T(value)
+    except TypeError:
+        if issubclass(T, Decimal):
+            return T(value.numerator)/T(value.denominator)
+        else:
+            raise
+
+
 def _counts(data):
     # Generate a table of sorted (value, frequency) pairs.
     table = collections.Counter(iter(data)).most_common()
@@ -290,7 +328,9 @@
     n = len(data)
     if n < 1:
         raise StatisticsError('mean requires at least one data point')
-    return _sum(data)/n
+    T, total, count = _sum(data)
+    assert count == n
+    return _convert(total/n, T)
 
 
 # FIXME: investigate ways to calculate medians without sorting? Quickselect?
@@ -460,12 +500,14 @@
     """
     if c is None:
         c = mean(data)
-    ss = _sum((x-c)**2 for x in data)
+    T, total, count = _sum((x-c)**2 for x in data)
     # The following sum should mathematically equal zero, but due to rounding
     # error may not.
-    ss -= _sum((x-c) for x in data)**2/len(data)
-    assert not ss < 0, 'negative sum of square deviations: %f' % ss
-    return ss
+    U, total2, count2 = _sum((x-c) for x in data)
+    assert T == U and count == count2
+    total -=  total2**2/len(data)
+    assert not total < 0, 'negative sum of square deviations: %f' % total
+    return (T, total)
 
 
 def variance(data, xbar=None):
@@ -511,8 +553,8 @@
     n = len(data)
     if n < 2:
         raise StatisticsError('variance requires at least two data points')
-    ss = _ss(data, xbar)
-    return ss/(n-1)
+    T, ss = _ss(data, xbar)
+    return _convert(ss/(n-1), T)
 
 
 def pvariance(data, mu=None):
@@ -560,7 +602,8 @@
     if n < 1:
         raise StatisticsError('pvariance requires at least one data point')
     ss = _ss(data, mu)
-    return ss/n
+    T, ss = _ss(data, mu)
+    return _convert(ss/n, T)
 
 
 def stdev(data, xbar=None):
diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -21,6 +21,37 @@
 
 # === Helper functions and class ===
 
+def _nan_equal(a, b):
+    """Return True if a and b are both the same kind of NAN.
+
+    >>> _nan_equal(Decimal('NAN'), Decimal('NAN'))
+    True
+    >>> _nan_equal(Decimal('sNAN'), Decimal('sNAN'))
+    True
+    >>> _nan_equal(Decimal('NAN'), Decimal('sNAN'))
+    False
+    >>> _nan_equal(Decimal(42), Decimal('NAN'))
+    False
+
+    >>> _nan_equal(float('NAN'), float('NAN'))
+    True
+    >>> _nan_equal(float('NAN'), 0.5)
+    False
+
+    >>> _nan_equal(float('NAN'), Decimal('NAN'))
+    False
+
+    NAN payloads are not compared.
+    """
+    if type(a) is not type(b):
+        return False
+    if isinstance(a, float):
+        return math.isnan(a) and math.isnan(b)
+    aexp = a.as_tuple()[2]
+    bexp = b.as_tuple()[2]
+    return (aexp == bexp) and (aexp in ('n', 'N'))  # Both NAN or both sNAN.
+
+
 def _calc_errors(actual, expected):
     """Return the absolute and relative errors between two numbers.
 
@@ -675,15 +706,60 @@
         self.assertEqual(_exact_ratio(D("12.345")), (12345, 1000))
         self.assertEqual(_exact_ratio(D("-1.98")), (-198, 100))
 
+    def test_inf(self):
+        INF = float("INF")
+        class MyFloat(float):
+            pass
+        class MyDecimal(Decimal):
+            pass
+        for inf in (INF, -INF):
+            for type_ in (float, MyFloat, Decimal, MyDecimal):
+                x = type_(inf)
+                ratio = statistics._exact_ratio(x)
+                self.assertEqual(ratio, (x, None))
+                self.assertEqual(type(ratio[0]), type_)
+                self.assertTrue(math.isinf(ratio[0]))
+
+    def test_float_nan(self):
+        NAN = float("NAN")
+        class MyFloat(float):
+            pass
+        for nan in (NAN, MyFloat(NAN)):
+            ratio = statistics._exact_ratio(nan)
+            self.assertTrue(math.isnan(ratio[0]))
+            self.assertIs(ratio[1], None)
+            self.assertEqual(type(ratio[0]), type(nan))
+
+    def test_decimal_nan(self):
+        NAN = Decimal("NAN")
+        sNAN = Decimal("sNAN")
+        class MyDecimal(Decimal):
+            pass
+        for nan in (NAN, MyDecimal(NAN), sNAN, MyDecimal(sNAN)):
+            ratio = statistics._exact_ratio(nan)
+            self.assertTrue(_nan_equal(ratio[0], nan))
+            self.assertIs(ratio[1], None)
+            self.assertEqual(type(ratio[0]), type(nan))
+
 
 class DecimalToRatioTest(unittest.TestCase):
     # Test _decimal_to_ratio private function.
 
-    def testSpecialsRaise(self):
-        # Test that NANs and INFs raise ValueError.
-        # Non-special values are covered by _exact_ratio above.
-        for d in (Decimal('NAN'), Decimal('sNAN'), Decimal('INF')):
-            self.assertRaises(ValueError, statistics._decimal_to_ratio, d)
+    def test_infinity(self):
+        # Test that INFs are handled correctly.
+        inf = Decimal('INF')
+        self.assertEqual(statistics._decimal_to_ratio(inf), (inf, None))
+        self.assertEqual(statistics._decimal_to_ratio(-inf), (-inf, None))
+
+    def test_nan(self):
+        # Test that NANs are handled correctly.
+        for nan in (Decimal('NAN'), Decimal('sNAN')):
+            num, den = statistics._decimal_to_ratio(nan)
+            # Because NANs always compare non-equal, we cannot use assertEqual.
+            # Nor can we use an identity test, as we don't guarantee anything
+            # about the object identity.
+            self.assertTrue(_nan_equal(num, nan))
+            self.assertIs(den, None)
 
     def test_sign(self):
         # Test sign is calculated correctly.
@@ -718,25 +794,181 @@
         self.assertEqual(t, (147000, 1))
 
 
-class CheckTypeTest(unittest.TestCase):
-    # Test _check_type private function.
+class IsFiniteTest(unittest.TestCase):
+    # Test _isfinite private function.
 
-    def test_allowed(self):
-        # Test that a type which should be allowed is allowed.
-        allowed = set([int, float])
-        statistics._check_type(int, allowed)
-        statistics._check_type(float, allowed)
+    def test_finite(self):
+        # Test that finite numbers are recognised as finite.
+        for x in (5, Fraction(1, 3), 2.5, Decimal("5.5")):
+            self.assertTrue(statistics._isfinite(x))
 
-    def test_not_allowed(self):
-        # Test that a type which should not be allowed raises.
-        allowed = set([int, float])
-        self.assertRaises(TypeError, statistics._check_type, Decimal, allowed)
+    def test_infinity(self):
+        # Test that INFs are not recognised as finite.
+        for x in (float("inf"), Decimal("inf")):
+            self.assertFalse(statistics._isfinite(x))
 
-    def test_add_to_allowed(self):
-        # Test that a second type will be added to the allowed set.
-        allowed = set([int])
-        statistics._check_type(float, allowed)
-        self.assertEqual(allowed, set([int, float]))
+    def test_nan(self):
+        # Test that NANs are not recognised as finite.
+        for x in (float("nan"), Decimal("NAN"), Decimal("sNAN")):
+            self.assertFalse(statistics._isfinite(x))
+
+
+class CoerceTest(unittest.TestCase):
+    # Test that private function _coerce correctly deals with types.
+
+    # The coercion rules are currently an implementation detail, although at
+    # some point that should change. The tests and comments here define the
+    # correct implementation.
+
+    # Pre-conditions of _coerce:
+    #
+    #   - The first time _sum calls _coerce, the first argument is int.
+    #   - coerce(T, S) will never be called with bool as the first argument;
+    #     this is a pre-condition, guarded with an assertion.
+
+    #
+    #   - coerce(T, T) will always return T; we assume T is a valid numeric
+    #     type. Violate this assumption at your own risk.
+    #
+    #   - Apart from as above, bool is treated as if it were actually int.
+    #
+    #   - coerce(int, X) and coerce(X, int) return X.
+    #
+    def test_bool(self):
+        # bool is somewhat special, due to the pre-condition that it is
+        # never given as the first argument to _coerce, and that it cannot
+        # be subclassed. So we test it specially.
+        for T in (int, float, Fraction, Decimal):
+            self.assertIs(statistics._coerce(T, bool), T)
+            class MyClass(T): pass
+            self.assertIs(statistics._coerce(MyClass, bool), MyClass)
+
+    def assertCoerceTo(self, A, B):
+        """Assert that type A coerces to B."""
+        self.assertIs(statistics._coerce(A, B), B)
+        self.assertIs(statistics._coerce(B, A), B)
+
+    def check_coerce_to(self, A, B):
+        """Checks that type A coerces to B, including subclasses."""
+        # Assert that type A is coerced to B.
+        self.assertCoerceTo(A, B)
+        # Subclasses of A are also coerced to B.
+        class SubclassOfA(A): pass
+        self.assertCoerceTo(SubclassOfA, B)
+        # A, and subclasses of A, are coerced to subclasses of B.
+        class SubclassOfB(B): pass
+        self.assertCoerceTo(A, SubclassOfB)
+        self.assertCoerceTo(SubclassOfA, SubclassOfB)
+
+    def assertCoerceRaises(self, A, B):
+        """Assert that coercing A to B, or vice versa, raises TypeError."""
+        self.assertRaises(TypeError, statistics._coerce, (A, B))
+        self.assertRaises(TypeError, statistics._coerce, (B, A))
+
+    def check_type_coercions(self, T):
+        """Check that type T coerces correctly with subclasses of itself."""
+        assert T is not bool
+        # Coercing a type with itself returns the same type.
+        self.assertIs(statistics._coerce(T, T), T)
+        # Coercing a type with a subclass of itself returns the subclass.
+        class U(T): pass
+        class V(T): pass
+        class W(U): pass
+        for typ in (U, V, W):
+            self.assertCoerceTo(T, typ)
+        self.assertCoerceTo(U, W)
+        # Coercing two subclasses that aren't parent/child is an error.
+        self.assertCoerceRaises(U, V)
+        self.assertCoerceRaises(V, W)
+
+    def test_int(self):
+        # Check that int coerces correctly.
+        self.check_type_coercions(int)
+        for typ in (float, Fraction, Decimal):
+            self.check_coerce_to(int, typ)
+
+    def test_fraction(self):
+        # Check that Fraction coerces correctly.
+        self.check_type_coercions(Fraction)
+        self.check_coerce_to(Fraction, float)
+
+    def test_decimal(self):
+        # Check that Decimal coerces correctly.
+        self.check_type_coercions(Decimal)
+
+    def test_float(self):
+        # Check that float coerces correctly.
+        self.check_type_coercions(float)
+
+    def test_non_numeric_types(self):
+        for bad_type in (str, list, type(None), tuple, dict):
+            for good_type in (int, float, Fraction, Decimal):
+                self.assertCoerceRaises(good_type, bad_type)
+
+    def test_incompatible_types(self):
+        # Test that incompatible types raise.
+        for T in (float, Fraction):
+            class MySubclass(T): pass
+            self.assertCoerceRaises(T, Decimal)
+            self.assertCoerceRaises(MySubclass, Decimal)
+
+
+class ConvertTest(unittest.TestCase):
+    # Test private _convert function.
+
+    def check_exact_equal(self, x, y):
+        """Check that x equals y, and has the same type as well."""
+        self.assertEqual(x, y)
+        self.assertIs(type(x), type(y))
+
+    def test_int(self):
+        # Test conversions to int.
+        x = statistics._convert(Fraction(71), int)
+        self.check_exact_equal(x, 71)
+        class MyInt(int): pass
+        x = statistics._convert(Fraction(17), MyInt)
+        self.check_exact_equal(x, MyInt(17))
+
+    def test_fraction(self):
+        # Test conversions to Fraction.
+        x = statistics._convert(Fraction(95, 99), Fraction)
+        self.check_exact_equal(x, Fraction(95, 99))
+        class MyFraction(Fraction):
+            def __truediv__(self, other):
+                return self.__class__(super().__truediv__(other))
+        x = statistics._convert(Fraction(71, 13), MyFraction)
+        self.check_exact_equal(x, MyFraction(71, 13))
+
+    def test_float(self):
+        # Test conversions to float.
+        x = statistics._convert(Fraction(-1, 2), float)
+        self.check_exact_equal(x, -0.5)
+        class MyFloat(float):
+            def __truediv__(self, other):
+                return self.__class__(super().__truediv__(other))
+        x = statistics._convert(Fraction(9, 8), MyFloat)
+        self.check_exact_equal(x, MyFloat(1.125))
+
+    def test_decimal(self):
+        # Test conversions to Decimal.
+        x = statistics._convert(Fraction(1, 40), Decimal)
+        self.check_exact_equal(x, Decimal("0.025"))
+        class MyDecimal(Decimal):
+            def __truediv__(self, other):
+                return self.__class__(super().__truediv__(other))
+        x = statistics._convert(Fraction(-15, 16), MyDecimal)
+        self.check_exact_equal(x, MyDecimal("-0.9375"))
+
+    def test_inf(self):
+        for INF in (float('inf'), Decimal('inf')):
+            for inf in (INF, -INF):
+                x = statistics._convert(inf, type(inf))
+                self.check_exact_equal(x, inf)
+
+    def test_nan(self):
+        for nan in (float('nan'), Decimal('NAN'), Decimal('sNAN')):
+            x = statistics._convert(nan, type(nan))
+            self.assertTrue(_nan_equal(x, nan))
 
 
 # === Tests for public functions ===
@@ -874,52 +1106,71 @@
             self.assertIs(type(result), kind)
 
 
-class TestSum(NumericTestCase, UnivariateCommonMixin, UnivariateTypeMixin):
+class TestSumCommon(UnivariateCommonMixin, UnivariateTypeMixin):
+    # Common test cases for statistics._sum() function.
+
+    # This test suite looks only at the numeric value returned by _sum,
+    # after conversion to the appropriate type.
+    def setUp(self):
+        def simplified_sum(*args):
+            T, value, n = statistics._sum(*args)
+            return statistics._coerce(value, T)
+        self.func = simplified_sum
+
+
+class TestSum(NumericTestCase):
     # Test cases for statistics._sum() function.
 
+    # These tests look at the entire three value tuple returned by _sum.
+
     def setUp(self):
         self.func = statistics._sum
 
     def test_empty_data(self):
         # Override test for empty data.
         for data in ([], (), iter([])):
-            self.assertEqual(self.func(data), 0)
-            self.assertEqual(self.func(data, 23), 23)
-            self.assertEqual(self.func(data, 2.3), 2.3)
+            self.assertEqual(self.func(data), (int, Fraction(0), 0))
+            self.assertEqual(self.func(data, 23), (int, Fraction(23), 0))
+            self.assertEqual(self.func(data, 2.3), (float, Fraction(2.3), 0))
 
     def test_ints(self):
-        self.assertEqual(self.func([1, 5, 3, -4, -8, 20, 42, 1]), 60)
-        self.assertEqual(self.func([4, 2, 3, -8, 7], 1000), 1008)
+        self.assertEqual(self.func([1, 5, 3, -4, -8, 20, 42, 1]),
+                         (int, Fraction(60), 8))
+        self.assertEqual(self.func([4, 2, 3, -8, 7], 1000),
+                         (int, Fraction(1008), 5))
 
     def test_floats(self):
-        self.assertEqual(self.func([0.25]*20), 5.0)
-        self.assertEqual(self.func([0.125, 0.25, 0.5, 0.75], 1.5), 3.125)
+        self.assertEqual(self.func([0.25]*20),
+                         (float, Fraction(5.0), 20))
+        self.assertEqual(self.func([0.125, 0.25, 0.5, 0.75], 1.5),
+                         (float, Fraction(3.125), 4))
 
     def test_fractions(self):
-        F = Fraction
-        self.assertEqual(self.func([Fraction(1, 1000)]*500), Fraction(1, 2))
+        self.assertEqual(self.func([Fraction(1, 1000)]*500),
+                         (Fraction, Fraction(1, 2), 500))
 
     def test_decimals(self):
         D = Decimal
         data = [D("0.001"), D("5.246"), D("1.702"), D("-0.025"),
                 D("3.974"), D("2.328"), D("4.617"), D("2.843"),
                 ]
-        self.assertEqual(self.func(data), Decimal("20.686"))
+        self.assertEqual(self.func(data),
+                         (Decimal, Decimal("20.686"), 8))
 
     def test_compare_with_math_fsum(self):
         # Compare with the math.fsum function.
         # Ideally we ought to get the exact same result, but sometimes
         # we differ by a very slight amount :-(
         data = [random.uniform(-100, 1000) for _ in range(1000)]
-        self.assertApproxEqual(self.func(data), math.fsum(data), rel=2e-16)
+        self.assertApproxEqual(float(self.func(data)[1]), math.fsum(data), rel=2e-16)
 
     def test_start_argument(self):
         # Test that the optional start argument works correctly.
         data = [random.uniform(1, 1000) for _ in range(100)]
-        t = self.func(data)
-        self.assertEqual(t+42, self.func(data, 42))
-        self.assertEqual(t-23, self.func(data, -23))
-        self.assertEqual(t+1e20, self.func(data, 1e20))
+        t = self.func(data)[1]
+        self.assertEqual(t+42, self.func(data, 42)[1])
+        self.assertEqual(t-23, self.func(data, -23)[1])
+        self.assertEqual(t+Fraction(1e20), self.func(data, 1e20)[1])
 
     def test_strings_fail(self):
         # Sum of strings should fail.
@@ -934,7 +1185,7 @@
     def test_mixed_sum(self):
         # Mixed input types are not (currently) allowed.
         # Check that mixed data types fail.
-        self.assertRaises(TypeError, self.func, [1, 2.0, Fraction(1, 2)])
+        self.assertRaises(TypeError, self.func, [1, 2.0, Decimal(1)])
         # And so does mixed start argument.
         self.assertRaises(TypeError, self.func, [1, 2.0], Decimal(1))
 
@@ -942,11 +1193,14 @@
 class SumTortureTest(NumericTestCase):
     def test_torture(self):
         # Tim Peters' torture test for sum, and variants of same.
-        self.assertEqual(statistics._sum([1, 1e100, 1, -1e100]*10000), 20000.0)
-        self.assertEqual(statistics._sum([1e100, 1, 1, -1e100]*10000), 20000.0)
-        self.assertApproxEqual(
-            statistics._sum([1e-100, 1, 1e-100, -1]*10000), 2.0e-96, rel=5e-16
-            )
+        self.assertEqual(statistics._sum([1, 1e100, 1, -1e100]*10000),
+                         (float, Fraction(20000.0), 40000))
+        self.assertEqual(statistics._sum([1e100, 1, 1, -1e100]*10000),
+                         (float, Fraction(20000.0), 40000))
+        T, num, count = statistics._sum([1e-100, 1, 1e-100, -1]*10000)
+        self.assertIs(T, float)
+        self.assertEqual(count, 40000)
+        self.assertApproxEqual(float(num), 2.0e-96, rel=5e-16)
 
 
 class SumSpecialValues(NumericTestCase):
@@ -955,7 +1209,7 @@
     def test_nan(self):
         for type_ in (float, Decimal):
             nan = type_('nan')
-            result = statistics._sum([1, nan, 2])
+            result = statistics._sum([1, nan, 2])[1]
             self.assertIs(type(result), type_)
             self.assertTrue(math.isnan(result))
 
@@ -968,10 +1222,10 @@
 
     def do_test_inf(self, inf):
         # Adding a single infinity gives infinity.
-        result = statistics._sum([1, 2, inf, 3])
+        result = statistics._sum([1, 2, inf, 3])[1]
         self.check_infinity(result, inf)
         # Adding two infinities of the same sign also gives infinity.
-        result = statistics._sum([1, 2, inf, 3, inf, 4])
+        result = statistics._sum([1, 2, inf, 3, inf, 4])[1]
         self.check_infinity(result, inf)
 
     def test_float_inf(self):
@@ -987,7 +1241,7 @@
     def test_float_mismatched_infs(self):
         # Test that adding two infinities of opposite sign gives a NAN.
         inf = float('inf')
-        result = statistics._sum([1, 2, inf, 3, -inf, 4])
+        result = statistics._sum([1, 2, inf, 3, -inf, 4])[1]
         self.assertTrue(math.isnan(result))
 
     def test_decimal_extendedcontext_mismatched_infs_to_nan(self):
@@ -995,7 +1249,7 @@
         inf = Decimal('inf')
         data = [1, 2, inf, 3, -inf, 4]
         with decimal.localcontext(decimal.ExtendedContext):
-            self.assertTrue(math.isnan(statistics._sum(data)))
+            self.assertTrue(math.isnan(statistics._sum(data)[1]))
 
     def test_decimal_basiccontext_mismatched_infs_to_nan(self):
         # Test adding Decimal INFs with opposite sign raises InvalidOperation.
@@ -1111,6 +1365,19 @@
         d = Decimal('1e4')
         self.assertEqual(statistics.mean([d]), d)
 
+    def test_regression_25177(self):
+        # Regression test for issue 25177.
+        # Ensure very big and very small floats don't overflow.
+        # See http://bugs.python.org/issue25177.
+        self.assertEqual(statistics.mean(
+            [8.988465674311579e+307, 8.98846567431158e+307]),
+            8.98846567431158e+307)
+        big = 8.98846567431158e+307
+        tiny = 5e-324
+        for n in (2, 3, 5, 200):
+            self.assertEqual(statistics.mean([big]*n), big)
+            self.assertEqual(statistics.mean([tiny]*n), tiny)
+
 
 class TestMedian(NumericTestCase, AverageMixin):
     # Common tests for median and all median.* functions.
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -107,6 +107,10 @@
 Library
 -------
 
+- Issue #25177: Fixed problem with the mean of very small and very large
+  numbers. As a side effect, statistics.mean and statistics.variance should
+  be significantly faster.
+
 - Issue #25718: Fixed copying object with state with boolean value is false.
 
 - Issue #10131: Fixed deep copying of minidom documents.  Based on patch

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list