gh-112540: Support zero inputs in geometric_mean() (gh-112880)

This commit is contained in:
Raymond Hettinger 2023-12-08 12:05:56 -06:00 committed by GitHub
parent 76929fdeeb
commit f3bff4ee9d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 35 additions and 9 deletions

View file

@ -527,8 +527,10 @@ def count(iterable):
def geometric_mean(data): def geometric_mean(data):
"""Convert data to floats and compute the geometric mean. """Convert data to floats and compute the geometric mean.
Raises a StatisticsError if the input dataset is empty, Raises a StatisticsError if the input dataset is empty
if it contains a zero, or if it contains a negative value. or if it contains a negative value.
Returns zero if the product of inputs is zero.
No special efforts are made to achieve exact results. No special efforts are made to achieve exact results.
(However, this may change in the future.) (However, this may change in the future.)
@ -536,11 +538,25 @@ def geometric_mean(data):
>>> round(geometric_mean([54, 24, 36]), 9) >>> round(geometric_mean([54, 24, 36]), 9)
36.0 36.0
""" """
try: n = 0
return exp(fmean(map(log, data))) found_zero = False
except ValueError: def count_positive(iterable):
raise StatisticsError('geometric mean requires a non-empty dataset ' nonlocal n, found_zero
'containing positive numbers') from None for n, x in enumerate(iterable, start=1):
if x > 0.0 or math.isnan(x):
yield x
elif x == 0.0:
found_zero = True
else:
raise StatisticsError('No negative inputs allowed', x)
total = fsum(map(log, count_positive(data)))
if not n:
raise StatisticsError('Must have a non-empty dataset')
if math.isnan(total):
return math.nan
if found_zero:
return math.nan if total == math.inf else 0.0
return exp(total / n)
def harmonic_mean(data, weights=None): def harmonic_mean(data, weights=None):

View file

@ -2302,10 +2302,12 @@ def test_error_cases(self):
StatisticsError = statistics.StatisticsError StatisticsError = statistics.StatisticsError
with self.assertRaises(StatisticsError): with self.assertRaises(StatisticsError):
geometric_mean([]) # empty input geometric_mean([]) # empty input
with self.assertRaises(StatisticsError):
geometric_mean([3.5, 0.0, 5.25]) # zero input
with self.assertRaises(StatisticsError): with self.assertRaises(StatisticsError):
geometric_mean([3.5, -4.0, 5.25]) # negative input geometric_mean([3.5, -4.0, 5.25]) # negative input
with self.assertRaises(StatisticsError):
geometric_mean([0.0, -4.0, 5.25]) # negative input with zero
with self.assertRaises(StatisticsError):
geometric_mean([3.5, -math.inf, 5.25]) # negative infinity
with self.assertRaises(StatisticsError): with self.assertRaises(StatisticsError):
geometric_mean(iter([])) # empty iterator geometric_mean(iter([])) # empty iterator
with self.assertRaises(TypeError): with self.assertRaises(TypeError):
@ -2328,6 +2330,12 @@ def test_special_values(self):
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
geometric_mean([Inf, -Inf]) geometric_mean([Inf, -Inf])
# Cases with zero
self.assertEqual(geometric_mean([3, 0.0, 5]), 0.0) # Any zero gives a zero
self.assertEqual(geometric_mean([3, -0.0, 5]), 0.0) # Negative zero allowed
self.assertTrue(math.isnan(geometric_mean([0, NaN]))) # NaN beats zero
self.assertTrue(math.isnan(geometric_mean([0, Inf]))) # Because 0.0 * Inf -> NaN
def test_mixed_int_and_float(self): def test_mixed_int_and_float(self):
# Regression test for b.p.o. issue #28327 # Regression test for b.p.o. issue #28327
geometric_mean = statistics.geometric_mean geometric_mean = statistics.geometric_mean

View file

@ -0,0 +1,2 @@
The statistics.geometric_mean() function now returns zero for datasets
containing a zero (or NaN if the data also contains a NaN or a positive
infinity). Formerly, it would raise a StatisticsError.