gh-112540: Support zero inputs in geometric_mean() (gh-112880)

This commit is contained in:
Raymond Hettinger 2023-12-08 12:05:56 -06:00 committed by GitHub
parent 76929fdeeb
commit f3bff4ee9d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 35 additions and 9 deletions

View file

@@ -527,8 +527,10 @@ def count(iterable):
def geometric_mean(data):
"""Convert data to floats and compute the geometric mean.
Raises a StatisticsError if the input dataset is empty,
if it contains a zero, or if it contains a negative value.
Raises a StatisticsError if the input dataset is empty
or if it contains a negative value.
Returns zero if the product of inputs is zero.
No special efforts are made to achieve exact results.
(However, this may change in the future.)
@@ -536,11 +538,25 @@ def geometric_mean(data):
>>> round(geometric_mean([54, 24, 36]), 9)
36.0
"""
try:
return exp(fmean(map(log, data)))
except ValueError:
raise StatisticsError('geometric mean requires a non-empty dataset '
'containing positive numbers') from None
n = 0
found_zero = False
def count_positive(iterable):
nonlocal n, found_zero
for n, x in enumerate(iterable, start=1):
if x > 0.0 or math.isnan(x):
yield x
elif x == 0.0:
found_zero = True
else:
raise StatisticsError('No negative inputs allowed', x)
total = fsum(map(log, count_positive(data)))
if not n:
raise StatisticsError('Must have a non-empty dataset')
if math.isnan(total):
return math.nan
if found_zero:
return math.nan if total == math.inf else 0.0
return exp(total / n)
def harmonic_mean(data, weights=None):

View file

@@ -2302,10 +2302,12 @@ def test_error_cases(self):
StatisticsError = statistics.StatisticsError
with self.assertRaises(StatisticsError):
geometric_mean([]) # empty input
with self.assertRaises(StatisticsError):
geometric_mean([3.5, 0.0, 5.25]) # zero input
with self.assertRaises(StatisticsError):
geometric_mean([3.5, -4.0, 5.25]) # negative input
with self.assertRaises(StatisticsError):
geometric_mean([0.0, -4.0, 5.25]) # negative input with zero
with self.assertRaises(StatisticsError):
geometric_mean([3.5, -math.inf, 5.25]) # negative infinity
with self.assertRaises(StatisticsError):
geometric_mean(iter([])) # empty iterator
with self.assertRaises(TypeError):
@@ -2328,6 +2330,12 @@ def test_special_values(self):
with self.assertRaises(ValueError):
geometric_mean([Inf, -Inf])
# Cases with zero
self.assertEqual(geometric_mean([3, 0.0, 5]), 0.0) # Any zero gives a zero
self.assertEqual(geometric_mean([3, -0.0, 5]), 0.0) # Negative zero allowed
self.assertTrue(math.isnan(geometric_mean([0, NaN]))) # NaN beats zero
self.assertTrue(math.isnan(geometric_mean([0, Inf]))) # Because 0.0 * Inf -> NaN
def test_mixed_int_and_float(self):
# Regression test for b.p.o. issue #28327
geometric_mean = statistics.geometric_mean

View file

@@ -0,0 +1,2 @@
The statistics.geometric_mean() function now returns zero for datasets
containing a zero. Formerly, it would raise an exception.