From 3d180e3ab21c5d41d1c46e3ef349b30ba409f300 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger
Date: Mon, 29 Aug 2022 12:19:48 -0500
Subject: [PATCH] Improve accuracy for Spearman's rank correlation coefficient. (#96392)

---
 Lib/statistics.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Lib/statistics.py b/Lib/statistics.py
index b4676fed5e2..b4adabd3f05 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -379,7 +379,7 @@ def _rank(data, /, *, key=None, reverse=False, ties='average', start=1) -> list[
         [2.0, 1.0, 3.0]
 
     Ranks are conventionally numbered starting from one; however,
-    setting *start* to zero allow the ranks to be used as array indices:
+    setting *start* to zero allows the ranks to be used as array indices:
 
         >>> prize = ['Gold', 'Silver', 'Bronze', 'Certificate']
         >>> scores = [8.1, 7.3, 9.4, 8.3]
@@ -1073,8 +1073,9 @@ def correlation(x, y, /, *, method='linear'):
     if method not in {'linear', 'ranked'}:
         raise ValueError(f'Unknown method: {method!r}')
     if method == 'ranked':
-        x = _rank(x)
-        y = _rank(y)
+        start = (n - 1) / -2  # Center rankings around zero
+        x = _rank(x, start=start)
+        y = _rank(y, start=start)
     xbar = fsum(x) / n
     ybar = fsum(y) / n
     sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))