bpo-45766: Add direct proportion option to linear_regression(). (#29490)

* bpo-45766: Add direct proportion option to linear_regression().

* Update 2021-11-09-09-18-06.bpo-45766.dvbcMf.rst

* Use ellipsis to avoid round-off issues.

* Update Misc/NEWS.d/next/Library/2021-11-09-09-18-06.bpo-45766.dvbcMf.rst

Co-authored-by: Erlend Egeberg Aasland <erlend.aasland@innova.no>

* Update signature in main docs

* Fix missing comma

Co-authored-by: Erlend Egeberg Aasland <erlend.aasland@innova.no>
This commit is contained in:
Raymond Hettinger 2021-11-21 08:39:26 -06:00 committed by GitHub
parent 2afa1a1266
commit d2b55b07d2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 42 additions and 8 deletions

View file

@ -643,7 +643,7 @@ However, for reading convenience, most of the examples show sorted sequences.
.. versionadded:: 3.10
.. function:: linear_regression(x, y, /)
.. function:: linear_regression(x, y, /, *, proportional=False)
Return the slope and intercept of `simple linear regression
<https://en.wikipedia.org/wiki/Simple_linear_regression>`_
@ -677,8 +677,18 @@ However, for reading convenience, most of the examples show sorted sequences.
>>> round(slope * 2019 + intercept)
16
If *proportional* is true, the independent variable *x* and the
dependent variable *y* are assumed to be directly proportional.
The data is fit to a line passing through the origin.
Since the *intercept* will always be 0.0, the underlying linear
function simplifies to:
*y = slope \* x + noise*
.. versionadded:: 3.10
.. versionchanged:: 3.11
Added support for *proportional*.
Exceptions
----------

View file

@ -937,13 +937,13 @@ def correlation(x, y, /):
LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept'))
def linear_regression(x, y, /):
def linear_regression(x, y, /, *, proportional=False):
"""Slope and intercept for simple linear regression.
Return the slope and intercept of simple linear regression
parameters estimated using ordinary least squares. Simple linear
regression describes the relationship between an independent variable
*x* and a dependent variable *y* in terms of linear function:
*x* and a dependent variable *y* in terms of a linear function:
y = slope * x + intercept + noise
@ -961,21 +961,38 @@ def linear_regression(x, y, /):
>>> linear_regression(x, y) #doctest: +ELLIPSIS
LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)
If *proportional* is true, the independent variable *x* and the
dependent variable *y* are assumed to be directly proportional.
The data is fit to a line passing through the origin.
Since the *intercept* will always be 0.0, the underlying linear
function simplifies to:
y = slope * x + noise
>>> y = [3 * x[i] + noise[i] for i in range(5)]
>>> linear_regression(x, y, proportional=True) #doctest: +ELLIPSIS
LinearRegression(slope=3.02447542484..., intercept=0.0)
"""
n = len(x)
if len(y) != n:
raise StatisticsError('linear regression requires that both inputs have same number of data points')
if n < 2:
raise StatisticsError('linear regression requires at least two data points')
xbar = fsum(x) / n
ybar = fsum(y) / n
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
sxx = fsum((d := xi - xbar) * d for xi in x)
if proportional:
sxy = fsum(xi * yi for xi, yi in zip(x, y))
sxx = fsum(xi * xi for xi in x)
else:
xbar = fsum(x) / n
ybar = fsum(y) / n
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
sxx = fsum((d := xi - xbar) * d for xi in x)
try:
slope = sxy / sxx # equivalent to: covariance(x, y) / variance(x)
except ZeroDivisionError:
raise StatisticsError('x is constant')
intercept = ybar - slope * xbar
intercept = 0.0 if proportional else ybar - slope * xbar
return LinearRegression(slope=slope, intercept=intercept)

View file

@ -2527,6 +2527,12 @@ def test_results(self):
self.assertAlmostEqual(intercept, true_intercept)
self.assertAlmostEqual(slope, true_slope)
def test_proportional(self):
x = [10, 20, 30, 40]
y = [180, 398, 610, 799]
slope, intercept = statistics.linear_regression(x, y, proportional=True)
self.assertAlmostEqual(slope, 20 + 1/150)
self.assertEqual(intercept, 0.0)
class TestNormalDist:

View file

@ -0,0 +1 @@
Added *proportional* option to :func:`statistics.linear_regression`.