mirror of
https://github.com/python/cpython
synced 2024-09-15 23:57:10 +00:00
bpo-45766: Add direct proportion option to linear_regression(). (#29490)
* bpo-45766: Add direct proportion option to linear_regression(). * Update 2021-11-09-09-18-06.bpo-45766.dvbcMf.rst * Use ellipsis to avoid round-off issues. * Update Misc/NEWS.d/next/Library/2021-11-09-09-18-06.bpo-45766.dvbcMf.rst Co-authored-by: Erlend Egeberg Aasland <erlend.aasland@innova.no> * Update signature in main docs * Fix missing comma Co-authored-by: Erlend Egeberg Aasland <erlend.aasland@innova.no>
This commit is contained in:
parent
2afa1a1266
commit
d2b55b07d2
|
@ -643,7 +643,7 @@ However, for reading convenience, most of the examples show sorted sequences.
|
|||
|
||||
.. versionadded:: 3.10
|
||||
|
||||
.. function:: linear_regression(x, y, /)
|
||||
.. function:: linear_regression(x, y, /, *, proportional=False)
|
||||
|
||||
Return the slope and intercept of `simple linear regression
|
||||
<https://en.wikipedia.org/wiki/Simple_linear_regression>`_
|
||||
|
@ -677,8 +677,18 @@ However, for reading convenience, most of the examples show sorted sequences.
|
|||
>>> round(slope * 2019 + intercept)
|
||||
16
|
||||
|
||||
If *proportional* is true, the independent variable *x* and the
|
||||
dependent variable *y* are assumed to be directly proportional.
|
||||
The data is fit to a line passing through the origin.
|
||||
Since the *intercept* will always be 0.0, the underlying linear
|
||||
function simplifies to:
|
||||
|
||||
*y = slope \* x + noise*
|
||||
|
||||
.. versionadded:: 3.10
|
||||
|
||||
.. versionchanged:: 3.11
|
||||
Added support for *proportional*.
|
||||
|
||||
Exceptions
|
||||
----------
|
||||
|
|
|
@ -937,13 +937,13 @@ def correlation(x, y, /):
|
|||
LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept'))
|
||||
|
||||
|
||||
def linear_regression(x, y, /):
|
||||
def linear_regression(x, y, /, *, proportional=False):
|
||||
"""Slope and intercept for simple linear regression.
|
||||
|
||||
Return the slope and intercept of simple linear regression
|
||||
parameters estimated using ordinary least squares. Simple linear
|
||||
regression describes the relationship between an independent variable
|
||||
*x* and a dependent variable *y* in terms of linear function:
|
||||
*x* and a dependent variable *y* in terms of a linear function:
|
||||
|
||||
y = slope * x + intercept + noise
|
||||
|
||||
|
@ -961,21 +961,38 @@ def linear_regression(x, y, /):
|
|||
>>> linear_regression(x, y) #doctest: +ELLIPSIS
|
||||
LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)
|
||||
|
||||
If *proportional* is true, the independent variable *x* and the
|
||||
dependent variable *y* are assumed to be directly proportional.
|
||||
The data is fit to a line passing through the origin.
|
||||
|
||||
Since the *intercept* will always be 0.0, the underlying linear
|
||||
function simplifies to:
|
||||
|
||||
y = slope * x + noise
|
||||
|
||||
>>> y = [3 * x[i] + noise[i] for i in range(5)]
|
||||
>>> linear_regression(x, y, proportional=True) #doctest: +ELLIPSIS
|
||||
LinearRegression(slope=3.02447542484..., intercept=0.0)
|
||||
|
||||
"""
|
||||
n = len(x)
|
||||
if len(y) != n:
|
||||
raise StatisticsError('linear regression requires that both inputs have same number of data points')
|
||||
if n < 2:
|
||||
raise StatisticsError('linear regression requires at least two data points')
|
||||
xbar = fsum(x) / n
|
||||
ybar = fsum(y) / n
|
||||
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
|
||||
sxx = fsum((d := xi - xbar) * d for xi in x)
|
||||
if proportional:
|
||||
sxy = fsum(xi * yi for xi, yi in zip(x, y))
|
||||
sxx = fsum(xi * xi for xi in x)
|
||||
else:
|
||||
xbar = fsum(x) / n
|
||||
ybar = fsum(y) / n
|
||||
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
|
||||
sxx = fsum((d := xi - xbar) * d for xi in x)
|
||||
try:
|
||||
slope = sxy / sxx # equivalent to: covariance(x, y) / variance(x)
|
||||
except ZeroDivisionError:
|
||||
raise StatisticsError('x is constant')
|
||||
intercept = ybar - slope * xbar
|
||||
intercept = 0.0 if proportional else ybar - slope * xbar
|
||||
return LinearRegression(slope=slope, intercept=intercept)
|
||||
|
||||
|
||||
|
|
|
@ -2527,6 +2527,12 @@ def test_results(self):
|
|||
self.assertAlmostEqual(intercept, true_intercept)
|
||||
self.assertAlmostEqual(slope, true_slope)
|
||||
|
||||
def test_proportional(self):
|
||||
x = [10, 20, 30, 40]
|
||||
y = [180, 398, 610, 799]
|
||||
slope, intercept = statistics.linear_regression(x, y, proportional=True)
|
||||
self.assertAlmostEqual(slope, 20 + 1/150)
|
||||
self.assertEqual(intercept, 0.0)
|
||||
|
||||
class TestNormalDist:
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Added *proportional* option to :func:`statistics.linear_regression`.
|
Loading…
Reference in a new issue