import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
# --- Synthetic data + ordinary least squares fit ---------------------------
# NOTE(review): this file is a scrambled notebook export; the statements
# below are reconstructed into valid Python from the shuffled fragments.
n_datapoints = 10
np.random.seed(1)  # fixed seed so the demo is reproducible

# x ~ U(0, 10); y is a noisy line: y = 2x + 1 + N(0, 3^2)
x = 10 * np.random.rand(n_datapoints)
y = 2 * x + 1 + (3 * np.random.randn(n_datapoints))

# Fit ordinary (vertical-error) least squares.
ols_model = LinearRegression(fit_intercept=True)
ols_model.fit(x[:, np.newaxis], y)

# get prediction at the training points
y_pred = ols_model.predict(x[:, np.newaxis])
residual = y - y_pred

# get prediction for best line fit (dense grid for a smooth plotted line)
xfit = np.linspace(0, 8, 50)
y_ = ols_model.predict(xfit[:, np.newaxis])
# --- Figure 1: initial (mean-only) model vs. OLS best fit ------------------
fig, axs = plt.subplots(1, 2, figsize=(10.5, 4))

# Left panel: the "initial" model is a horizontal line at the mean of y.
ax = axs[0]
ax.scatter(
    x,
    y,
    label="Data points",
    edgecolors="k",
)
# plot data
ax.plot(
    [0, 8],
    [y.mean(), y.mean()],
    color="#ff7f0e",
    label="Initial fit",
)
# Dashed vertical segments: error of each point w.r.t. the mean line.
for i in range(len(x)):
    ax.plot(
        [x[i], x[i]],
        [y[i], y.mean()],
        color="gray",
        linestyle="--",
    )
ax.grid(True)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
ax.set_title("Initial model")

# Right panel: fitted OLS line with residuals.
ax = axs[1]
ax.scatter(
    x,
    y,
    label="Data points",
    edgecolors="k",
)
# plot best line fit
ax.plot(
    xfit,
    y_,
    color="#2ca02c",
    label="Best fit",
)
# Optionally, plot residuals (errors)
for i in range(len(x)):
    ax.plot(
        [x[i], x[i]],
        [y[i], y_pred[i]],
        color="gray",
        linestyle="--",
    )
ax.scatter(
    x, y_pred, color="green", label="Predicted value"
)  # If you want to show where the predicted points lie on the line
ax.annotate(
    "residual",
    xy=(1, -10),
    xycoords="data",
    xytext=(0.2, 0.1),
    textcoords="axes fraction",
    va="top",
    ha="left",
    fontsize=16,
    arrowprops=dict(
        arrowstyle="->",
        facecolor="black",
    ),
)
ax.grid(True)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("Fitted model")  # fixed typo: was "Fited model"
ax.legend()
plt.show()
# --- Orthogonal distance regression (scipy.odr) -----------------------------
data = sp.odr.Data(x, y)


def linear_model(w, x):
    """Linear model y = w[0] + w[1] * x, in scipy.odr's (beta, x) signature."""
    return w[0] + (w[1] * x)


model = sp.odr.Model(linear_model)
myodr = sp.odr.ODR(data, model, beta0=[0.1, 0.5])
myoutput = myodr.run()
myoutput.pprint()
# Output from a previous run (kept from the original notebook):
#   Beta: [-2.6044511  3.1248936]
#   Beta Std Error: [3.21297347 0.90341949]
#   Beta Covariance: [[ 4.95263004 -1.23196863]
#                     [-1.23196863  0.39156198]]
#   Residual Variance: 2.0843871730397523
#   Inverse Condition #: 0.11473302487938446
#   Reason(s) for Halting:
#     Sum of squares convergence
# --- Figure 2: ODR fit (red) vs. OLS fit (green) on the raw data ------------
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.scatter(
    x,
    y,
    label="Data points",
    edgecolors="k",
)
# ODR predictions at the sample points.
ax.scatter(
    x,
    linear_model(myoutput.beta, x),
    # label="Data points",
    edgecolors="r",
)
ax.plot(
    x,
    linear_model(myoutput.beta, x),
    # label="Data points",
    color="r",
)
# OLS predictions for comparison.
ax.scatter(
    x,
    y_pred,
    # label="Data points",
    edgecolors="green",
)
ax.plot(
    x,
    y_pred,
    color="green",
)
ax.grid(True)
# Test with centered data
def centering(Z: np.ndarray):
    """Min-max scale Z column-wise to [0, 1], then subtract the column means.

    Parameters
    ----------
    Z : (n, d) array.

    Returns
    -------
    (Z_centered, reverse_centering) : the centered data (column means are 0)
    and a closure that maps centered data back to the original scale.
    (Fixed: the original annotation claimed ``-> np.ndarray`` although a
    tuple is returned.)

    Notes
    -----
    Divides by ``Z_max - Z_min``; a constant column would produce a
    division by zero — callers must avoid degenerate columns.
    """
    Z_min = Z.min(0)
    Z_max = Z.max(0)
    Z_norm = (Z - Z_min) / (Z_max - Z_min)
    Z_means = Z_norm.mean(axis=0, keepdims=True)

    def reverse_centering(Z_centered: np.ndarray) -> np.ndarray:
        """Invert the centering: add the means back, then undo min-max scaling."""
        Z_norm = Z_centered + Z_means
        Z = Z_norm * (Z_max - Z_min) + Z_min
        return Z

    return Z_norm - Z_means, reverse_centering
# --- Center the (x, y) data and verify the round trip -----------------------
Z = np.column_stack((x, y))
Z_centered, reverse_centering = centering(Z)
Z, Z_centered  # notebook-style inspection
# Output from a previous run (kept from the original notebook):
# (array([[ 4.17022005e+00,  1.45748754e+01],
#         [ 7.20324493e+00,  1.31228692e+01],
#         [ 1.14374817e-03,  1.95940478e+00],
#         [ 3.02332573e+00,  6.29854033e+00],
#         [ 1.46755891e+00,  8.32144163e+00],
#         [ 9.23385948e-01, -3.33365023e+00],
#         [ 1.86260211e+00,  3.75795262e+00],
#         [ 3.45560727e+00,  6.75905148e+00],
#         [ 3.96767474e+00,  1.23366578e+01],
#         [ 5.38816734e+00,  8.47666088e+00]]),
#  array([[ 0.14217059,  0.41027917],
#         [ 0.56330115,  0.32920012],
#         [-0.43669885, -0.29416021],
#         [-0.01707382, -0.0518658 ],
#         [-0.2330895 ,  0.06109164],
#         [-0.30864703, -0.58972083],
#         [-0.1782384 , -0.19373051],
#         [ 0.04294777, -0.02615117],
#         [ 0.1140475 ,  0.28529861],
#         [ 0.31128058,  0.06975898]]))

Z_ = reverse_centering(Z_centered)
# Bug fix: compare the *absolute* reconstruction error. The original
# `(Z - Z_) < 1e-10` is satisfied by any negative difference, however large,
# so it could not catch a broken inverse.
assert (np.abs(Z - Z_) < 1e-10).all()

Z_centered  # notebook-style inspection
# Output from a previous run (kept from the original notebook):
# array([[ 0.14217059,  0.41027917],
#        [ 0.56330115,  0.32920012],
#        [-0.43669885, -0.29416021],
#        [-0.01707382, -0.0518658 ],
#        [-0.2330895 ,  0.06109164],
#        [-0.30864703, -0.58972083],
#        [-0.1782384 , -0.19373051],
#        [ 0.04294777, -0.02615117],
#        [ 0.1140475 ,  0.28529861],
#        [ 0.31128058,  0.06975898]])
# --- OLS fit on the centered data -------------------------------------------
fig, ax = plt.subplots(1, 1, figsize=(6, 4))

ols_model_ = LinearRegression(fit_intercept=True)
ols_model_.fit(Z_centered[:, 0][:, np.newaxis], Z_centered[:, 1])
# get prediction
y_pred_ = ols_model_.predict(Z_centered[:, 0][:, np.newaxis])

ax.scatter(
    Z_centered[:, 0],
    Z_centered[:, 1],
    label="Data points",
    edgecolors="k",
)
ax.scatter(
    Z_centered[:, 0],
    y_pred_,
    # label="Data points",
    edgecolors="green",
)
ax.plot(
    Z_centered[:, 0],
    y_pred_,
    color="green",
)
ax.grid(True)
# Test revert centering:
#  - plot predicted line based on reverted predictions
#  - plot predicted points based on original predictions
fig, ax = plt.subplots(1, 1, figsize=(6, 4))

# Map the centered predictions back to the original (x, y) scale.
Z_centered_ = np.column_stack((Z_centered[:, 0], y_pred_))
Z_ = reverse_centering(Z_centered_)

ax.scatter(
    Z[:, 0],
    Z[:, 1],
    label="Data points",
    edgecolors="k",
)
ax.scatter(
    Z[:, 0],
    y_pred,
    # label="Data points",
    edgecolors="green",
)
ax.plot(
    Z_[:, 0],
    Z_[:, 1],
    color="green",
)
ax.grid(True)
# Total least squares on centered data
# Eigen-decompose the scatter matrix Z^T Z; the eigenvector belonging to the
# SMALLEST eigenvalue is the normal vector u of the TLS line.
e_val, e_vec = np.linalg.eig(Z_centered.T @ Z_centered)
idx = e_val.argsort()[0]  # index of the smallest eigenvalue
u = e_vec[:, idx]
u = u[:, np.newaxis]  # reshape to a (2, 1) column vector
e_val, e_vec, idx, u  # notebook-style inspection
# Output from a previous run (kept from the original notebook):
# (array([0.20313455, 1.46039495]),
#  array([[-0.71278528, -0.70138231],
#         [ 0.70138231, -0.71278528]]),
#  0,
#  array([[-0.71278528],
#         [ 0.70138231]]))
# --- Figure: TLS line (red) vs. OLS line (green) on centered data -----------
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
ax.scatter(
    Z_centered[:, 0],
    Z_centered[:, 1],
    label="Data points",
    edgecolors="k",
)
ax.scatter(
    Z_centered[:, 0],
    y_pred_,
    # label="Data points",
    edgecolors="green",
)
ax.plot(
    Z_centered[:, 0],
    y_pred_,
    color="green",
)

# With normal u = (a, b), the TLS line through the (centered) origin is
# y = (-a/b) * x.
negative_a_over_b = (-u[0] / u[1])[0]
y_pred__ = Z_centered[:, 0].dot(negative_a_over_b)
Z_centered_ = np.column_stack((Z_centered[:, 0], y_pred__))
ax.plot(
    Z_centered_[:, 0],
    Z_centered_[:, 1],
    "r-",
)

# Orthogonal projections: each point's TLS error is its component along u.
Z_centered_tls = -Z_centered.dot(u).dot(u.T)
X_tls_error = Z_centered_tls[:, :-1]
X_tls = Z_centered[:, 0][:, np.newaxis] + X_tls_error
y_pred_tls = (X_tls).dot(negative_a_over_b)
ax.scatter(
    X_tls,
    y_pred_tls,
    color="r",
)
# Dashed segments from each data point to its orthogonal projection.
for i in range(len(Z_centered)):
    plt.plot(
        [X_tls[i, 0], Z_centered[i, 0]],
        [y_pred_tls[i, 0], Z_centered[i, 1]],
        # marker="o",
        linestyle="--",
        color="gray",
        label="Line between Points",
    )
ax.grid(True)