对于直线拟合
import numpy as np
import matplotlib.pyplot as plt
# 100 evenly spaced sample points on [1, 10].
x = np.linspace(1, 10, 100)
# Design matrix with rows [x_i, 1]: first column scales the slope,
# second column (all ones) carries the intercept.
X = np.column_stack((x, np.ones(len(x))))
# Ground-truth line y = 2x + 3, evaluated as X @ W_true.
W_true = np.array([2, 3])
y = X @ W_true
# Observed data: the true values disturbed by standard-normal noise.
y_noise = np.random.randn(len(x)) + y

# Red dots are the noisy observations, the blue line is the true relation.
fig = plt.figure(figsize=(8, 6))
plt.scatter(x, y_noise, c="r", s=10, marker="o")
plt.plot(x, y, c="b", label="true value")
plt.legend()
plt.show()
真实的直线方程为y=2x+3,又由于添加了均值为0,方差为1的噪声点,所以图上的红点便是我们要拟合的数据。
利用最小二乘法,根据带噪声的数据点求出拟合直线的参数 a 和 b,即下面公式中的 w:$w = (X^{T}X)^{-1}X^{T}y$。其中 y 为我们实际观察到的数据,即对应代码中的 y_noise。
import numpy as np
import matplotlib.pyplot as plt
# 100 evenly spaced sample points on [1, 10].
x = np.linspace(1, 10, 100)
# Design matrix with rows [x_i, 1] (slope column, then intercept column).
X = np.column_stack((x, np.ones(len(x))))
# Ground-truth line y = 2x + 3 and its noisy observations.
W_true = np.array([2, 3])
y = X @ W_true
y_noise = np.random.randn(len(x)) + y

# Weights of the fitted line from the normal equation
# W = (X^T X)^-1 X^T y, using the noisy observations as y.
W_pred = np.linalg.inv(X.T @ X) @ X.T @ y_noise
y_pred = X @ W_pred

# Noisy data (red), true line (blue) and fitted line (green, drawn on top).
fig = plt.figure(figsize=(8, 6))
plt.scatter(x, y_noise, c="r", s=10, marker="o")
plt.plot(x, y, c="b", label="true value", zorder=1)
plt.plot(x, y_pred, c="g", label="pred value", zorder=2)
plt.legend()
plt.show()
拟合曲线
import numpy as np
import matplotlib.pyplot as plt
# 100 evenly spaced sample points on [1, 10].
x = np.linspace(1, 10, 100)
# Design matrix with rows [x_i, 1]; the model is still a straight line.
X = np.column_stack((x, np.ones(len(x))))
# Ground truth is now a curve: y = sin(0.5 x) + 0.1 x.
y = 0.1 * x + np.sin(0.5 * x)
# Observed data: the true values disturbed by standard-normal noise.
y_noise = np.random.randn(len(x)) + y

# Weights of the fitted straight line from the normal equation
# W = (X^T X)^-1 X^T y.
W_pred = np.linalg.inv(X.T @ X) @ X.T @ y_noise
y_pred = X @ W_pred

# Noisy data (red), true curve (blue) and fitted line (green, drawn on top).
fig = plt.figure(figsize=(8, 6))
plt.scatter(x, y_noise, c="r", s=10, marker="o")
plt.plot(x, y, c="b", label="true value", zorder=1)
plt.plot(x, y_pred, c="g", label="pred value", zorder=2)
plt.legend()
plt.show()
真实的 y=sin(0.5x)+0.1x,但我们还是用直线去拟合上面的数据,效果显然不太好。现打算用多项式拟合数据,公式如下:$y = a x^{n} + b x^{n-1} + \cdots + d$,其中 n 为多项式的最高次数,参数 $w=(a, b, \ldots, d)^{T}$ 仍由最小二乘法求出。
import numpy as np
import matplotlib.pyplot as plt
# 100 evenly spaced sample points on [1, 10].
x = np.linspace(1, 10, 100)
# Ground-truth curve y = sin(0.5 x) + 0.1 x and its noisy observations.
y = np.sin(0.5 * x) + 0.1 * x
y_noise = y + np.random.randn(len(x))

# Highest degree of the fitted polynomial.
times = 4
# Design matrix with columns x**times, ..., x**1, x**0. A polynomial of
# degree `times` has times + 1 coefficients; the trailing column of ones
# is the constant term (the original matrix had only `times` columns, so
# the constant term was overwritten and missing from the model).
X = np.vander(x, times + 1)

# Solve for W = (a, b, ..., d), the coefficients from the highest power
# down to the constant term. lstsq minimises ||X @ W - y_noise||^2, i.e.
# the same problem as W = (X^T X)^-1 X^T y, but without forming the
# explicit inverse, which becomes badly conditioned as `times` grows.
W_pred = np.linalg.lstsq(X, y_noise, rcond=None)[0]
y_pred = X @ W_pred

# Noisy data (red), true curve (blue) and fitted polynomial (green, on top).
fig = plt.figure(figsize=(8, 6))
plt.scatter(x, y_noise, s=10, c='r', marker='o')
plt.plot(x, y, label='true value', c='b', zorder=1)
plt.plot(x, y_pred, label='pred value', c='g', zorder=2)
plt.legend()
plt.show()
当前设定的最高次数为4,拟合的还可以。当最高次数设置为1时,拟合曲线便成了直线,为欠拟合状态。当最高次数为10时,为过拟合状态。
使用scipy.optimize拟合曲线
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq
# Polynomial model: p is the coefficient vector (not the degree); the
# degree of the polynomial is len(p) - 1.
def func(p, x):
    """Evaluate the polynomial with coefficients ``p`` at ``x``.

    Args:
        p: Polynomial coefficients ordered from the highest power down to
            the constant term, as accepted by ``np.poly1d``.
        x: Scalar or array of points at which to evaluate the polynomial.

    Returns:
        The polynomial value(s) at ``x``.
    """
    f = np.poly1d(p)
    return f(x)
# Residual (error) between the observations and the polynomial model.
# NOTE: the name is a misspelling of "residual"; it is kept unchanged
# because callers reference it by this name.
def resudual(p, x, y):
    """Return the residuals ``y - func(p, x)``.

    Args:
        p: Polynomial coefficients passed through to ``func``.
        x: Points at which the model is evaluated.
        y: Observed values at ``x``.

    Returns:
        The difference between the observed values and the model values.
    """
    error = y - func(p, x)
    return error
# 100 evenly spaced sample points on [1, 10].
x = np.linspace(1, 10, 100)
# Ground-truth curve y = sin(0.5 x) + 0.1 x and its noisy observations.
y = np.sin(0.5 * x) + 0.1 * x
y_noise = y + np.random.randn(len(x))

# Highest degree of the fitted polynomial.
times = 4
# Random starting guess for the coefficients. A polynomial of degree
# `times` has times + 1 coefficients (highest power first, constant term
# last); a vector of only `times` entries would fit degree times - 1.
p_init = np.random.randn(times + 1)
# leastsq minimises the sum of squared residuals; the first element of
# the returned tuple is the fitted coefficient vector.
para = leastsq(resudual, p_init, args=(x, y_noise))
y_pred = func(para[0], x)

# Noisy data (red), true curve (blue) and fitted polynomial (green, on top).
fig = plt.figure(figsize=(8, 6))
plt.scatter(x, y_noise, s=10, c='r', marker='o')
plt.plot(x, y, label='true value', c='b', zorder=1)
plt.plot(x, y_pred, label='pred value', c='g', zorder=2)
plt.legend()
plt.show()