线性回归模型
其中包括5个方法
1、最小二乘法调用numpy包实现
2、最小二乘法调用scipy包实现
3、自己编写最小二乘法实现
4、线性回归模型调用sklearn包实现
5、自己编写线性回归方法实现
示例结果
代码
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq # 方法二中使用
from sklearn import linear_model
# Load the comma-delimited samples, then report the array's shape and type.
data = np.loadtxt('challenge_dataset.txt', delimiter=',')
shape_line = 'data.shape: {0}'.format(data.shape)
type_line = 'data.type : {0}'.format(type(data))
print(shape_line, type_line, sep='\n')
# Linear-regression model class
class Linear_regression_methods:
    """Fit a straight line ``y = a * x + b`` to two-column data in five ways.

    Each public ``one_`` ... ``five_`` method estimates the slope and the
    intercept with a different technique, then prints the fitted equation,
    plots the line over the data points and prints the mean squared error.

    Args:
        data: array indexed as ``data[:, 0]`` (x values) and ``data[:, 1]``
            (y values).
    """

    def __init__(self, data):
        self.data = data
        self.x = data[:, 0]
        self.y = data[:, 1]

    def plt_method(self, title, a, b):
        """Plot the data points together with the line ``y = a * x + b``.

        Args:
            title: plot title (a string).
            a: slope (weight) of the line.
            b: intercept (bias) of the line.
        """
        plt.title(title)
        plt.plot(self.x, self.y, 'o', label='data', markersize=10)
        plt.plot(self.x, a * self.x + b, 'r', label='line')
        plt.legend()
        plt.show()

    def print_method(self, title, a, b):
        """Print a separator line, the title and the fitted equation."""
        print('-'*50 + "\n{}\ny = {:.5f}x + {:.5f}".format(title, a, b))

    def computer_error(self, a, b):
        """Print the mean squared error of ``y = a * x + b`` on ``self.data``."""
        x = self.data[:, 0]
        y = self.data[:, 1]
        squared_residuals = (y - (a * x + b)) ** 2
        # Count the samples of this instance's data, not of a module-level
        # global that may be missing or refer to a different data set.
        results = np.sum(squared_residuals, axis=0) / float(len(self.data))
        print('this model final error: {:.5f}'.format(results))

    def _report(self, title, a, b):
        """Print the fitted equation, plot the line, then print the error."""
        self.print_method(title, a, b)
        self.plt_method(title, a, b)
        self.computer_error(a, b)

    def one_leastsq_call_numpy_pakeage(self):
        """Least squares via ``numpy.linalg.lstsq``."""
        # Design matrix with columns [x, 1] so the solution is (slope, intercept).
        A = np.vstack([self.x, np.ones(len(self.x))]).T
        # rcond=None requests NumPy's current default cutoff explicitly, which
        # silences the FutureWarning raised when the argument is omitted.
        a, b = np.linalg.lstsq(A, self.y, rcond=None)[0]
        self._report('first leastsq_call_numpy_pakeage', a, b)

    def two_leatsq_call_scipy_pakeage(self):
        """Least squares via ``scipy.optimize.leastsq``."""

        def fun(p, x):
            # The function to fit; p holds (slope, intercept).
            k, b = p
            return k * x + b

        def err(p, x, y):
            # Residuals minimised by leastsq.
            return fun(p, x) - y

        # Starting guess y = 1 * x + 1; any value works, it only affects how
        # long the solver takes to reach the optimum.
        p0 = [1, 1]
        # leastsq needs NumPy arrays as inputs.
        solution = leastsq(err, p0, args=(self.x, self.y))
        a, b = solution[0]
        self._report('second leatsq_call_scipy_pakeage', a, b)

    @staticmethod
    def _closed_form_fit(x, y):
        """Return ``(a, b)`` from the closed-form least-squares formulas."""
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float)
        n = len(x)
        sum_x = np.sum(x)
        sum_y = np.sum(y)
        sum_xx = np.sum(x * x)
        sum_xy = np.sum(x * y)
        a = (sum_xy - sum_x * sum_y / n) / (sum_xx - sum_x * sum_x / n)
        b = sum_y / n - a * sum_x / n
        return a, b

    def three_leastsq_function(self):
        """Least squares implemented by hand (closed-form solution)."""
        a, b = self._closed_form_fit(self.x, self.y)
        self._report('third leastsq_function', a, b)

    def four_linear_model_call_sklearn(self):
        """Linear regression via ``sklearn.linear_model.LinearRegression``."""
        body_reg = linear_model.LinearRegression()
        x_values = self.x.reshape(-1, 1)
        y_values = self.y.reshape(-1, 1)
        body_reg.fit(x_values, y_values)
        # Read the fitted parameters directly instead of rebuilding the line
        # from the first two predictions, which divides by zero whenever the
        # first two x values are equal.
        a = float(np.ravel(body_reg.coef_)[0])
        b = float(np.ravel(body_reg.intercept_)[0])
        self._report('fourth linear_model_call_sklearn', a, b)

    @staticmethod
    def _gradient_descent_fit(data, learning_rate, num_iter,
                              starting_b=0.0, starting_m=0.0):
        """Return ``(m, b)`` after ``num_iter`` gradient steps on the MSE."""
        x = data[:, 0]
        y = data[:, 1]
        n = float(len(data))
        b = starting_b
        m = starting_m
        for _ in range(num_iter):
            residual = y - (m * x + b)
            # Partial derivatives of the mean squared error w.r.t. b and m;
            # both are evaluated at the current (b, m) before either changes.
            b_gradient = np.sum(-(2 / n) * residual, axis=0)
            m_gradient = np.sum(-(2 / n) * x * residual, axis=0)
            b = b - learning_rate * b_gradient
            m = m - learning_rate * m_gradient
        return m, b

    def five_linear_regression(self, learning_rate=0.001, num_iter=1000):
        """Linear regression ``y = m * x + b`` fitted by gradient descent.

        Args:
            learning_rate: step size applied to each gradient update.
            num_iter: number of gradient steps to perform.
        """
        m, b = self._gradient_descent_fit(self.data, learning_rate, num_iter)
        self._report('five_linear_regression', m, b)
# Build the model from the loaded data and run the five fitting methods in order.
model = Linear_regression_methods(data)
for fit_name in (
    'one_leastsq_call_numpy_pakeage',
    'two_leatsq_call_scipy_pakeage',
    'three_leastsq_function',
    'four_linear_model_call_sklearn',
    'five_linear_regression',
):
    getattr(model, fit_name)()