过拟合的认识
train_sizes,train_loss,test_loss = learning_curve(
SVC(gamma = 0.001),X,y,cv = 10,scoring = 'mean_squared_error',
train_sizes = [0.1,0.25,0.5,0.75,1])
1. SVC 是选择的 model，gamma 是 SVC 核函数的系数，控制模型的复杂度
2. train_sizes 以训练集大小的百分比指定采样点，学习曲线在这些点上分别返回训练和测试得分
"""Plot a learning curve for an SVC on the digits dataset.

Demonstrates over/under-fitting: train vs. cross-validated test loss is
recorded at 10%, 25%, 50%, 75% and 100% of the training set.
"""
import numpy as np
from sklearn.datasets import load_digits
# FIX: sklearn.cross_validation and sklearn.learning_curve were deprecated
# in 0.18 and removed in 0.20; the same names live in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import learning_curve

# Hand-written digits dataset (8x8 images, 10 classes) used for training.
digits = load_digits()
X = digits.data
y = digits.target

# FIX: scoring='mean_squared_error' was removed; scorers now follow the
# "greater is better" convention, so the MSE scorer is 'neg_mean_squared_error'
# (returns negative values — hence the sign flip when averaging below).
train_sizes, train_loss, test_loss = learning_curve(
    SVC(gamma=0.001), X, y, cv=10, scoring='neg_mean_squared_error',
    train_sizes=[0.1, 0.25, 0.5, 0.75, 1])

# Scores are negative MSE; negate the per-size mean (over the 10 CV folds,
# axis=1) to get a positive loss for plotting.
train_loss_mean = -np.mean(train_loss, axis=1)
test_loss_mean = -np.mean(test_loss, axis=1)

plt.plot(train_sizes, train_loss_mean, 'o-', color='r', label='training')
plt.plot(train_sizes, test_loss_mean, 'o-', color='g', label='test')
plt.legend(loc='best')
plt.show()
图片1 gamma = 0.001
由图片可以发现，由于模型是在训练数据上拟合的，
所以在训练数据上的损失明显更低
图片2 gamma = 0.01
gamma 增大后模型过分拟合训练数据（过拟合），导致在 test_data 上的表现很差