# 6. Recursive feature elimination with cross-validation (RFECV)
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV
from sklearn.datasets import make_classification
# Configure matplotlib for the Chinese plot title used below: select the
# SimHei font for sans-serif text and turn off the Unicode minus sign
# (presumably because SimHei has no glyph for it -- confirm on target systems).
plt.rcParams.update(
    {
        "font.sans-serif": ["SimHei"],
        "axes.unicode_minus": False,
    }
)
# Build a synthetic 8-class classification task with 1000 samples and 25
# features, of which only 3 are informative and 2 are redundant; no feature
# is repeated. The fixed random_state keeps the data set reproducible.
X, y = make_classification(
    # Size of the data set.
    n_samples=1000,
    n_features=25,
    # Feature composition.
    n_informative=3,
    n_redundant=2,
    n_repeated=0,
    # Target layout.
    n_classes=8,
    n_clusters_per_class=1,
    # Reproducibility.
    random_state=0,
)
# Smallest number of features RFECV is allowed to keep.
min_features_to_select = 1

# Linear-kernel SVM used as the estimator inside the recursive elimination.
svc = SVC(kernel="linear")

# Create the RFECV object: one feature is removed per step, candidates are
# evaluated with 2-fold stratified cross-validation, and the "accuracy"
# score reports the proportion of correctly classified samples.
rfecv = RFECV(
    svc,
    min_features_to_select=min_features_to_select,
    step=1,
    scoring="accuracy",
    cv=StratifiedKFold(n_splits=2),
)
rfecv.fit(X, y)

print(f"Optimal number of features : {rfecv.n_features_:d}")
# Plot the mean cross-validation accuracy against the number of selected
# features.
#
# ``RFECV.grid_scores_`` was deprecated in scikit-learn 1.0 and removed in
# 1.2; the mean score per feature-subset size is exposed as
# ``cv_results_["mean_test_score"]`` instead. Fall back to ``grid_scores_``
# only on older releases that do not provide ``cv_results_``.
if hasattr(rfecv, "cv_results_"):
    mean_scores = rfecv.cv_results_["mean_test_score"]
else:
    mean_scores = rfecv.grid_scores_

# One score per candidate subset size, starting at min_features_to_select.
# NOTE: this x-axis assumes the RFECV was built with step=1 (one feature
# removed per iteration), as it is where ``rfecv`` is constructed.
n_features_tested = range(
    min_features_to_select,
    len(mean_scores) + min_features_to_select,
)

plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score (accuracy)")
plt.plot(n_features_tested, mean_scores)
plt.title("交叉验证的递归特征")
plt.show()