K-Means
无监督学习定义:在没有给定事先标记过的训练示例的情况下,自动对输入的数据进行分类和分群。
# Split the frame into features (every column except "labels") and the target.
x = df.drop(["labels"], axis=1)
y = df["labels"]

# Fit K-Means with three clusters; random_state=0 is passed so reruns use the
# same seed.
km = KMeans(n_clusters=3, random_state=0)
km.fit(x)

# Cluster centres found by the fit.
centers = km.cluster_centers_

# Cluster index assigned to each training row.
y_predict = km.predict(x)

# The cluster ids do not line up with the label ids in y, so remap them:
# 0 -> 1, 1 -> 2, anything else -> 0. The mapping is hard-coded for this
# particular fit; re-check it if the data or the seed changes.
y_correct = np.where(y_predict == 0, 1, np.where(y_predict == 1, 2, 0))

# Accuracy of the remapped predictions against y (original note: 0.997).
accuracy_score(y, y_correct)
KNN(K 近邻):监督式学习
# K-nearest-neighbours classifier: supervised, so it is fitted on x together
# with the labels y.
from sklearn.neighbors import KNeighborsClassifier

# Build a 3-neighbour classifier and fit it in one expression; fit() hands
# back the fitted estimator, so `knn` is the same object as before.
knn = KNeighborsClassifier(n_neighbors=3).fit(x, y)

# Predict on the same rows that were used for fitting and score them against y
# (original note: accuracy 1.0).
y_predict_knn = knn.predict(x)
accuracy_score(y, y_predict_knn)
Mean Shift(均值漂移)
from sklearn.cluster import MeanShift, estimate_bandwidth

# Estimate the bandwidth (the radius of the search window) for Mean Shift,
# using n_samples=500 for the estimate.
bw = estimate_bandwidth(x, n_samples=500)

# Fit Mean Shift with the estimated bandwidth.
ms = MeanShift(bandwidth=bw)
ms.fit(x)

# Score the raw cluster ids against y before any remapping
# (original note: 0.31966666666666665).
y_predict_ms = ms.predict(x)
accuracy_score(y, y_predict_ms)

# Remap cluster ids onto the label ids in y: 0 -> 2, 1 -> 1, anything else -> 0.
# The catch-all branch is kept on purpose: the MeanShift call above does not
# fix the number of clusters, so ids beyond 0 and 1 are all sent to 0.
# The mapping is hard-coded for this particular fit; re-check it if the data
# changes.
y_correct_ms = np.where(y_predict_ms == 0, 2, np.where(y_predict_ms == 1, 1, 0))

# Accuracy of the remapped predictions against y (original note: 0.997).
accuracy_score(y, y_correct_ms)