# Hello
# Data: leaf_data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
# Load the training table. The code below expects train.csv (in the working
# directory) to contain at least an 'id' column and a 'species' column; every
# other column is passed on as a feature.
train_data = pd.read_csv("train.csv")
# Keep the raw species names; captured before the column is popped below.
LABELS = train_data['species']
# Pop the 'id' column out of train_data so it is not fed to the models.
ID = train_data.pop('id')
# Debug aid (disabled): print(train_data[0:1])
# Pop the 'species' column out of train_data; it is the classification target.
y = train_data.pop('species')
# Encode the species names as integer class labels.
y = LabelEncoder().fit_transform(y)
# A single-argument print(...) prints the same text on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(y)
# Standardize the data by setting the mean to 0 and std to 1.
standardize = True
if standardize:
    X = StandardScaler().fit_transform(train_data)
else:
    X = train_data.values
# Show the first feature row as a quick sanity check.
print(X[0:1])
from sklearn.decomposition import IncrementalPCA
from sklearn.decomposition import PCA

# Number of principal components to keep.
n_components = 60

# Disabled alternative that fits the projection in mini-batches:
# ipca = IncrementalPCA(n_components=n_components, batch_size=20)
# X_ipca = ipca.fit_transform(X)

# Project the feature matrix X onto its leading principal components.
# NOTE(review): the classifiers visible in this file are trained on X, not on
# X_pca -- confirm whether X_pca is meant to replace X in the split.
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X)
# Disabled plotting code, kept inside a bare triple-quoted string so that it
# is never executed. For each of the two projections (X_ipca, X_pca) it
# scatter-plots columns 0 and 1 of the samples of class indices 0..24, one
# colour per class.
# NOTE(review): before re-enabling it, (1) the block indentation inside the
# string has been lost and must be restored, (2) X_ipca only exists if the
# commented-out IncrementalPCA lines are enabled, and (3) the plot titles
# still say "iris dataset" -- confirm the intended title.
'''
colors = ['navy', 'turquoise', 'darkorange', 'blue', 'purple', 'green',
'yellow','red','pink', 'palegoldenrod','navy', 'turquoise', 'darkorange', 'blue', 'purple', 'green',
'yellow','red','pink', 'palegoldenrod','navy', 'turquoise', 'darkorange', 'blue', 'purple', 'green',
'yellow','red','pink', 'palegoldenrod',]
for X_transformed, title in [(X_ipca, "Incremental PCA"), (X_pca, "PCA")]:
plt.figure(figsize=(8, 8))
for color, i, target_name in \
zip(colors, [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24], LABELS):
plt.scatter(X_transformed[y == i, 0], X_transformed[y == i, 1],
color=color, lw=2, label=target_name)
if "Incremental" in title:
err = np.abs(np.abs(X_pca) - np.abs(X_ipca)).mean()
plt.title(title + " of iris dataset\nMean absolute unsigned error "
"%.6f" % err)
else:
plt.title(title + " of iris dataset")
#plt.legend(loc="best", shadow=False, scatterpoints=1)
plt.axis([-10, 10, -10, 10])
plt.show()'''
from sklearn.model_selection import train_test_split

# Hold out 40% of the samples for evaluation. The fixed random_state keeps
# the partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=0
)
from sklearn.ensemble import RandomForestClassifier
# NOTE(review): check_output is not used anywhere in the visible code.
from subprocess import check_output
# Random forest with 100 trees: fit on the training split, then score
# (mean accuracy) on the held-out split.
forest = RandomForestClassifier(n_estimators=100)
forest = forest.fit(X_train, y_train)
output = forest.score(X_test, y_test)
# %-formatting inside print(...) emits exactly the text the old print
# statement did, and is valid syntax on both Python 2 and Python 3.
print('RandomForestClassifier: %s' % output)
from sklearn import tree
# Single decision tree with default settings. Despite its name, `regressor`
# holds a classifier; the name is kept so existing references stay valid.
regressor = tree.DecisionTreeClassifier()
regressor = regressor.fit(X_train, y_train)
# Mean accuracy on the held-out split.
score = regressor.score(X_test, y_test)
# %-formatting inside print(...) emits exactly the text the old print
# statement did, and is valid syntax on both Python 2 and Python 3.
print('Decision Tree: %s' % score)
from sklearn import svm
# Support vector classifier with the library's default settings.
Svm = svm.SVC()
Svm = Svm.fit(X_train, y_train)
# Mean accuracy on the held-out split.
Svm_score = Svm.score(X_test, y_test)
# %-formatting inside print(...) emits exactly the text the old print
# statement did, and is valid syntax on both Python 2 and Python 3.
print('svm: %s' % Svm_score)
from sklearn import neighbors
# k-nearest-neighbours classifier: vote among the 2 closest training samples,
# each neighbour counting equally ('uniform' weights).
n_neighbors = 2
weights = 'uniform'
KNN = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
KNN = KNN.fit(X_train, y_train)
# Mean accuracy on the held-out split.
KNN_score = KNN.score(X_test, y_test)
# %-formatting inside print(...) emits exactly the text the old print
# statement did, and is valid syntax on both Python 2 and Python 3.
print('KNN: %s' % KNN_score)
from sklearn.ensemble import AdaBoostClassifier
# AdaBoost ensemble capped at 100 boosting rounds (library-default base
# estimator).
Ada = AdaBoostClassifier(n_estimators=100)
Ada = Ada.fit(X_train, y_train)
# Mean accuracy on the held-out split.
Ada_score = Ada.score(X_test, y_test)
# %-formatting inside print(...) emits exactly the text the old print
# statement did, and is valid syntax on both Python 2 and Python 3.
print('AdaBoostClassifier: %s' % Ada_score)
from sklearn.neural_network import MLPClassifier
# Multi-layer perceptron with two hidden layers (500 and 300 units), trained
# with the L-BFGS solver and a small L2 penalty; random_state fixes the
# weight initialisation.
MLP = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(500, 300),
    random_state=1
)
MLP = MLP.fit(X_train, y_train)
# Mean accuracy on the held-out split.
MLP_score = MLP.score(X_test, y_test)
# %-formatting inside print(...) emits exactly the text the old print
# statement did, and is valid syntax on both Python 2 and Python 3.
print('MLPClassifier: %s' % MLP_score)
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes with default settings.
gnb = GaussianNB()
gnb = gnb.fit(X_train, y_train)
# Mean accuracy on the held-out split.
gnb_score = gnb.score(X_test, y_test)
# %-formatting inside print(...) emits exactly the text the old print
# statement did, and is valid syntax on both Python 2 and Python 3.
print('GaussianNB: %s' % gnb_score)
# Results without PCA dimensionality reduction:
# Results after PCA dimensionality reduction: