第一次尝试用面向对象的方式实现算法:编写了一个 GaussianNB 类,并实现了拟合(fit)和预测(predict)方法。
import numpy as np
class GaussianNB():
    """Gaussian naive Bayes classifier.

    Every feature is modelled, independently per class, as a normal
    distribution whose mean and standard deviation are estimated in ``fit``.

    Attributes set by ``fit``:
        classes_: sorted array of the distinct labels found in ``y``.
        y_prior: list of class prior probabilities, ordered like ``classes_``.
        features_param: one list per class holding a ``(mean, std)`` tuple
            for every feature.
    """

    # Lower bound applied to a standard deviation before dividing by it, so a
    # constant feature (std == 0) cannot produce a division by zero / NaN.
    _MIN_SIGMA = 1e-9

    def fit(self, X, y):
        """Estimate class priors and per-class feature means / deviations.

        Args:
            X: array-like of shape (n_samples, n_features), numeric.
            y: array-like of shape (n_samples,) holding the class labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        # np.unique returns the labels sorted, matching sorted(set(y)).
        self.classes_, counts = np.unique(y, return_counts=True)

        # Parameters are stored unrounded: rounding to a fixed number of
        # decimals can turn a small deviation into exactly 0.
        self.y_prior = [float(count) / len(y) for count in counts]
        self.features_param = []
        for label in self.classes_:
            rows = X[y == label]
            means = np.mean(rows, axis=0)
            stds = np.std(rows, axis=0)
            self.features_param.append(
                [(float(m), float(s)) for m, s in zip(means, stds)]
            )

    def predict(self, x):
        """Return the most probable class label for every sample in ``x``.

        Args:
            x: array-like of shape (n_samples, n_features); a 1-D input is
                treated as a single sample.

        Returns:
            numpy array of shape (n_samples,) with labels taken from the
            values seen in ``fit``. Ties go to the first class in sorted order.
        """
        x = np.asarray(x, dtype=float)
        if x.ndim == 1:
            x = x.reshape(1, -1)

        # Scores are accumulated as log-probabilities: multiplying many small
        # densities underflows to 0 and makes every class tie.
        scores = np.empty((x.shape[0], len(self.y_prior)))
        for j, (prior, params) in enumerate(zip(self.y_prior, self.features_param)):
            means = np.array([mean for mean, _ in params], dtype=float)
            stds = np.array([std for _, std in params], dtype=float)
            stds = np.maximum(stds, self._MIN_SIGMA)
            log_density = (
                -0.5 * ((x - means) / stds) ** 2
                - np.log(stds * np.sqrt(2 * np.pi))
            )
            scores[:, j] = np.log(prior) + log_density.sum(axis=1)

        best = np.argmax(scores, axis=1)
        # Map column indices back to the real labels; fall back to the indices
        # when the parameters were assigned by hand without classes_.
        classes = getattr(self, "classes_", None)
        if classes is None:
            return best
        return np.asarray(classes)[best]

    def gauss_pro(self, v, miu, sigma):
        """Return the normal probability density of ``v``.

        Args:
            v: value (or array of values) at which to evaluate the density.
            miu: mean of the distribution.
            sigma: standard deviation; values below ``_MIN_SIGMA`` are raised
                to that floor to avoid dividing by zero.
        """
        sigma = np.maximum(sigma, self._MIN_SIGMA)
        coefficient = 1 / (sigma * np.sqrt(2 * np.pi))
        exponent = -((v - miu) ** 2) / (2 * sigma ** 2)
        return coefficient * np.exp(exponent)
if __name__ == '__main__':
    # Demo: fit the classifier on the iris dataset and print the labels it
    # predicts for the same samples it was trained on.
    from sklearn.datasets import load_iris

    dataset = load_iris()
    features, labels = dataset.data, dataset.target

    model = GaussianNB()
    model.fit(features, labels)
    print(model.predict(features))