import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the dataset; the rest of the script reads its "x1" and "x2" columns.
df = pd.read_csv("datas/anomaly_data.csv")
# Preview of the first rows (only displayed when run interactively, e.g. in a notebook cell).
df.head()
# Pull out the two feature columns. They were previously used below without
# ever being assigned, which raised a NameError as soon as the script ran.
x1 = df["x1"]
x2 = df["x2"]
# Mean and standard deviation of x1 and x2
x1_mean = x1.mean()
x1_sigma = x1.std()
x2_mean = x2.mean()
x2_sigma = x2.std()
from scipy.stats import norm
# Evaluate each feature's Gaussian density (parameterised by the mean and
# standard deviation computed above) on a fixed grid.
# Note: the grid is 300 evenly spaced points on [0, 20], not random samples.
x1_range = np.linspace(start=0, stop=20, num=300)
x1_morm = norm.pdf(x1_range, loc=x1_mean, scale=x1_sigma)  # density of x1 at each grid point
x2_range = np.linspace(start=0, stop=20, num=300)
x2_morm = norm.pdf(x2_range, loc=x2_mean, scale=x2_sigma)  # density of x2 at each grid point
# Build the anomaly-detection model
from sklearn.covariance import EllipticEnvelope
# First pass: estimator with its default settings, fitted on the full frame.
ad_model = EllipticEnvelope().fit(df)
# Predict labels for the same data the model was fitted on
y_predict = ad_model.predict(df)
# Second pass: use a smaller contamination value (0.01) so fewer points are
# flagged; its predictions replace the first pass's result in y_predict.
ad_model2 = EllipticEnvelope(contamination=0.01).fit(df)
y_predict = ad_model2.predict(df)
# Plot every sample and circle the ones predicted as anomalies
outlier_mask = y_predict == -1  # rows whose predicted label is -1
plt.figure()
raw = plt.scatter(df["x1"], df["x2"], marker='x')
anomaly = plt.scatter(
    df.loc[outlier_mask, "x1"],
    df.loc[outlier_mask, "x2"],
    s=100,
    marker='o',
    facecolor='none',  # hollow circles so the underlying 'x' marker stays visible
    edgecolors='r',
)
plt.title("异常数据检测")
plt.xlabel("x1")
plt.ylabel("x2")
plt.legend((raw, anomaly), ("原始数据", "异常数据"))
plt.show()