#%%
#------------------------------------#
import sys, os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sbn
#%%
#------------------------------------#
# Simulate 10,000 "height" samples from a two-component 1-D Gaussian mixture:
# 3,000 draws with mean 0 / std 0.5 followed by 7,000 draws with mean 6 / std 2.
raw_data_array = np.concatenate((
    np.random.normal(0, 0.5, [3000, 1]),
    np.random.normal(6, 2, [7000, 1]),
))
# Label the rows 1..N; np.arange yields the integer labels directly instead of
# going through a float linspace that is then truncated to int.
data = pd.DataFrame(
    raw_data_array,
    index=np.arange(1, len(raw_data_array) + 1),
    columns=['height'],
)
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line.
data.info()
# data['height_bin'] = pd.cut(data['height'], bins = 10)
#%%
# from sklearn.preprocessing import OneHotEncoder, LabelEncoder
# Histogram of the simulated heights, split into 30 bins.
plt.hist(x=data['height'], bins=30)
#%%
# Initial GMM model parameters for the two components.
c1, c2 = 0.7, 0.3      # mixing weights
u1, u2 = -1.0, 2.0     # component means
th1, th2 = 1, 1        # component standard deviations
# Per-sample membership probabilities; every sample starts at 0.5 / 0.5.
# (Assignment runs left to right, so column 'p1' is created before 'p2'.)
data['p1'] = data['p2'] = 0.5
data.sample(10)
#%%
#------------------------------------#
import math

# EM iterations for the two-component 1-D Gaussian mixture.
#
# Reads the current parameters (c1, c2: mixing weights; u1, u2: means;
# th1, th2: standard deviations) and the 'height' column of `data`; each epoch
# rewrites the 'p1'/'p2' columns with the per-sample responsibilities and then
# re-estimates all six parameters from them.
#
# The E-step is computed on whole arrays. The previous per-row form
# `data.iloc[i]['p1'] = ...` was chained assignment: it writes to a temporary
# row object and is not guaranteed to update `data` (with pandas Copy-on-Write
# it never does), and it needed 10 x len(data) slow Python-level row lookups.
heights = data['height'].to_numpy()
for epoch in range(10):
    # E-step: weighted Gaussian density of every sample under each component ...
    dens1 = c1 * np.exp(-((heights - u1) / th1) ** 2 / 2) / math.sqrt(2 * math.pi * th1 * th1)
    dens2 = c2 * np.exp(-((heights - u2) / th2) ** 2 / 2) / math.sqrt(2 * math.pi * th2 * th2)
    # ... normalised so that p1 + p2 == 1 for each sample.
    dens_total = dens1 + dens2
    data['p1'] = dens1 / dens_total
    data['p2'] = dens2 / dens_total

    # M-step: mixing weights are the mean responsibilities.
    c1 = data['p1'].mean()
    c2 = data['p2'].mean()
    # Means are responsibility-weighted averages of the heights.
    u1 = (data['p1'] * data['height']).sum() / data['p1'].sum()
    u2 = (data['p2'] * data['height']).sum() / data['p2'].sum()
    # Standard deviations are square roots of the responsibility-weighted variances.
    th1 = pow((data['p1'] * pow(data['height'] - u1, 2)).sum() / data['p1'].sum(), 0.5)
    th2 = pow((data['p2'] * pow(data['height'] - u2, 2)).sum() / data['p2'].sum(), 0.5)

    print("{} times processed:".format(epoch))
    print("\tc1={}, c2={}, u1={}, u2={}, th1={}, th2={}".format(c1, c2, u1, u2, th1, th2))
#%%
GMM的EM算法实现
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- 本文的计算公式出自《统计学习方法》,写这篇文章主要是想把自己对这个算法的思路理清,并把自己的理解记录下来,同时分享...
- 05 EM算法 - 高斯混合模型 - GMM 多元正态分布 - multivariate_normal API参考...