#%%
#------------------------------------#
import sys, os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sbn
#%%
#------------------------------------#
# Synthetic bimodal sample: 3000 draws from N(mean=0, std=0.5) stacked on top
# of 7000 draws from N(mean=6, std=2), giving one 10000 x 1 column.
raw_data_array = np.vstack([
    np.random.normal(loc=0, scale=0.5, size=(3000, 1)),
    np.random.normal(loc=6, scale=2, size=(7000, 1)),
])

# Wrap the sample in a single-column frame whose index runs 1..10000.
data = pd.DataFrame(
    data=raw_data_array,
    index=np.arange(1, 10001, dtype=int),
    columns=['height'],
)

# DataFrame.info() writes its summary to stdout itself and returns None, so
# this statement prints the summary followed by the literal text "None".
print(data.info())
# data['height_bin'] = pd.cut(data['height'], bins = 10)
#%%
# from sklearn.preprocessing import OneHotEncoder, LabelEncoder
# Visual sanity check: 30-bin histogram of the generated 'height' column.
plt.hist(x=data['height'], bins=30)
#%%
# GMM: initialise the model parameters.
# c1/c2 are the mixture weights, u1/u2 the component means, and th1/th2 the
# component standard deviations used by the Gaussian density in the EM loop.
c1, c2 = 0.7, 0.3
u1, u2 = -1.0, 2.0
th1 = th2 = 1

# Per-sample responsibilities for each component; every row starts at 0.5/0.5.
data['p1'] = data['p2'] = 0.5

# Notebook display: show 10 random rows to confirm the new columns exist.
data.sample(10)
#%%
#------------------------------------#
import math

# Expectation-Maximisation for a two-component 1-D Gaussian mixture fitted to
# data['height'].
#
# Reads and updates these module-level names on every epoch:
#   c1, c2   - mixture weights
#   u1, u2   - component means
#   th1, th2 - component standard deviations
#   data['p1'], data['p2'] - per-sample responsibilities (each row sums to 1)
#
# The E-step is computed on the whole column at once and assigned back with
# data['p1'] = ... . Writing through data.iloc[i]['p1'] = ... is chained
# assignment: it targets a temporary row Series and is not guaranteed to
# modify `data`, which would leave the M-step reading stale responsibilities.
heights = data['height']
for epoch in range(10):
    # E-step: weighted Gaussian density of every sample under each component.
    w1 = c1 * np.exp(-((heights - u1) / th1) ** 2 / 2) / math.sqrt(2 * math.pi * th1 * th1)
    w2 = c2 * np.exp(-((heights - u2) / th2) ** 2 / 2) / math.sqrt(2 * math.pi * th2 * th2)

    # Normalise so that the two responsibilities of each sample sum to 1.
    total = w1 + w2
    data['p1'] = w1 / total
    data['p2'] = w2 / total

    # M-step: re-estimate weights, means and standard deviations from the
    # responsibilities. Means are updated first because the spread estimates
    # are taken around the new means.
    c1 = data['p1'].mean()
    c2 = data['p2'].mean()
    u1 = (data['p1'] * heights).sum() / data['p1'].sum()
    u2 = (data['p2'] * heights).sum() / data['p2'].sum()
    th1 = math.sqrt((data['p1'] * (heights - u1) ** 2).sum() / data['p1'].sum())
    th2 = math.sqrt((data['p2'] * (heights - u2) ** 2).sum() / data['p2'].sum())

    print("{} times processed:".format(epoch))
    print("\tc1={}, c2={}, u1={}, u2={}, th1={}, th2={}".format(c1, c2, u1, u2, th1, th2))
#%%
# Reader comments (appears to be page text left over from the web export; not part of the script)