import numpy as np
import matplotlib.pyplot as plt
from math import e
pi = 3.1415926535
def Gaussian_distribution(u,v,x): #正态分布密度函数
y = 1 / (v * pow(2*pi, 0.5))*np.exp(-((x-u)**2)/(2*v**2)) # 概率密度函数公式
return y
u1 = 1
u2 = 2
x1 = u1 + np.random.uniform(0,1,100) #random()产生的随机数符合正态分布
x2 = u2 + np.random.uniform(0,1,100)
x = np.zeros([2,100])
x[0],x[1] = x1,x2
x = x.reshape(1,200)
x = x.ravel()
x1 = np.array(x1)
x2 = np.array(x2)
x1_mean = np.mean(x1) #以产生随机数的均值作为正态分布的均值
x2_mean = np.mean(x2)
u1 = x1_mean
u2 = x2_mean
C1 = 1.0/100*(x1-x1_mean).T@(x1-x1_mean)#以产生随机数的方差作为正态分布的方差
C2 = 1.0/100*(x2-x2_mean).T@(x2-x2_mean)
v1 = np.sqrt(C1)
v2 = np.sqrt(C2)
k1 = 0.5
k2 = 0.5
m1 = x1_mean #数据初始化
m2 = x2_mean
n1 = v1
n2 = v2
maxtime = 100 #最大迭代次数
for i in range(0,maxtime):
q1_sum = 0
q2_sum = 0
q1_xsum = 0
q2_xsum = 0
q1 = 0
q2 = 0
s1 = 0
s2 = 0
for j in range(200): #EM算法
a1 = k1*Gaussian_distribution(m1,n1,x[j])
a2 = k2*Gaussian_distribution(m2,n2,x[j])
q1 = a1/(a1+a2)
q2 = a2/(a1+a2)
q1_sum = q1_sum+q1
q2_sum = q2_sum+q2
q1_xsum = q1_xsum+q1*x[j]
q2_xsum = q2_xsum+q2*x[j]
# print(q1_xsum,q1_sum,q1_xsum/q1_sum,q2_xsum,q2_sum,q2_xsum/q2_sum)
mm1 = q1_xsum/q1_sum
mm2 = q2_xsum/q2_sum
for j in range(200):
a1 = k1*Gaussian_distribution(m1,n1,x[j])
a2 = k2*Gaussian_distribution(m2,n2,x[j])
q1 = a1/(a1+a2)
q2 = a2/(a1+a2)
s1 = s1+q1*(x[j]-mm1)*(x[j]-mm1)
s2 = s2+q2*(x[j]-mm2)*(x[j]-mm2)
m1 = mm1 #状态更新
m2 = mm2
n1 = s1/q1_sum
n2 = s2/q2_sum
if(((k1-q1_sum/200)/k1<0.01) &((k2-q2_sum/200)/k2 < 0.01)): #迭代误差小于1%,退出迭代
break
k1 = q1_sum/200
k2 = q2_sum/200
sum = 0
for j in range(0,100):
a1 = k1*Gaussian_distribution(m1,n1,x[j])
a2 = k2*Gaussian_distribution(m2,n2,x[j])
q1 = a1/(a1+a2)
q2 = a2/(a1+a2)
if q11:
# print("warning",m2,v2,x[j])
q1 = a1/(a1+a2)
q2 = a2/(a1+a2)
if q1 q2:
sum = sum+1
print(sum/100)
print(m1,m2,n1,n2)
print(u1,u2,v1,v2)
print(k1,k2)
plt.show()
二、原理
高斯混合模型及EM算法
三、结果分析
蓝色的点和红色的点分别表示原有数据原本的正态分布,圆点和星号表示数据被分到了哪个正态分布。
由于算法问题,导致如果数据中,两个正态分布十分接近,会导致只有一个正态分布。如果是随机产生的数据,重复多次运行可能能解决问题。



