期望最大化(Expectation Maximization)算法简介和Python代码实现(附代码)
数据派THU
共 5961字,需浏览 12分钟
· 2022-06-08
![](https://filescdn.proginn.com/a06b3a934a35760a9a6925de77e5d3fd/39c6e761b0ddede90311a8e133c2a5a9.webp)
来源:DeepHub IMBA 本文约3400字,建议阅读5分钟
本文中通过几个简单的示例解释期望最大化算法是如何工作的。
这个算法最流行的例子(互联网上讨论最多的)可能来自这篇论文
(http://www.nature.com/nbt/journal/v26/n8/full/nbt1406.html)。这是一个非常简单的例子,所以我们也从这里开始。
![](https://filescdn.proginn.com/bcf2c2a731f337c7e90f32e5cf18e09c/73be6610cdf06971cd3eb9ae49743e43.webp)
![](https://filescdn.proginn.com/5994b3dc80ce54d4b0d2842bb21fecac/3a9f1bfde3902542b149752f7f00f861.webp)
![](https://filescdn.proginn.com/325e14ba411175571f1e37aca1d367e5/ba838d70029fe7aa7c3908dd5bef9b72.webp)
m = 10 # number of flips in experimentn = 5 # number of experiments
p_1 = 0.8
p_2 = 0.3
xs = [] # number of heads in each experiment
zs = [0,0,1,0,1] # which coin to flip
np.random.seed(5)
for i in zs:
if i==0:
xs.append(stats.binom(n=m, p=p_1).rvs()) # flip coin 1
elif i==1:
xs.append(stats.binom(n=m, p=p_2).rvs()) # flip coin 2
xs = np.array(xs)
print(xs)
exp1 = [0,1,3] # indexes of experiments with coin 1
exp2 = [2,4] # indexes of experiments with coin 2
print('Analytical solutions:')
print('p1: ', xs[exp1].sum() / (m * len(exp1)))
print('p2: ', xs[exp2].sum() / (m * len(exp2)))
![](https://filescdn.proginn.com/78e8cbc5fae2a76e5fbb8731973b851b/5bb25807e775b8bb2262604f955305e7.webp)
ef neg_log_likelihood(probs, m, xs, zs):
'''
compute negative log-likelihood
'''
ll = 0
for x,z in zip(xs,zs):
ll += stats.binom(p=probs[z], n=m).logpmf(x)
return -ll
res = optimize.minimize(neg_log_likelihood, [0.5,0.5], bounds=[(0,1),(0,1)], args=(m,xs,zs), method='tnc')
print('Numerical solution:')
print('p1: ', res.x[0])
print('p2: ', res.x[1])
![](https://filescdn.proginn.com/ec86d90bc0e6205d5f87375f5f5998a1/606bcb0ac368984ab869ad82938db08d.webp)
![](https://filescdn.proginn.com/c8e1b6b128b5b4230c44e96624aa288e/8b7816cb87c6c07e5db21c95348a1800.webp)
![](https://filescdn.proginn.com/72b5564c304c1e5b6c7985527d044534/2499ddd25bd8c13a848c77c3e46ee446.webp)
![](https://filescdn.proginn.com/9b355e8802843901c6d051ca4c465079/7b478151c998ebd4ff6e17bd3a9b76f0.webp)
![](https://filescdn.proginn.com/65966bf75785e37579cfed74129690de/ebc4125cd95ef0998e1bbf2853086191.webp)
![](https://filescdn.proginn.com/acfeaefbb50bc9f289dcfe180445ee8a/ad4a29f55bdd3c73571044816859a3c6.webp)
期望步骤(E-step):计算完整对数似然函数相对于 Z 给定数据 X 的当前条件分布和当前参数估计 theta 的条件期望; 最大化步骤(M-step):找到最大化该期望的参数 theta 的值。
![](https://filescdn.proginn.com/0d35d462c84de22c82dcd196b0c3260e/33a96f9ab02a95a8d341da4a8b17c09d.webp)
![](https://filescdn.proginn.com/781600667d4c52f7e4583d0e554caeb4/867c0661d4f08668a085e68dcf93d416.webp)
![](https://filescdn.proginn.com/05bbc824a2aece4f3421c311b5a4bf76/190178ce4d275f3de51d29673e8c4baa.webp)
![](https://filescdn.proginn.com/5283491cd209e59b2b340760d32c4a41/60c414a866aca9b38b90bef153b3d7e8.webp)
![](https://filescdn.proginn.com/eeca9f5b0c8a169f2cdacee48147d6fd/f1aa569129d1cf8678f79f86ab05751e.webp)
m = 10 # number of flips in each sample
n = 5 # number of samples
xs = np.array([5,9,8,4,7])
theta = [0.6, 0.5] # initial guess p_1, p_2
for i in range(10):
theta =
T_1s = []
T_2s = []
# E-step
for x in xs:
T_1 = stats.binom(n=m,p=theta[0]).pmf(x) / (stats.binom(n=m,p=theta[0]).pmf(x) +
m,p=theta[1]).pmf(x)) =
T_2 = stats.binom(n=m,p=theta[1]).pmf(x) / (stats.binom(n=m,p=theta[0]).pmf(x) +
m,p=theta[1]).pmf(x)) =
T_1s.append(T_1)
T_2s.append(T_2)
# M-step
T_1s = np.array(T_1s)
T_2s = np.array(T_2s)
p_1 = np.dot(T_1s, xs) / (m * np.sum(T_1s))
p_2 = np.dot(T_2s, xs) / (m * np.sum(T_2s))
{i}, p1={p_1:.2f}, p2={p_2:.2f}') :
theta = [p_1, p_2]
![](https://filescdn.proginn.com/453325b9e07950bfe10b6f505d70c845/b82ea564bc45fbc79690f0e84117e3d0.webp)
![](https://filescdn.proginn.com/4f3a223f03485e4ec20bc09783a51d94/9057444e721be276bda085635071ed59.webp)
![](https://filescdn.proginn.com/0ff2d02de85d1b0f228d9463b6f83f3c/9d5700fce6c06b50102287e4ecec7f44.webp)
![](https://filescdn.proginn.com/f24e26c68a3daa94afcfd7552150af87/c346836c6f9fa6250c25cfc94029c7dd.webp)
![](https://filescdn.proginn.com/54d2e8f0e6310e799e0465d1ad51e1cf/7e12904038e1916b7bb80673c9f320ad.webp)
![](https://filescdn.proginn.com/ab3f3de197b6d2c4e82301490d33e7ea/2fd5376a075a935563d887fbaec3e75c.webp)
![](https://filescdn.proginn.com/fb751aaa981c5c1a2b72065148940d16/f5d4e04b8e276742d04d67d358fbbd50.webp)
![](https://filescdn.proginn.com/a3788753444cd0cbea13a0572e99da93/67103e44bb74fba93dee6703921823cd.webp)
# model parameters
p_1 = 0.1
p_2 = 0.8
tau_1 = 0.3
tau_2 = 1-tau_1
m = 10 # number of flips in each sample
n = 10 # number of samples
# generate data
np.random.seed(123)
dists = [stats.binom(n=m, p=p_1), stats.binom(n=m, p=p_2)]
xs = [dists[x].rvs() for x in np.random.choice([0,1], size=n, p=[tau_1,tau_2])]
# random initial guess
np.random.seed(123)
theta = [np.random.rand() for _ in range(3)]
last_ll = 0
max_iter = 100
for i in range(max_iter):
theta =
T_1s = []
T_2s = []
# E-step
lls = []
for x in xs:
denom = (tau * stats.binom(n=m,p=p_1).pmf(x) + (1-tau) * stats.binom(n=m,p=p_2).pmf(x))
T_1 = tau * stats.binom(n=m,p=p_1).pmf(x) / denom
T_2 = (1-tau) * stats.binom(n=m,p=p_2).pmf(x) / denom
T_1s.append(T_1)
T_2s.append(T_2)
* np.log(tau * stats.binom(n=m,p=p_1).pmf(x)) +
T_2 * np.log(tau * stats.binom(n=m,p=p_2).pmf(x)))
# M-step
T_1s = np.array(T_1s)
T_2s = np.array(T_2s)
tau = np.sum(T_1s) / n
p_1 = np.dot(T_1s, xs) / (m * np.sum(T_1s))
p_2 = np.dot(T_2s, xs) / (m * np.sum(T_2s))
{i}, tau={tau}, p1={p_1:.2f}, p2={p_2:.2f}, log_likelihood={sum(lls):.2f}') :
theta = [tau, p_1, p_2]
# stop when likelihood doesn't improve anymore
if abs(sum(lls) - last_ll) < 0.001:
break
else:
last_ll=sum(lls)
![](https://filescdn.proginn.com/acfe3c639f2fb0974f364c40af5b7e7b/bfa85c4d25f96bb1cd7668836362b523.webp)
https://github.com/financialnoob/misc/blob/305bf8bc7cbdddaf47d40078100ba27935ff4452/6.introduction_to_em_algorithm.ipynb
编辑:于腾凯 校对:林亦霖
评论