初始k个中心(本文是2个)
一直循环,
计算每个点离每个中心的距离
根据距离进行分类
用分类后的每一个聚类内部数据的均值,当作新的聚类中心
如果新的中心和旧的中心一致,
就break循环
import numpy as np
import matplotlib.pyplot as plt
import readtemp # 用树莓派pioneer300拓展的温度传感器
# One-dimensional k-means (k = 2) on simulated temperature preferences.
#
# Two synthetic "users" are generated, their samples are clustered into two
# groups, the current sensor reading is assigned to the nearest cluster
# center, and the samples plus the final centers are plotted.

# Simulated training data: each user is a normal distribution described by
# its mean and standard deviation.
mean1 = 22
sigma1 = 1.3
mean2 = 29
sigma2 = 1
person1 = mean1 + sigma1 * np.random.randn(12)
person2 = mean2 + sigma2 * np.random.randn(18)
x = np.hstack((person1, person2))

k = 2
n = len(x)
# Columns 0 and 1 hold the distance of every sample to center1 / center2;
# column 2 holds the index (0 or 1) of the nearest center.
dis = np.zeros([n, k + 1])

# 1. Choose the initial cluster centers (the first two samples).
center1 = np.array([x[0]])
center2 = np.array([x[1]])

iter_ = 100  # upper bound on the number of refinement passes
useTime1 = 0
useTime2 = 0
while iter_ > 0:
    # Count the pass so the loop always terminates, even if the centers
    # never compare exactly equal.
    iter_ -= 1

    # 2. Distance of every sample to both centers (1-D, so |x - c|).
    dis[:, 0] = np.abs(x - center1[0])
    dis[:, 1] = np.abs(x - center2[0])

    # 3. Assign each sample to its nearest center; ties go to center1.
    dis[:, 2] = np.argmin(dis[:, :2], axis=1)

    # 4. Recompute each center as the mean of the samples assigned to it.
    index1 = dis[:, 2] == 0
    index2 = dis[:, 2] == 1
    useTime1 = np.count_nonzero(index1)
    useTime2 = np.count_nonzero(index2)
    # An empty cluster keeps its previous center; the mean of an empty
    # selection is NaN, which would never satisfy the convergence test.
    center1_new = np.array([x[index1].mean()]) if useTime1 else center1
    center2_new = np.array([x[index2].mean()]) if useTime2 else center2

    # 5. Stop as soon as neither center moved: the clustering is final.
    if np.array_equal(center1, center1_new) and np.array_equal(center2, center2_new):
        break
    center1 = center1_new
    center2 = center2_new

# 6. Classify the current sensor reading and report the result.
# NOTE(review): read_temp() is assumed to return a single numeric value --
# confirm against the readtemp module.
temperature = readtemp.read_temp()
# Compare absolute distances; a signed difference picks the wrong user
# whenever the reading lies below both centers.
dis1 = np.abs(temperature - center1)
dis2 = np.abs(temperature - center2)
target = 1 if dis1 < dis2 else 2

print("center1: %.2f times of use: %d\ncenter2: %.2f times of use: %d\n"
      % (center1[0], useTime1, center2[0], useTime2))
print("current temperature: %.2f\n Used by user%d" % (temperature, target))

# Plot the samples and the final centers on a single horizontal line.
Centerx = np.hstack((center1, center2))
Centery = np.array([0, 0])
y = np.zeros(n)
plt.scatter(x, y, color='red', alpha=0.3, s=70, label='originalData')
plt.scatter(Centerx, Centery, color='green', s=50, label='center')
plt.title('k-means clustering')
plt.legend()  # the labels above are only shown once a legend is drawn
plt.show()



