# 主成分分析(PCA)
import numpy as np
import matplotlib.pyplot as plt
# Principal component analysis (PCA) of a small 2-D sample set: centre the
# data, diagonalise its covariance matrix and project every sample onto the
# eigenvector that belongs to the largest eigenvalue.

# Sample set: one observation per row, one feature per column.
data = np.array([
    [2.5, 2.4], [0.5, 0.7], [2.2, 2.9], [1.9, 2.2], [3.1, 3.0],
    [2.3, 2.7], [2.0, 1.6], [1.0, 1.1], [1.5, 1.6], [1.1, 0.9],
])

# Centre each feature on zero. Broadcasting subtracts the per-column mean
# from every row, so neither the sample count nor the feature count is
# hard-coded.
average = np.mean(data, axis=0)
data_adjust = data - average

# Covariance matrix of the features (rowvar=False: columns are variables).
cov = np.cov(data_adjust, rowvar=False)

# Eigenvalues and eigenvectors; column j of `eigenvectors` pairs with
# eigenvalues[j].
# NOTE(review): `cov` is symmetric, so np.linalg.eigh would also apply; eig
# is kept so the sign convention of the returned axis (and therefore the
# printed projection) stays the same as before.
eigenvalues, eigenvectors = np.linalg.eig(cov)

# Pick the eigenvector of the largest eigenvalue. np.argmax returns the first
# maximum, and no builtin such as `max` gets shadowed along the way.
principal_index = int(np.argmax(eigenvalues))
principal_axis = eigenvectors[:, principal_index].reshape(-1, 1)

# Projection: one coordinate per sample along the principal axis, as a
# column vector with one row per sample.
final_data = np.dot(data_adjust, principal_axis)
print(final_data)
# 实验结果:
# [[-0.82797019]
#  [ 1.77758033]
#  [-0.99219749]
#  [-0.27421042]
#  [-1.67580142]
#  [-0.9129491 ]
#  [ 0.09910944]
#  [ 1.14457216]
#  [ 0.43804614]
#  [ 1.22382056]]



