import numpy as np
import math
import operator
# PCA worked example: centre ten 2-D samples, compute their covariance matrix,
# and project them onto the principal component with the largest eigenvalue.
# Lines marked "recorded output" are the values printed by the original
# notebook session; they are kept for reference only and are not executed.
x = np.array([2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2, 1, 1.5, 1.1])
y = np.array([2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9])

# Mean of each feature (uses the actual sample count, not a hard-coded 10).
X = x.mean()
Y = y.mean()
print(X, Y)
# recorded output: 1.81 1.9100000000000001

# Centre each feature on its mean.
x1 = x - X
# recorded output:
#   array([ 0.69, -1.31,  0.39,  0.09,  1.29,  0.49,  0.19, -0.81, -0.31,
#          -0.71])
y1 = y - Y
# recorded output:
#   array([ 0.49, -1.21,  0.99,  0.29,  1.09,  0.79, -0.31, -0.81, -0.31,
#          -1.01])

# Stack the centred features as columns: one row per sample, shape (10, 2).
data = np.c_[x1, y1]
print(data)
# recorded output:
#   [[ 0.69  0.49]
#    [-1.31 -1.21]
#    [ 0.39  0.99]
#    [ 0.09  0.29]
#    [ 1.29  1.09]
#    [ 0.49  0.79]
#    [ 0.19 -0.31]
#    [-0.81 -0.81]
#    [-0.31 -0.31]
#    [-0.71 -1.01]]

# Covariance between the two features. np.cov treats ROWS as variables by
# default, which would give a 10x10 matrix here; rowvar=False makes each
# column a variable and yields the 2x2 matrix recorded below.
cov_ = np.cov(data, rowvar=False)
print(cov_)
# recorded output:
#   [[0.61655556 0.61544444]
#    [0.61544444 0.71655556]]

# eig returns the eigenvalues first, then the eigenvectors as matrix COLUMNS.
eigen = np.linalg.eig(cov_)
print(eigen)
# recorded output:
#   (array([0.0490834 , 1.28402771]), array([[-0.73517866, -0.6778734 ],
#          [ 0.6778734 , -0.73517866]]))
eigenvalues, eigenvectors = eigen

# Largest eigenvalue, taken from the computed result rather than retyped.
data_max = np.max(eigenvalues)
print(data_max)
# recorded output: 1.28402771

# Eigenvector belonging to the largest eigenvalue, kept as a (2, 1) column.
# NOTE: an eigenvector's sign is arbitrary, so another LAPACK build may print
# the negated vector (and a negated projection below).
t = eigenvectors[:, [np.argmax(eigenvalues)]]
print(t)
# recorded output:
#   [[-0.6778734 ]
#    [-0.73517866]]

# Project the centred data onto the principal axis. Order matters: the data
# matrix comes first and the eigenvector second, (10, 2) @ (2, 1) -> (10, 1).
data_finally = np.dot(data, t)
print(data_finally)
# recorded output:
#   [[-0.82797019]
#    [ 1.77758033]
#    [-0.9921975 ]
#    [-0.27421042]
#    [-1.67580143]
#    [-0.91294911]
#    [ 0.09910944]
#    [ 1.14457217]
#    [ 0.43804614]
#    [ 1.22382056]]