# 4. Eigen-decomposition of inv(S_W) @ S_B: its leading eigenvectors are the
#    LDA projection directions.  (`@` is used instead of the deprecated
#    np.mat-style `*` so the matrix product is explicit.)
A = np.linalg.inv(S_W) @ S_B
eigvals, eigvecs = np.linalg.eig(A)
# Sanity check that (eigvals[0], eigvecs[:, 0]) really is an eigenpair:
# A v = lambda v (up to 6 decimals).
v0 = eigvecs[:, 0].reshape(4, 1)
np.testing.assert_array_almost_equal(A @ v0, eigvals[0] * v0,
                                     decimal=6, err_msg='', verbose=True)
# 5. Select linear discriminants for the new feature subspace.
# Pair each eigenvalue's magnitude with its eigenvector, sort descending,
# and stack the top-2 eigenvectors column-wise into the projection matrix W
# (4 features -> 2 discriminants).
eig_pairs = [(np.abs(eigvals[i]), eigvecs[:, i]) for i in range(len(eigvals))]
eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True)
W = np.hstack((eig_pairs[0][1].reshape(4, 1), eig_pairs[1][1].reshape(4, 1)))
# 6. Project the samples onto the new subspace.
# Project all 150 Iris samples onto the 2-D discriminant subspace.
X_trans = X.dot(W)
assert X_trans.shape == (150, 2)
# 7. Compare against sklearn's LDA.
# Side-by-side scatter plots: our hand-rolled LDA projection (left) vs.
# sklearn's LinearDiscriminantAnalysis (right), one color per Iris class.
plt.figure(figsize=(8, 4))

plt.subplot(121)
for cls, color in zip((0, 1, 2), ('r', 'g', 'b')):
    plt.scatter(X_trans[y == cls, 0], X_trans[y == cls, 1], c=color)
plt.title('DIY LDA')
plt.xlabel('$LD_1$')
plt.ylabel('$LD_2$')
plt.legend(labels, loc='best', fancybox=True)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
X_trans2 = LinearDiscriminantAnalysis(n_components=2).fit_transform(X, y)

plt.subplot(122)
for cls, color in zip((0, 1, 2), ('r', 'g', 'b')):
    plt.scatter(X_trans2[y == cls, 0], X_trans2[y == cls, 1], c=color)
plt.title('sklearn LDA')
plt.xlabel('$LD_1$')
plt.ylabel('$LD_2$')
plt.legend(labels, loc='best', fancybox=True)
# 图示 (Figures)
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
import math

# ---- Data preparation --------------------------------------------------
iris = datasets.load_iris()
X = iris.data                # (150, 4) feature matrix
y = iris.target              # (150,) class labels 0/1/2
names = iris.feature_names   # attribute (feature) names
labels = iris.target_names   # class names
y_c = np.unique(y)           # distinct class labels

# ---- Visualize each of the four features with 1-D histograms -----------
fig, axes = plt.subplots(2, 2, figsize=(12, 6))
for ax, column in zip(axes.ravel(), range(X.shape[1])):
    # Bin edges spanning this feature's value range.
    min_b = math.floor(np.min(X[:, column]))
    max_b = math.ceil(np.max(X[:, column]))
    bins = np.linspace(min_b, max_b, 25)
    # One semi-transparent histogram per class, overlaid.
    for i, color in zip(y_c, ('blue', 'red', 'green')):
        ax.hist(X[y == i, column], color=color,
                label='%s' % labels[i], bins=bins, alpha=0.5)
    ylims = ax.get_ylim()
    # Legend and axis annotations.
    leg = ax.legend(loc='upper right', fancybox=True, fontsize=8)
    leg.get_frame().set_alpha(0.5)
    ax.set_ylim([0, max(ylims) + 2])
    ax.set_xlabel(names[column])
    ax.set_title('Iris histogram feature %s' % str(column + 1))
    # Hide tick marks and all four spines for a cleaner look.
    ax.tick_params(axis='both', which='both',
                   bottom=False, top=False, left=False, right=False,
                   labelbottom=True, labelleft=True)
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
axes[0][0].set_ylabel('count')
axes[1][0].set_ylabel('count')
fig.tight_layout()
plt.show()

# ---- Per-class mean vectors --------------------------------------------
np.set_printoptions(precision=4)
mean_vector = []  # mean feature vector of each class
for i in y_c:
    mean_vector.append(np.mean(X[y == i], axis=0))
    print('均值向量 %s:%s\n' % (i, mean_vector[i]))

# ---- Within-class scatter matrix S_W -----------------------------------
S_W = np.zeros((X.shape[1], X.shape[1]))
for i in y_c:
    Xi = X[y == i] - mean_vector[i]   # class samples centered on class mean
    S_W += Xi.T @ Xi
print('类内离散度矩阵:\n', S_W)

# ---- Between-class scatter matrix S_B ----------------------------------
S_B = np.zeros((X.shape[1], X.shape[1]))
mu = np.mean(X, axis=0)  # overall sample mean
for i in y_c:
    Ni = len(X[y == i])  # number of samples in class i
    diff = (mean_vector[i] - mu).reshape(-1, 1)
    S_B += Ni * (diff @ diff.T)
print('类间离散度矩阵:\n', S_B)

# ---- Eigen-decomposition of inv(S_W) @ S_B -----------------------------
A = np.linalg.inv(S_W) @ S_B
eigvals, eigvecs = np.linalg.eig(A)
# Sanity check: A v = lambda v for the first eigenpair (6-decimal tolerance).
v0 = eigvecs[:, 0].reshape(4, 1)
np.testing.assert_array_almost_equal(A @ v0, eigvals[0] * v0,
                                     decimal=6, err_msg='', verbose=True)

# ---- Select the top-2 linear discriminants -----------------------------
eig_pairs = [(np.abs(eigvals[i]), eigvecs[:, i]) for i in range(len(eigvals))]
eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True)
W = np.hstack((eig_pairs[0][1].reshape(4, 1), eig_pairs[1][1].reshape(4, 1)))

# ---- Project samples onto the new subspace -----------------------------
X_trans = X.dot(W)
assert X_trans.shape == (150, 2)

# ---- Compare with sklearn ----------------------------------------------
plt.figure(figsize=(8, 4))

plt.subplot(121)
for cls, color in zip((0, 1, 2), ('r', 'g', 'b')):
    plt.scatter(X_trans[y == cls, 0], X_trans[y == cls, 1], c=color)
plt.title('DIY LDA')
plt.xlabel('$LD_1$')
plt.ylabel('$LD_2$')
plt.legend(labels, loc='best', fancybox=True)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
X_trans2 = LinearDiscriminantAnalysis(n_components=2).fit_transform(X, y)

plt.subplot(122)
for cls, color in zip((0, 1, 2), ('r', 'g', 'b')):
    plt.scatter(X_trans2[y == cls, 0], X_trans2[y == cls, 1], c=color)
plt.title('sklearn LDA')
plt.xlabel('$LD_1$')
plt.ylabel('$LD_2$')
plt.legend(labels, loc='best', fancybox=True)
plt.show()



