# 4. Eigen-decomposition of inv(S_W) @ S_B: its leading eigenvectors are the
#    LDA projection directions.  (`@` is used instead of the deprecated
#    np.mat-style `*` so the matrix product is explicit.)
A = np.linalg.inv(S_W) @ S_B
eigvals, eigvecs = np.linalg.eig(A)
# Sanity check that (eigvals[0], eigvecs[:, 0]) really is an eigenpair:
# A v = lambda v (up to 6 decimals).
v0 = eigvecs[:, 0].reshape(4, 1)
np.testing.assert_array_almost_equal(A @ v0, eigvals[0] * v0,
                                     decimal=6, err_msg='', verbose=True)
# 5. Select linear discriminants for the new feature subspace.
# Pair each eigenvalue's magnitude with its eigenvector, sort descending,
# and stack the top-2 eigenvectors column-wise into the projection matrix W
# (4 features -> 2 discriminants).
eig_pairs = [(np.abs(eigvals[i]), eigvecs[:, i]) for i in range(len(eigvals))]
eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True)
W = np.hstack((eig_pairs[0][1].reshape(4, 1), eig_pairs[1][1].reshape(4, 1)))
# 6. Project the samples onto the new subspace.
# Project all 150 Iris samples onto the 2-D discriminant subspace.
X_trans = X.dot(W)
assert X_trans.shape == (150, 2)
# 7. Compare against sklearn's LDA.
# Side-by-side scatter plots: our hand-rolled LDA projection (left) vs.
# sklearn's LinearDiscriminantAnalysis (right), one color per Iris class.
plt.figure(figsize=(8, 4))

plt.subplot(121)
for cls, color in zip((0, 1, 2), ('r', 'g', 'b')):
    plt.scatter(X_trans[y == cls, 0], X_trans[y == cls, 1], c=color)
plt.title('DIY LDA')
plt.xlabel('$LD_1$')
plt.ylabel('$LD_2$')
plt.legend(labels, loc='best', fancybox=True)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
X_trans2 = LinearDiscriminantAnalysis(n_components=2).fit_transform(X, y)

plt.subplot(122)
for cls, color in zip((0, 1, 2), ('r', 'g', 'b')):
    plt.scatter(X_trans2[y == cls, 0], X_trans2[y == cls, 1], c=color)
plt.title('sklearn LDA')
plt.xlabel('$LD_1$')
plt.ylabel('$LD_2$')
plt.legend(labels, loc='best', fancybox=True)
# 图示 (Figures)
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np
import math

# ---- Data preparation --------------------------------------------------
iris = datasets.load_iris()
X = iris.data                # (150, 4) feature matrix
y = iris.target              # (150,) class labels 0/1/2
names = iris.feature_names   # attribute (feature) names
labels = iris.target_names   # class names
y_c = np.unique(y)           # distinct class labels

# ---- Visualize each of the four features with 1-D histograms -----------
fig, axes = plt.subplots(2, 2, figsize=(12, 6))
for ax, column in zip(axes.ravel(), range(X.shape[1])):
    # Bin edges spanning this feature's value range.
    min_b = math.floor(np.min(X[:, column]))
    max_b = math.ceil(np.max(X[:, column]))
    bins = np.linspace(min_b, max_b, 25)
    # One semi-transparent histogram per class, overlaid.
    for i, color in zip(y_c, ('blue', 'red', 'green')):
        ax.hist(X[y == i, column], color=color,
                label='%s' % labels[i], bins=bins, alpha=0.5)
    ylims = ax.get_ylim()
    # Legend and axis annotations.
    leg = ax.legend(loc='upper right', fancybox=True, fontsize=8)
    leg.get_frame().set_alpha(0.5)
    ax.set_ylim([0, max(ylims) + 2])
    ax.set_xlabel(names[column])
    ax.set_title('Iris histogram feature %s' % str(column + 1))
    # Hide tick marks and all four spines for a cleaner look.
    ax.tick_params(axis='both', which='both',
                   bottom=False, top=False, left=False, right=False,
                   labelbottom=True, labelleft=True)
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
axes[0][0].set_ylabel('count')
axes[1][0].set_ylabel('count')
fig.tight_layout()
plt.show()

# ---- Per-class mean vectors --------------------------------------------
np.set_printoptions(precision=4)
mean_vector = []  # mean feature vector of each class
for i in y_c:
    mean_vector.append(np.mean(X[y == i], axis=0))
    print('均值向量 %s:%s\n' % (i, mean_vector[i]))

# ---- Within-class scatter matrix S_W -----------------------------------
S_W = np.zeros((X.shape[1], X.shape[1]))
for i in y_c:
    Xi = X[y == i] - mean_vector[i]   # class samples centered on class mean
    S_W += Xi.T @ Xi
print('类内离散度矩阵:\n', S_W)

# ---- Between-class scatter matrix S_B ----------------------------------
S_B = np.zeros((X.shape[1], X.shape[1]))
mu = np.mean(X, axis=0)  # overall sample mean
for i in y_c:
    Ni = len(X[y == i])  # number of samples in class i
    diff = (mean_vector[i] - mu).reshape(-1, 1)
    S_B += Ni * (diff @ diff.T)
print('类间离散度矩阵:\n', S_B)

# ---- Eigen-decomposition of inv(S_W) @ S_B -----------------------------
A = np.linalg.inv(S_W) @ S_B
eigvals, eigvecs = np.linalg.eig(A)
# Sanity check: A v = lambda v for the first eigenpair (6-decimal tolerance).
v0 = eigvecs[:, 0].reshape(4, 1)
np.testing.assert_array_almost_equal(A @ v0, eigvals[0] * v0,
                                     decimal=6, err_msg='', verbose=True)

# ---- Select the top-2 linear discriminants -----------------------------
eig_pairs = [(np.abs(eigvals[i]), eigvecs[:, i]) for i in range(len(eigvals))]
eig_pairs = sorted(eig_pairs, key=lambda k: k[0], reverse=True)
W = np.hstack((eig_pairs[0][1].reshape(4, 1), eig_pairs[1][1].reshape(4, 1)))

# ---- Project samples onto the new subspace -----------------------------
X_trans = X.dot(W)
assert X_trans.shape == (150, 2)

# ---- Compare with sklearn ----------------------------------------------
plt.figure(figsize=(8, 4))

plt.subplot(121)
for cls, color in zip((0, 1, 2), ('r', 'g', 'b')):
    plt.scatter(X_trans[y == cls, 0], X_trans[y == cls, 1], c=color)
plt.title('DIY LDA')
plt.xlabel('$LD_1$')
plt.ylabel('$LD_2$')
plt.legend(labels, loc='best', fancybox=True)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
X_trans2 = LinearDiscriminantAnalysis(n_components=2).fit_transform(X, y)

plt.subplot(122)
for cls, color in zip((0, 1, 2), ('r', 'g', 'b')):
    plt.scatter(X_trans2[y == cls, 0], X_trans2[y == cls, 1], c=color)
plt.title('sklearn LDA')
plt.xlabel('$LD_1$')
plt.ylabel('$LD_2$')
plt.legend(labels, loc='best', fancybox=True)
plt.show()



