import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Load the iris dataset bundled with scikit-learn.
irisdata = load_iris()
# features: the measurement matrix; labels: the integer class of each sample.
features = irisdata.data
labels = irisdata.target

# Use the SimHei font so the Chinese axis labels below can be rendered, and
# turn off the Unicode minus sign for tick labels.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Scatter plot of two of the features (columns 2 and 3), coloured by class.
plt.scatter(features[:, 2], features[:, 3], c=labels)
plt.xlabel("花瓣长度")
plt.ylabel("花瓣宽度")

# x: features, y: labels -- restricted to the samples whose label is not 0,
# which leaves a two-class problem.
keep = labels != 0
x = features[keep]
y = labels[keep]

# Split into a training set (70%) and a test set (30%) with a fixed seed.
x_train, x_test, y_train, y_target = train_test_split(
    x, y, test_size=0.3, random_state=40
)
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
# Metric used to score the predictions.
from sklearn.metrics import accuracy_score

# Train a decision tree, limited to depth 2, on the training split.
classifier = DecisionTreeClassifier(max_depth=2)
# fit() returns the estimator itself, so clf and classifier are the same object.
clf = classifier.fit(x_train, y_train)

# plot_tree draws on the current axes and clears whatever is already there.
# Give the tree its own figure so it does not wipe the scatter plot drawn
# earlier in the script.
plt.figure()
tree.plot_tree(clf)

# Predict the held-out samples and compute the accuracy.
y_pred = classifier.predict(x_test)
# accuracy_score expects the true labels first, then the predictions.
acc = accuracy_score(y_target, y_pred)
# Report the result.
print("the accuracy of prediction is", acc)

# Test samples coloured by their true labels, on a separate figure so the
# points are not drawn on top of the tree diagram.
plt.figure()
plt.scatter(x_test[:, 2], x_test[:, 3], c=y_target)
plt.show()

# The same test samples coloured by the predicted labels.
plt.figure()
plt.scatter(x_test[:, 2], x_test[:, 3], c=y_pred)
plt.show()
import os

import pydotplus
from IPython.display import Image
from sklearn import tree

# Append the Graphviz binary directory to PATH so the renderer used by
# pydotplus can be found.
# NOTE(review): the previous literal was 'E:ProgramDataAnaconda3binGraphvizbin',
# i.e. a path whose separators had been lost. The separators below are a
# reconstruction -- confirm the directory against the local Graphviz install.
os.environ["PATH"] += os.pathsep + r'E:\ProgramData\Anaconda3\bin\Graphviz\bin'

# Write the fitted tree to tree.dot in Graphviz DOT format.
tree.export_graphviz(classifier, out_file="tree.dot")

# Build a second DOT description in memory (out_file=None returns it as a
# string), this time with feature names and filled, rounded node styling.
dot_data = tree.export_graphviz(
    classifier,
    out_file=None,
    feature_names=irisdata.feature_names,
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = pydotplus.graph_from_dot_data(dot_data)

# Render the graph to PNG bytes and wrap them in an IPython Image. This is a
# bare trailing expression: a notebook shows it as the cell output, while a
# plain script run discards the value.
Image(graph.create_png())