# Result code
from sklearn.datasets import load_iris
import numpy as np
from collections import defaultdict
from operator import itemgetter
from sklearn.model_selection import train_test_split
def train_feature_value(X, y_true, feature_index, value):
    """Find the majority class among samples whose feature equals ``value``.

    Args:
        X: Iterable of samples; each sample is indexable by ``feature_index``.
        y_true: Class labels, aligned one-to-one with ``X``.
        feature_index: Index of the feature to inspect.
        value: Feature value a sample must have to be counted.

    Returns:
        A tuple ``(most_frequent_class, error)``: the class that occurs most
        often among the matching samples, and the number of matching samples
        that belong to any other class (i.e. how many would be misclassified
        by always predicting ``most_frequent_class`` for this value).

    Raises:
        ValueError: If no sample has ``value`` at ``feature_index``.
    """
    # Count, per class, the samples whose selected feature equals ``value``.
    class_counts = defaultdict(int)
    for sample, y in zip(X, y_true):
        if sample[feature_index] == value:
            class_counts[y] += 1

    if not class_counts:
        raise ValueError(
            "no sample has value {!r} for feature {!r}".format(
                value, feature_index
            )
        )

    # max() returns the first maximal item, so ties go to the class that was
    # encountered first -- the same result a stable descending sort gives.
    most_frequent_class, most_frequent_count = max(
        class_counts.items(), key=itemgetter(1)
    )
    # Every matching sample outside the majority class is a prediction error.
    error = sum(class_counts.values()) - most_frequent_count
    return most_frequent_class, error
def train_on_feature(X, y_true, feature_index):
    """Learn, for one feature, the best class to predict for each of its values.

    Args:
        X: Two-dimensional array of samples; it is indexed as
            ``X[:, feature_index]``, so it must support that form of slicing.
        y_true: Class labels, aligned one-to-one with the rows of ``X``.
        feature_index: Index of the feature (column) to train on.

    Returns:
        A tuple ``(predictors, total_error)``. ``predictors`` maps every
        distinct value found in the column to the class that
        ``train_feature_value`` reports as most frequent for it;
        ``total_error`` is the sum of the per-value error counts.
    """
    # Distinct values this feature takes in the training data.
    values = set(X[:, feature_index])
    predictors = {}
    total_error = 0
    # Evaluate how well each individual value of the feature separates classes.
    for current_value in values:
        most_frequent_class, error = train_feature_value(
            X, y_true, feature_index, current_value
        )
        predictors[current_value] = most_frequent_class
        total_error += error
    return predictors, total_error
def Training(Xd_train, Yd_train):
    """Train a single-feature (OneR-style) rule model on the training set.

    Every feature is scored with ``train_on_feature``; the feature with the
    lowest total error is kept together with its value-to-class mapping.

    Args:
        Xd_train: Two-dimensional training array; ``Xd_train.shape[1]`` is
            used as the number of features.
        Yd_train: Class labels, aligned one-to-one with the rows of
            ``Xd_train``.

    Returns:
        A dict ``{'feature': index, 'predictor': {value: class, ...}}``, e.g.
        ``{'feature': 2, 'predictor': {0: 0, 1: 2}}``, where ``feature`` is the
        index of the feature used for prediction and ``predictor`` maps each
        value of that feature to the class to predict.

    Raises:
        ValueError: If ``Xd_train`` has no feature columns.
    """
    all_predictors = {}
    errors = {}
    # Score the classification ability of every feature.
    for feature_index in range(Xd_train.shape[1]):
        predictor, error = train_on_feature(Xd_train, Yd_train, feature_index)
        all_predictors[feature_index] = predictor
        errors[feature_index] = error

    if not errors:
        raise ValueError("Xd_train has no features to train on")

    # min() returns the first minimal item, so ties go to the lowest feature
    # index -- the same result a stable ascending sort gives.
    best_feature, _ = min(errors.items(), key=itemgetter(1))
    model = {'feature': best_feature, 'predictor': all_predictors[best_feature]}
    return model
def predict(Xd_test, model):
    """Predict a class for every sample using a trained single-feature model.

    Args:
        Xd_test: Iterable of samples; each sample is indexable by the model's
            feature index.
        model: Dict with keys ``'feature'`` (index of the feature to look at)
            and ``'predictor'`` (mapping from feature value to class).

    Returns:
        A NumPy array with one predicted class per sample, in input order.

    Raises:
        KeyError: If a sample's feature value (after conversion with
            ``int``) is not a key of ``model['predictor']``.
    """
    feature_index = model['feature']
    predictor = model['predictor']
    # The feature value is converted with int() before the lookup.
    y_predicted = np.array(
        [predictor[int(sample[feature_index])] for sample in Xd_test]
    )
    return y_predicted
# Load the iris dataset and pull out its data and target arrays.
dataset = load_iris()
X, Y = dataset.data, dataset.target

# Discretize: 1 where a value is at or above its column mean, otherwise 0.
attribute_means = X.mean(axis=0)
X_d = (X >= attribute_means).astype('int')

# Split the discretized data into training and test parts with a fixed
# random_state. Note that X_test holds discretized features (it comes from X_d).
Xd_train, X_test, Yd_train, Y_test = train_test_split(
    X_d, Y, random_state=14
)

# Train the prediction model and show it.
model = Training(Xd_train, Yd_train)
print("The predict model:{0}".format(model))

# Evaluate the model: percentage of test samples predicted correctly.
y_predicted = predict(X_test, model)
accuracy = 100 * np.mean(y_predicted == Y_test)
print("The test accuracy is {:.1f}%".format(accuracy))



