1、环境及数据准备
#--------------------------------------
# Environment
#--------------------------------------
# NOTE: the original had this whole section collapsed onto a single line that
# began with '#', so R treated all of it as one comment and nothing ran.
# Each statement is restored to its own line here.
# `rm(list = ls())` from the original is intentionally not restored: it would
# wipe the caller's workspace when the script is sourced; start a fresh R
# session instead if a clean environment is needed.

# Load the required R packages
library(rattle)
library(rpart)
library(rpart.plot)
library(RWeka)
library(party)
library(partykit)
library(C50)
library(export)

#--------------------------------------
# Data import and preprocessing
#--------------------------------------
# Name of the built-in data set; used in plot titles and output file names
dataname <- "iris"
# Load the (already clean) data
puredata <- datasets::iris
# Number of columns in the data set, i.e. the index of the class column
idcol.y <- ncol(puredata)

#--------------------------------------
# Train / test split
# (the original heading also mentions a validation set, but none is created)
#--------------------------------------
# Fix the seed so the random split is reproducible
set.seed(1010)
# Draw 70% of the row indices for training; the outer parentheses print them
(id.train <- sample(nrow(puredata), 0.7 * nrow(puredata)))
# Training set: the sampled rows
ds.train <- puredata[id.train, ]
# Test set: all remaining rows
ds.test <- puredata[-id.train, ]
2、CART算法示例
#--------------------------------------
# > 1 < [ CART algorithm ]
# using 'rpart' package
#--------------------------------------
# Install 'rpart' only when it is missing, then attach it.
if (!requireNamespace("rpart", quietly = TRUE)) {
  install.packages("rpart")
}
library(rpart)

# Fit a classification tree for Species on all other columns of ds.train.
# For CART, the split criterion is the Gini index: split = "gini".
model.cart <- rpart(
  Species ~ .,
  data = ds.train,
  method = "class",
  parms = list(split = "gini")
)
# Print the complexity-parameter table of the fitted tree
printcp(model.cart)

# Plot the decision tree.
# Alternative using rpart.plot:
# rpart.plot(model.cart, branch = 1, type = 2, fallen.leaves = FALSE, cex = 1, sub = "决策树模型-CART")
# fancyRpartPlot (from 'rattle') gives a nicer-looking plot.
if (!requireNamespace("rattle", quietly = TRUE)) {
  install.packages("rattle")
}
library(rattle)
# The title gets a line break before the timestamp; the original literal "n("
# appears to be a "\n(" whose backslash was lost.
fancyRpartPlot(
  model.cart,
  main = paste("CART of", dataname, "\n(", Sys.time(), ")", sep = " "),
  sub = "zhengcf@ysu.edu.cn"
)

# Export the current plot to a PNG file (graph2png is from the 'export' package)
graph2png(file = paste("CART of", dataname))

# Predict class labels for the test set
pred.cart <- predict(
  model.cart,
  newdata = ds.test,
  type = "class"
)
# Show the predictions and how many there are
pred.cart
length(pred.cart)

# Confusion matrix: rows = true class, columns = predicted class
table(ds.test$Species, pred.cart, dnn = c("真实值", "预测值"))
3、ID3算法示例
#--------------------------------------
# > 2 < [ ID3 algorithm ]
# using 'rpart' package
#--------------------------------------
# Install 'rpart' only when it is missing, then attach it.
if (!requireNamespace("rpart", quietly = TRUE)) {
  install.packages("rpart")
}
library(rpart)

# Fit a classification tree for Species on all other columns of ds.train.
# split = "information" selects the information-gain (entropy) criterion.
# NOTE(review): rpart still grows a binary CART-style tree; only the impurity
# measure changes, so this approximates ID3 rather than implementing it exactly.
model.ID3 <- rpart(
  Species ~ .,
  data = ds.train,
  method = "class",
  parms = list(split = "information")
)
# Print the complexity-parameter table of the fitted tree
printcp(model.ID3)

# Plot the decision tree.
# Alternative using rpart.plot:
# rpart.plot(model.ID3, branch = 1, type = 1, fallen.leaves = TRUE, cex = 1, sub = "决策树模型-ID3")
# fancyRpartPlot lives in 'rattle'; load it here so this section does not
# depend on another section having attached it first.
if (!requireNamespace("rattle", quietly = TRUE)) {
  install.packages("rattle")
}
library(rattle)
# The title gets a line break before the timestamp; the original literal "n("
# appears to be a "\n(" whose backslash was lost.
fancyRpartPlot(
  model.ID3,
  main = paste("ID3 of", dataname, "\n(", Sys.time(), ")", sep = " "),
  sub = "zhengcf@ysu.edu.cn"
)

# Export the current plot to a PNG file (graph2png is from the 'export' package)
graph2png(file = paste("ID3 of", dataname))

# Predict class labels for the test set
pred.ID3 <- predict(
  model.ID3,
  newdata = ds.test,
  type = "class"
)
# Show the predictions and how many there are
pred.ID3
length(pred.ID3)

# Confusion matrix: rows = true class, columns = predicted class
table(ds.test$Species, pred.ID3, dnn = c("真实值", "预测值"))
4、C4.5算法示例
#--------------------------------------
# > 3 < [ C4.5 algorithm ]
# using 'RWeka' package
#--------------------------------------
# Install 'RWeka' only when it is missing, then attach it.
if (!requireNamespace("RWeka", quietly = TRUE)) {
  install.packages("RWeka")
}
library(RWeka)

# Fit a J48 tree (Weka's C4.5 implementation) for Species on all other
# columns of ds.train.
model.C45 <- J48(
  Species ~ .,
  data = ds.train
)
# Print the fitted tree / rules
model.C45

# Plot the decision tree.
# The title gets a line break before the timestamp; the original literal "n("
# appears to be a "\n(" whose backslash was lost.
plot(
  model.C45,
  main = paste("C45 of", dataname, "\n(", Sys.time(), "---zhengcf@ysu.edu.cn)", sep = " ")
)
# Alternative: plot(model.C45, type = "simple")

# Export the current plot to a PNG file (graph2png is from the 'export' package)
graph2png(file = paste("C45 of", dataname))

# Predict class labels for the test set
pred.C45 <- predict(
  model.C45,
  newdata = ds.test,
  type = "class"
)
# Show the predictions and how many there are
pred.C45
length(pred.C45)

# Confusion matrix: rows = true class, columns = predicted class
table(ds.test$Species, pred.C45, dnn = c("真实值", "预测值"))
5、C5.0算法示例
#--------------------------------------
# > 4 < [ C5.0 algorithm ]
# using 'C50' package
#--------------------------------------
# Install 'C50' only when it is missing, then attach it.
if (!requireNamespace("C50", quietly = TRUE)) {
  install.packages("C50")
}
library(C50)

# Training controls. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned. CF is not set here (the original left
# `CF = 0.25` commented out).
tc <- C5.0Control(
  subset = TRUE,
  winnow = FALSE,
  noGlobalPruning = TRUE,
  minCases = 20
)

#--- without rules ---
# Fit a C5.0 tree model (rules = FALSE, i.e. a tree rather than a rule set)
# for Species on all other columns of ds.train.
model.c50.withoutrules <- C5.0(
  Species ~ .,
  data = ds.train,
  rules = FALSE,
  control = tc
)
# Print the model summary
summary(model.c50.withoutrules)
# Optional: C5imp(model.c50.withoutrules)

# Plot the decision tree.
# The title gets a line break before the timestamp; the original literal
# "withoutrulesn(" appears to be "withoutrules\n(" with the backslash lost.
plot(
  model.c50.withoutrules,
  main = paste("C50 of", dataname, "withoutrules\n(", Sys.time(), "---zhengcf@ysu.edu.cn)", sep = " ")
)

# Export the current plot to a PNG file (graph2png is from the 'export'
# package); the original left an extra "withoutrules" suffix commented out.
graph2png(file = paste("C50 of", dataname))

# Predict class labels for the test set
pred.C50.withoutrules <- predict(
  model.c50.withoutrules,
  newdata = ds.test,
  type = "class"
)
# Show the predictions and how many there are
pred.C50.withoutrules
length(pred.C50.withoutrules)

# Confusion matrix: rows = true class, columns = predicted class
table(ds.test$Species, pred.C50.withoutrules, dnn = c("真实值", "预测值"))
6、致谢
学习过程中,参考了网络上各位大神贡献的资料,在此一并表示感谢!



