波士顿房价数据集——随机森林

波士顿房价数据链接：https://pan.baidu.com/s/1JPrcNl1AgNCKEHCjOGyHvQ
提取码：1234

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics  #评价函数库
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor #导入随机森林
from sklearn.model_selection import GridSearchCV  #网格搜索验证
from sklearn import tree 
import pydotplus #绘制随机森林
from IPython.display import Image,display #显示图像
%matplotlib inline  #在当前环境中显示图像

# Load the Boston housing table and discard every row containing a missing value.
df = pd.read_csv("D:/波士顿房价预测/boston_housing_data.csv")
df = df.dropna()

# Target: the MEDV column (the house price); features: every remaining column.
y = df["MEDV"]
x = df.drop(columns=["MEDV"])

# Hold out a test split; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=0,
)

# Hyper-parameter grid explored by the search (all values are free to tune).
param_grid = {
    "n_estimators": [5, 10, 20, 100, 200],
    "max_depth": [3, 5, 7],
    # Floats are interpreted as a fraction of the feature count. The last
    # entry must be the float 1.0 ("use all features"): the integer 1 would
    # instead mean "consider exactly one feature per split".
    "max_features": [0.6, 0.7, 0.8, 1.0],
}
rf = RandomForestRegressor()
# Exhaustively evaluate every parameter combination with 3-fold
# cross-validation on the training split to find the best setting.
grid = GridSearchCV(rf, param_grid=param_grid, cv=3)
grid.fit(x_train, y_train)

# Notebook cell output: the best parameter combination found.
grid.best_params_

# Use the estimator fitted with the best parameters as the final model.
model = grid.best_estimator_
model

# Draw a single tree of the fitted forest.
# Index 9 is the tenth tree, but the grid search may select a forest with as
# few as 5 trees, so clamp the index to the last available tree instead of
# failing with an IndexError.
tree_index = min(9, len(model.estimators_) - 1)
estimator = model.estimators_[tree_index]

# Export the tree as Graphviz DOT source; out_file=None makes export_graphviz
# return the DOT text as a string instead of writing a file.
data = tree.export_graphviz(
    estimator,
    out_file=None,
    filled=True,
    rounded=True,
)
graph = pydotplus.graph_from_dot_data(data)
graph
# The PNG is rendered by Graphviz and shown through IPython, so no matplotlib
# figure is needed here.
display(Image(graph.create_png()))

# Feature importance analysis: the larger the value, the more influence the
# feature has on the prediction.
model.feature_importances_

# Predict the test split once and reuse the result below.
y_pred = model.predict(x_test)
y_pred

# Mean squared error on the test split; its square root is the
# root-mean-squared error, expressed in the same units as the target.
MSE = metrics.mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
MSE
RMSE

波士顿房价数据集——随机森林

Python相关栏目本月热门文章