import numpy as np
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE(review): load_boston appears to have been removed in scikit-learn 1.2;
# confirm the installed version still provides it.
boston = load_boston()
# To inspect the raw feature matrix as well, uncomment: print(boston.data)
# Show the target values, then the feature names, each on its own output line.
print(boston.target, boston.feature_names, sep="\n")
import pandas as pd
# Feature column labels come from the dataset itself rather than a hand-typed
# list, so they always match the data (including the 'PTRATIO' spelling).
feature_cols = [str(name) for name in boston.feature_names]
# pandas spells the class `DataFrame`; `pd.Dataframe` raises AttributeError.
bos = pd.DataFrame(boston.data, columns=feature_cols)
info = pd.DataFrame(boston.target)
# Merge features and target into a single DataFrame; the target is the last
# column and is labelled 'PRICE'.
lg = pd.concat([bos, info], axis=1)
lg.columns = feature_cols + ['PRICE']
# x: all feature columns; y: the target, kept as a one-column DataFrame.
x = lg[feature_cols]
y = lg[['PRICE']]
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

# Fit an ordinary least-squares linear model on the training portion.
regrregr = LinearRegression()
regrregr.fit(X_train, y_train)

# Report the fitted coefficients and the intercept.
print(f'各项系数{regrregr.coef_!s}')
print(f'常数项{regrregr.intercept_!s}')
# Assess how strongly each model parameter (feature) influences the value of y
import statsmodels.api as sm
# statsmodels does not add an intercept on its own, so add a constant column
# to the features before fitting.
x2 = sm.add_constant(x)
# Fit OLS on the full data set (not only the training split) and print the
# regression summary table.
est = sm.OLS(endog=y, exog=x2).fit()
print(est.summary())
# https://www.92python.com/view/251.html
from sklearn.metrics import mean_squared_error #均方误差
from sklearn.metrics import mean_absolute_error #平方绝对误差
from sklearn.metrics import r2_score#
# Predict on the held-out rows and score the predictions against y_test.
y_predict = regrregr.predict(X_test)

# Mean squared error, and its square root (RMSE) derived from the same value.
MSE = mean_squared_error(y_test, y_predict)
RMSE = np.sqrt(MSE)

# Mean absolute error.
MAE = mean_absolute_error(y_test, y_predict)

# Coefficient of determination (R^2).
R2 = r2_score(y_test, y_predict)

print(MSE, RMSE, MAE, R2)