import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,r2_score
from sklearn import datasets
# CV = cross-validation: the *CV estimators pick their hyper-parameters by cross-validation
from sklearn.linear_model import LinearRegression,Ridge,Lasso,ElasticNet,ElasticNetCV,LassoCV
# Load the diabetes regression dataset that ships with scikit-learn.
diabetes = datasets.load_diabetes()
X, y = diabetes['data'], diabetes['target']

# Hold out 15% of the samples as a test set. No random_state is passed,
# so the split (and every score below) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15)
# Baseline: ordinary least-squares linear regression fitted on the training split.
lr = LinearRegression().fit(X_train, y_train)

# For a regression problem the score is not an accuracy; the cells below
# reproduce the same number with r2_score and with a manual 1 - u/v formula.
lr.score(X_test, y_test)
0.39121528487973423
# Reproduce lr.score() by hand: R^2 = 1 - u / v.
# The prediction is computed here so that this cell does not depend on `y_`
# being assigned by a later cell (running the file top to bottom would
# otherwise raise NameError).
y_ = lr.predict(X_test)

# u: residual sum of squares between the true targets and the predictions.
u = ((y_test - y_) ** 2).sum()
# v: total sum of squares of the true targets around their mean.
v = ((y_test - y_test.mean()) ** 2).sum()

r = 1 - u / v
r
0.39121528487973423
# Predict the held-out targets with the linear model and show the rounded
# predictions next to the true values for a visual comparison.
y_ = lr.predict(X_test)
display(np.round(y_, 0), y_test)
array([ 176., 129., 147., 109., 98., 131., 175., 168., 86.,
126., 138., 178., 126., 123., 157., 227., 153., 115.,
197., 184., 164., 101., 230., 111., 202., 178., 56.,
210., 61., 169., 208., 73., 89., 128., 77., 182.,
177., 152., 106., 71., 251., 133., 55., 135., 116.,
77., 249., 159., 37., 175., 123., 70., 102., 181.,
157., 206., 236., 124., 239., 267., 103., 125., 109.,
181., 200., 189., 161.])
array([ 180., 140., 93., 87., 65., 191., 311., 178., 49.,
71., 59., 141., 63., 51., 95., 152., 197., 53.,
178., 198., 154., 31., 128., 72., 186., 147., 78.,
150., 70., 110., 109., 92., 96., 178., 200., 107.,
144., 136., 108., 128., 233., 60., 77., 49., 89.,
77., 243., 144., 104., 174., 127., 59., 81., 52.,
73., 173., 321., 64., 272., 346., 88., 178., 97.,
216., 222., 144., 196.])
# Same R^2 as lr.score(X_test, y_test), computed from the stored predictions.
r2_score(y_true=y_test, y_pred=y_)
0.39121528487973423
# Mean squared error of the current predictions on the test split.
mean_squared_error(y_true=y_test, y_pred=y_)
2958.8336722289791
使用岭回归 (Using ridge regression)
# Ridge regression with a small, hand-picked regularisation strength.
ridge = Ridge(alpha=0.001).fit(X_train, y_train)

# Print the test-set score, then overwrite `y_` with the ridge predictions
# and report their mean squared error as the cell's result.
print(ridge.score(X_test, y_test))
y_ = ridge.predict(X_test)
mean_squared_error(y_true=y_test, y_pred=y_)
0.390516991652
2962.227537851948
from sklearn.linear_model import RidgeCV
# Let 6-fold cross-validation choose alpha from 50 log-spaced candidates
# between 1e-5 and 1, using R^2 as the selection metric.
ridgeCV = RidgeCV(
    alphas=np.logspace(-5, 0, 50),
    scoring='r2',
    cv=6,
).fit(X_train, y_train)

# Evaluate the refitted model on the held-out split.
y_ = ridgeCV.predict(X_test)
r2_score(y_true=y_test, y_pred=y_)
C:UsersLXQAnaconda3libsite-packagessklearnmodel_selection_search.py:814: DeprecationWarning: The default of the `iid` parameter will change from True to False in version 0.22 and will be removed in 0.24. This will change numeric results when test-set sizes are unequal.
DeprecationWarning)
0.38857280350418621
# Same search as before, but over 50 linearly spaced candidates in [0.01, 5].
ridgeCV = RidgeCV(
    alphas=np.linspace(0.01, 5, 50),
    scoring='r2',
    cv=6,
).fit(X_train, y_train)

# Evaluate the refitted model on the held-out split.
y_ = ridgeCV.predict(X_test)
r2_score(y_true=y_test, y_pred=y_)
C:UsersLXQAnaconda3libsite-packagessklearnmodel_selection_search.py:814: DeprecationWarning: The default of the `iid` parameter will change from True to False in version 0.22 and will be removed in 0.24. This will change numeric results when test-set sizes are unequal.
DeprecationWarning)
0.38992538207956295
# Show the log-spaced grid from 1e-5 to 1 (the recorded output lists 50 values).
np.logspace(-5,0) # When the values are very small, this works much better than linspace
array([ 1.00000000e-05, 1.26485522e-05, 1.59985872e-05,
2.02358965e-05, 2.55954792e-05, 3.23745754e-05,
4.09491506e-05, 5.17947468e-05, 6.55128557e-05,
8.28642773e-05, 1.04811313e-04, 1.32571137e-04,
1.67683294e-04, 2.12095089e-04, 2.68269580e-04,
3.39322177e-04, 4.29193426e-04, 5.42867544e-04,
6.86648845e-04, 8.68511374e-04, 1.09854114e-03,
1.38949549e-03, 1.75751062e-03, 2.22299648e-03,
2.81176870e-03, 3.55648031e-03, 4.49843267e-03,
5.68986603e-03, 7.19685673e-03, 9.10298178e-03,
1.15139540e-02, 1.45634848e-02, 1.84206997e-02,
2.32995181e-02, 2.94705170e-02, 3.72759372e-02,
4.71486636e-02, 5.96362332e-02, 7.54312006e-02,
9.54095476e-02, 1.20679264e-01, 1.52641797e-01,
1.93069773e-01, 2.44205309e-01, 3.08884360e-01,
3.90693994e-01, 4.94171336e-01, 6.25055193e-01,
7.90604321e-01, 1.00000000e+00])
# Inspect the hyper-parameters the RidgeCV estimator was constructed with.
# NOTE(review): the recorded output lists alphas from ~1.0233 up to 1e5, which
# matches np.logspace(0.01, 5, 50) rather than the np.linspace(0.01, 5, 50)
# grid in the visible RidgeCV cell - presumably that cell was re-run with a
# different grid before this output was captured; confirm.
ridgeCV.get_params()
{'alphas': array([ 1.02329299e+00, 1.29370940e+00, 1.63558632e+00,
2.06780797e+00, 2.61424893e+00, 3.30509292e+00,
4.17850002e+00, 5.28271453e+00, 6.67872986e+00,
8.44365757e+00, 1.06749868e+01, 1.34959693e+01,
1.70624274e+01, 2.15713612e+01, 2.72718303e+01,
3.44787109e+01, 4.35900889e+01, 5.51092486e+01,
6.96724728e+01, 8.80841888e+01, 1.11361403e+02,
1.40789877e+02, 1.77995148e+02, 2.25032320e+02,
2.84499582e+02, 3.59681721e+02, 4.54731565e+02,
5.74899375e+02, 7.26822849e+02, 9.18893768e+02,
1.16172154e+03, 1.46871921e+03, 1.85684439e+03,
2.34753591e+03, 2.96789806e+03, 3.75219771e+03,
4.74375716e+03, 5.99734709e+03, 7.58221192e+03,
9.58589468e+03, 1.21190726e+04, 1.53216706e+04,
1.93705904e+04, 2.44894816e+04, 3.09610960e+04,
3.91429057e+04, 4.94868483e+04, 6.25642915e+04,
7.90975925e+04, 1.00000000e+05]),
'cv': 6,
'fit_intercept': True,
'gcv_mode': None,
'normalize': False,
'scoring': 'r2',
'store_cv_values': False}
# Regularisation strength selected by the cross-validated search.
# NOTE(review): the recorded value (~1.0233) equals the smallest alpha in the
# recorded get_params() grid, so the best alpha may lie below the searched
# range - consider extending the grid towards smaller values.
ridgeCV.alpha_
1.0232929922807541
# Overwrite `y_` with the RidgeCV predictions for the test split and echo them.
y_ = ridgeCV.predict(X_test)
y_
array([ 174.21757428, 116.36524498, 140.61563093, 115.409024 ,
106.91865557, 136.46009365, 177.61587881, 171.55493371,
114.49287297, 147.21520049, 141.67967883, 168.35486338,
155.96002184, 143.76886813, 145.66448268, 211.48115991,
160.5176388 , 135.33336508, 182.52159526, 163.75807674,
156.7901146 , 116.18564336, 195.22357168, 125.01177714,
194.54244966, 159.15173245, 101.447229 , 194.61296108,
91.70025075, 163.10265591, 184.3061834 , 108.8021352 ,
111.48656163, 131.71521489, 103.29434778, 172.02120972,
169.75910978, 139.60048051, 131.68438865, 102.44271956,
220.58170782, 142.37276674, 86.9510951 , 143.97999428,
153.72552455, 100.83182536, 219.96531889, 162.65607595,
100.55936792, 168.06750241, 132.93818115, 91.38722039,
122.42090438, 161.51262153, 156.33645019, 185.48945726,
203.83558559, 131.57352976, 199.71707589, 210.56196227,
125.37817533, 131.94020433, 104.52862623, 161.67645165,
192.46649099, 184.66117152, 165.40544718])
# R^2 of the RidgeCV predictions on the test split.
r2_score(y_true=y_test, y_pred=y_)
0.33512764134069251