# Split the data into train and test sets (80/20, fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2, random_state=1024)
print('Train data length:', len(X_train))
print('Test data length:', len(X_test))

# Convert to LightGBM's Dataset format; the eval set references the train set
# so that bin mappings are shared between the two.
lgb_train = lgb.Dataset(X_train, y_train)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)

params = {
    'boosting_type': 'gbdt',          # boosting algorithm
    'objective': 'multiclass',        # objective function
    'num_class': 10,                  # number of classes
    'metric': {'multi_logloss'},      # evaluation metric(s)
    'num_leaves': 31,                 # max leaves per tree
    'learning_rate': 0.05,            # shrinkage rate
    'feature_fraction': 0.8,          # if < 1.0, LightGBM randomly selects this fraction of features each iteration (alias: colsample_bytree)
    'feature_fraction_seed': 1,
    'bagging_fraction': 0.8,          # row subsampling fraction
    'bagging_freq': 5,                # perform bagging every 5 iterations
    'bagging_seed': 3,
    'nthread': 4,
    'verbose': 1,                     # <0: fatal only, =0: error (warning), >0: info
}

# Train the model, recording per-iteration eval metrics.
evals_result = {}  # to record eval results
gbm = lgb.train(params, lgb_train, num_boost_round=20, valid_sets=lgb_eval, evals_result=evals_result)

# Predict class probabilities on the test set.
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)  # shape [B, n_class]
# y_pred = np.argmax(y_pred, axis=-1)  # shape [B,] — uncomment for hard class labels
print('best_iteration {}'.format(gbm.best_iteration))
# One-vs-rest multiclass AUC computed from the probability matrix.
print('auc %.6f' % (roc_auc_score(y_test, y_pred, multi_class='ovr')))
print(gbm.best_score)

# Visualize feature importance and the recorded eval metric curve.
lgb.plot_importance(gbm)
plt.show()
lgb.plot_metric(evals_result)
plt.show()

# A more detailed tutorial: https://github.com/microsoft/LightGBM/tree/master/examples/python-guide
# The above uses LightGBM's native API; for the sklearn-compatible usage,
# see "the two ways of using LightGBM".