我做了一个例子,其中训练集和测试集都包含缺失值。
我只是选择了其中一种策略:使用 SimpleImputer 类,用每一列的均值替换缺失的数据。还有其他策略可选(例如 median、most_frequent、constant)。
from __future__ import print_functionimport numpy as npfrom sklearn.ensemble import RandomForestClassifierfrom sklearn.impute import SimpleImputerX_train = [[0, 0, np.nan], [np.nan, 1, 1]]Y_train = [0, 1]X_test_1 = [0, 0, np.nan]X_test_2 = [0, np.nan, np.nan]X_test_3 = [np.nan, 1, 1]# Create our imputer to replace missing values with the mean e.g.imp = SimpleImputer(missing_values=np.nan, strategy='mean')imp = imp.fit(X_train)# Impute our data, then trainX_train_imp = imp.transform(X_train)clf = RandomForestClassifier(n_estimators=10)clf = clf.fit(X_train_imp, Y_train)for X_test in [X_test_1, X_test_2, X_test_3]: # Impute each test item, then predict X_test_imp = imp.transform(X_test) print(X_test, '->', clf.predict(X_test_imp))# Results[0, 0, nan] -> [0][0, nan, nan] -> [0][nan, 1, 1] -> [1]



