Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_importance_plot_lim(self):
    """Check the axis limits produced by ``xgb.plot_importance``.

    A booster is trained with an empty parameter dict on seeded random data
    (a 100 x 100 matrix with alternating 0/1 labels). The importance plot is
    drawn once with default limits and once with explicit ``xlim``/``ylim``,
    and the resulting axes limits are compared against fixed expected values.
    """
    # Seed before drawing the matrix so the fitted booster is reproducible.
    np.random.seed(1)
    features = np.random.randn(100, 100)
    labels = [0, 1] * 50
    booster = xgb.train({}, xgb.DMatrix(features, label=labels))
    assert len(booster.get_fscore()) == 71

    # Limits chosen by plot_importance itself.
    default_axes = xgb.plot_importance(booster)
    assert default_axes.get_xlim() == (0., 11.)
    assert default_axes.get_ylim() == (-1., 71.)

    # Limits supplied by the caller.
    custom_axes = xgb.plot_importance(booster, xlim=(0, 5), ylim=(10, 71))
    assert custom_axes.get_xlim() == (0., 5.)
    assert custom_axes.get_ylim() == (10., 71.)
# plot feature importance using built-in function
from numpy import loadtxt
from xgboost import XGBClassifier
from xgboost import plot_importance
from matplotlib import pyplot
# Read the comma-separated data file into a 2-D array.
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",")
# The first eight columns are the model inputs; column 8 is the target.
X, y = dataset[:, :8], dataset[:, 8]
# Fit a classifier with default settings on the full data set.
model = XGBClassifier()
model.fit(X, y)
# Draw the feature-importance chart and display it.
plot_importance(model)
pyplot.show()
y = df['inperiod']
else:
y = df[self.getTarget()]
features = [expr for expr in self.getFeatures() if 'ts' not in expr]
X = df[features]
# X_tr, X_ts, y_tr, y_ts = train_test_split(X, y)
from xgboost import XGBClassifier, plot_importance
clf = XGBClassifier()
print("Training model...")
clf.fit(X, y)
# feature importances
plot_importance(clf)
return explain_weights(clf)
pred_validation[pred_validation<0.5] = 0
print('Validation label ratio (negative/positive) =', len(pred_validation[pred_validation==0])/len(pred_validation[pred_validation==1]))
f1_scores.append(f1_score(y_true=y_validation, y_pred=pred_validation, average='macro'))
# for test
pred_test = xgb_model.predict(xgb_test_data, ntree_limit=xgb_model.best_ntree_limit)
print('Test label ratio (negative/positive) =', len(pred_test[pred_test<0.5])/len(pred_test[pred_test>=0.5]))
# Collect results
if i == 0:
cross_validation_pred = np.array(pred_test).reshape(-1, 1)
else:
cross_validation_pred = np.hstack((cross_validation_pred, np.array(pred_test).reshape(-1, 1)))
print("Feature Importance: (",len(xgb_model.get_fscore()), ")\n", xgb_model.get_fscore()) # Get feature importance of each feature
if plot:
xgb.plot_importance(xgb_model, max_num_features=25)
plt.show()
print("f1 score summary:", f1_scores, "average:", np.mean(f1_scores))
return cross_validation_pred
ylabel : str, default "Features"
Y axis title label. To disable, pass None.
kwargs :
Other keywords passed to ax.barh()
Returns
-------
ax : matplotlib Axes
"""
import xgboost as xgb
if not isinstance(self._df.estimator, xgb.XGBModel):
raise ValueError('estimator must be XGBRegressor or XGBClassifier')
# print(type(self._df.estimator.booster), self._df.estimator.booster)
return xgb.plot_importance(self._df.estimator,
ax=ax, height=height, xlim=xlim, title=title,
xlabel=xlabel, ylabel=ylabel, grid=True, **kwargs)
# print('---------------')
# print('ans(0):'+str(ans[1]))
# print('')
# Pair each id from 5709..6107 with the corresponding entry of `ans`.
# NOTE(review): id_list has 399 entries; assumes ans_len <= 399 — confirm against the code that sets ans_len.
id_list = np.arange(5709, 6108)
data_arr = []
for row in range(0, ans_len):
data_arr.append([int(id_list[row]), ans[row]])
print(ans[row])
# NOTE(review): if `ans` holds floats, np.array presumably upcasts the int ids to float as well — verify the 'id' column format in the CSV.
np_data = np.array(data_arr)
# Write to file
pd_data = pd.DataFrame(np_data, columns=['id', 'y'])
pd_data.to_csv('FBP_submit.csv', index=None)
# Show the important features
plot_importance(model)
plt.show()
def save_topn_features(self, fname="XGBClassifier_topn_features.txt", topn=10):
    """Write feature names taken from the importance plot to a text file.

    The names are the y tick labels of ``xgb.plot_importance(self.model)``,
    read in reverse order (presumably most important first; confirm against
    the plot's ordering). One name is written per line.

    Parameters
    ----------
    fname : str
        Path of the output file; it is opened in write mode.
    topn : int
        Maximum number of names to write. ``-1`` writes every label; values
        larger than the number of labels are capped at that number.
    """
    axes = xgb.plot_importance(self.model)
    labels = axes.get_yticklabels()[::-1]
    limit = len(labels) if topn == -1 else min(topn, len(labels))
    # max(..., 0) keeps the index-loop semantics: a negative limit writes nothing.
    selected = labels[:max(limit, 0)]
    with open(fname, "w") as f:
        f.writelines("%s\n" % label.get_text() for label in selected)
def plot_importance(self):
    """Plot feature importance for ``self.model`` and save the top features.

    Returns
    -------
    The axes object returned by ``xgb.plot_importance``.
    """
    importance_axes = xgb.plot_importance(self.model)
    # Side effect: also writes the feature-name file using the default
    # arguments of save_topn_features.
    self.save_topn_features()
    return importance_axes