How to use the xgboost.plot_importance function in xgboost

To help you get started, we've selected a few xgboost.plot_importance examples, drawn from popular ways the function is used in public projects.
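Before the examples, here is a minimal sketch of the typical call, assuming only that xgboost and matplotlib are installed; the toy data and parameter choices below are illustrative, not taken from any of the projects:

import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt

# toy data: 100 samples, 8 unnamed features, binary labels
X = np.random.randn(100, 8)
y = np.random.randint(0, 2, size=100)

model = xgb.XGBClassifier()
model.fit(X, y)

# importance_type can be "weight" (default), "gain", or "cover";
# max_num_features caps how many bars are drawn
xgb.plot_importance(model, importance_type="weight", max_num_features=20)
plt.show()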

From dmlc/xgboost: tests/python/test_plotting.py
# test-method fragment; these module-level imports are needed to run it
import numpy as np
import xgboost as xgb

def test_importance_plot_lim(self):
    np.random.seed(1)
    dm = xgb.DMatrix(np.random.randn(100, 100), label=[0, 1] * 50)
    bst = xgb.train({}, dm)
    assert len(bst.get_fscore()) == 71
    ax = xgb.plot_importance(bst)
    assert ax.get_xlim() == (0., 11.)
    assert ax.get_ylim() == (-1., 71.)

    # explicit limits are passed straight through to the matplotlib Axes
    ax = xgb.plot_importance(bst, xlim=(0, 5), ylim=(10, 71))
    assert ax.get_xlim() == (0., 5.)
    assert ax.get_ylim() == (10., 71.)
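The default limits above come from xgboost creating its own Axes. If you want the plot inside a figure you control, plot_importance also accepts an existing matplotlib Axes via its ax parameter; a minimal sketch reusing the test's setup:

import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt

np.random.seed(1)
dm = xgb.DMatrix(np.random.randn(100, 100), label=[0, 1] * 50)
bst = xgb.train({}, dm)

# draw into an Axes we control instead of letting xgboost create one
fig, ax = plt.subplots(figsize=(6, 10))
xgb.plot_importance(bst, ax=ax, xlim=(0, 5))
plt.show()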
From rupskygill/ML-mastery: xgboost_with_python_code/09_automatic_feature_importance.py
# plot feature importance using built-in function
from numpy import loadtxt
from xgboost import XGBClassifier
from xgboost import plot_importance
from matplotlib import pyplot
# load data
dataset = loadtxt('pima-indians-diabetes.csv', delimiter=",")
# split data into X and y
X = dataset[:,0:8]
y = dataset[:,8]
# fit model on training data
model = XGBClassifier()
model.fit(X, y)
# plot feature importance
plot_importance(model)
pyplot.show()
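Because loadtxt yields an unnamed array, the plot above labels the features f0..f7. A variation that shows readable labels, assuming a recent xgboost that picks up pandas column names (the names below are the conventional ones for this dataset):

import pandas as pd
from xgboost import XGBClassifier, plot_importance
from matplotlib import pyplot

# conventional names for the 8 Pima Indians diabetes features
names = ['pregnancies', 'glucose', 'blood_pressure', 'skin_thickness',
         'insulin', 'bmi', 'pedigree', 'age']
frame = pd.read_csv('pima-indians-diabetes.csv', header=None,
                    names=names + ['class'])

model = XGBClassifier()
model.fit(frame[names], frame['class'])

plot_importance(model)  # y-axis now shows column names instead of f0..f7
pyplot.show()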
From xnmp/lambdaquery: processing.py
    # method fragment: the `if ...:` line opening this branch is cut off in the excerpt
        y = df['inperiod']
    else:
        y = df[self.getTarget()]

    features = [expr for expr in self.getFeatures() if 'ts' not in expr]
    X = df[features]

    # X_tr, X_ts, y_tr, y_ts = train_test_split(X, y)

    from xgboost import XGBClassifier, plot_importance
    clf = XGBClassifier()
    print("Training model...")
    clf.fit(X, y)

    # feature importances
    plot_importance(clf)
    return explain_weights(clf)  # explain_weights is presumably imported from eli5
From daviddwlee84/MachineLearningPractice: Project/DCIC2019/xgboost_Kfold_f1score.py
        # loop-body fragment: the enclosing fold loop and the line that sets
        # predictions >= 0.5 to 1 are cut off in the excerpt
        pred_validation[pred_validation<0.5] = 0
        print('Validation label ratio (negative/positive) =', len(pred_validation[pred_validation==0])/len(pred_validation[pred_validation==1]))
        f1_scores.append(f1_score(y_true=y_validation, y_pred=pred_validation, average='macro'))

        # for test
        pred_test = xgb_model.predict(xgb_test_data, ntree_limit=xgb_model.best_ntree_limit)
        print('Test label ratio (negative/positive) =', len(pred_test[pred_test<0.5])/len(pred_test[pred_test>=0.5]))
        # Collect results
        if i == 0:
            cross_validation_pred = np.array(pred_test).reshape(-1, 1)
        else:
            cross_validation_pred = np.hstack((cross_validation_pred, np.array(pred_test).reshape(-1, 1)))

    print("Feature Importance: (",len(xgb_model.get_fscore()), ")\n", xgb_model.get_fscore()) # Get feature importance of each feature
    if plot:
        xgb.plot_importance(xgb_model, max_num_features=25)
        plt.show()

    print("f1 score summary:", f1_scores, "average:", np.mean(f1_scores))

    return cross_validation_pred
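The function returns one column of test predictions per fold. How the caller combines them is outside this excerpt; one common choice is to average the fold probabilities and apply the same 0.5 threshold, sketched here on fabricated data:

import numpy as np

# fabricated stand-in for the returned array: 10 test rows, 5 folds
cross_validation_pred = np.random.rand(10, 5)

# average the per-fold probabilities, then threshold at 0.5
final_pred = (cross_validation_pred.mean(axis=1) >= 0.5).astype(int)
print(final_pred)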
From pandas-ml/pandas-ml: pandas_ml/xgboost/base.py
        ylabel : str, default "Features"
            Y axis title label. To disable, pass None.
        kwargs :
            Other keywords passed to ax.barh()

        Returns
        -------
        ax : matplotlib Axes
        """

        import xgboost as xgb

        if not isinstance(self._df.estimator, xgb.XGBModel):
            raise ValueError('estimator must be XGBRegressor or XGBClassifier')
        # print(type(self._df.estimator.booster), self._df.estimator.booster)
        return xgb.plot_importance(self._df.estimator,
                                   ax=ax, height=height, xlim=xlim, title=title,
                                   xlabel=xlabel, ylabel=ylabel, grid=True, **kwargs)

From acredjb/FBP: FBP_ML_XGBRegressor.py
    # print('---------------')
    # print('ans(0):'+str(ans[1]))
    # print('')
    id_list = np.arange(5709, 6108)
    data_arr = []
    for row in range(0, ans_len):
        data_arr.append([int(id_list[row]), ans[row]])
        print(ans[row])
    np_data = np.array(data_arr)

    # write results to file
    pd_data = pd.DataFrame(np_data, columns=['id', 'y'])
    pd_data.to_csv('FBP_submit.csv', index=None)

    # display feature importance
    plot_importance(model)
    plt.show()
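When a script like this runs in batch with no display attached, plt.show() is of little use; the Axes returned by plot_importance can be written to disk instead. A minimal sketch, assuming only xgboost and matplotlib:

import numpy as np
import xgboost as xgb
import matplotlib
matplotlib.use('Agg')            # headless backend, no display required
import matplotlib.pyplot as plt

X = np.random.rand(50, 4)
y = np.random.rand(50)
model = xgb.XGBRegressor().fit(X, y)

ax = xgb.plot_importance(model)
ax.figure.savefig('feature_importance.png', dpi=150, bbox_inches='tight')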
From ChenglongChen/Kaggle_HomeDepot: Code/Chenglong/utils/xgb_utils.py
def save_topn_features(self, fname="XGBClassifier_topn_features.txt", topn=10):
    ax = xgb.plot_importance(self.model)
    # plot_importance draws the least important feature at the bottom, so
    # reversing the y-tick labels lists features from most to least important
    yticklabels = ax.get_yticklabels()[::-1]
    if topn == -1:
        topn = len(yticklabels)
    else:
        topn = min(topn, len(yticklabels))
    with open(fname, "w") as f:
        for i in range(topn):
            f.write("%s\n" % yticklabels[i].get_text())
From ChenglongChen/Kaggle_HomeDepot: Code/Chenglong/utils/xgb_utils.py
def plot_importance(self):
    ax = xgb.plot_importance(self.model)
    self.save_topn_features()
    return ax