How to use the xgboost.cv function in xgboost

To help you get started, we’ve selected a few xgboost.cv examples based on popular ways the function is used in public projects.

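Before the project snippets below, here is a minimal, self-contained sketch of a typical xgb.cv call. The toy dataset, parameter values, and round counts are illustrative assumptions, not taken from any of the projects that follow:

import xgboost as xgb
from sklearn.datasets import load_breast_cancer

# build a DMatrix from an in-memory dataset (illustrative data only)
X, y = load_breast_cancer(return_X_y=True)
dtrain = xgb.DMatrix(X, label=y)

params = {'max_depth': 3, 'eta': 0.1, 'objective': 'binary:logistic'}

# 5-fold cross-validation over 50 boosting rounds; with pandas installed the
# result is a DataFrame of per-round train/test metric means and standard deviations
cv_results = xgb.cv(params, dtrain, num_boost_round=50, nfold=5,
                    metrics='auc', seed=0)
print(cv_results.tail())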

github dmlc / xgboost / tests / python / test_with_pandas.py
        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
                  'objective': 'binary:logistic', 'eval_metric': ['auc']}
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                    as_pandas=True, metrics='error')
        assert 'eval_metric' in params
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]

        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                    as_pandas=True, metrics=['error'])
        assert 'eval_metric' in params
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]

        params = list(params.items())
        cv = xgb.cv(params, dm, num_boost_round=10, nfold=10,
                    as_pandas=True, metrics=['error'])
        assert isinstance(params, list)
        assert 'auc' not in cv.columns[0]
        assert 'error' in cv.columns[0]
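
The point of this test is that the metrics argument passed to xgb.cv takes precedence over the eval_metric entry in the parameter dictionary, and that the caller's params dict is not mutated: the result columns are named after 'error', not 'auc'. With as_pandas=True the columns follow the pattern 'train-<metric>-mean', 'train-<metric>-std', 'test-<metric>-mean' and 'test-<metric>-std'.
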
github aarshayj / AJ_ML_Library / models_classification.py
def modelfit(self, performCV=True, useTrainCV=False, TrainCVFolds=5, early_stopping_rounds=20, show_progress=True, printTopN='all'):

        if useTrainCV:
            xgb_param = self.alg.get_xgb_params()
            if self.num_class>2:
                xgb_param['num_class']=self.num_class
            cvresult = xgb.cv(xgb_param,self.xgtrain, num_boost_round=self.alg.get_params()['n_estimators'], nfold=self.cv_folds,
                metrics=self.scoring_metric_xgb, early_stopping_rounds=early_stopping_rounds, show_progress=show_progress)
            self.alg.set_params(n_estimators=cvresult.shape[0])

        print(self.alg.get_params())
        obj = self.alg.fit(self.data_train[self.predictors], self.data_train[self.target], eval_metric=self.eval_metric)
        
        #Print feature importance
        # self.set_feature_importance()
        self.feature_imp = pd.Series(self.alg.booster().get_fscore()).sort_values(ascending=False)
        num_print = len(self.feature_imp)
        if printTopN != 'all':
            num_print = min(printTopN,len(self.feature_imp))
        self.feature_imp.iloc[:num_print].plot(kind='bar', title='Feature Importances')
        plt.ylabel('Feature Importance Score')
        plt.show(block=False)
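
The snippet above targets an older xgboost API (alg.booster(), show_progress). A rough sketch of the same tune-then-fit pattern against the current scikit-learn wrapper, where X and y are assumed to be an in-memory feature matrix and label vector, might look like this:

import xgboost as xgb

clf = xgb.XGBClassifier(n_estimators=500, learning_rate=0.1)
dtrain = xgb.DMatrix(X, label=y)  # X, y assumed to exist; not part of the original snippet

# cross-validate with the estimator's own parameters, stop early once the metric
# plateaus, then shrink n_estimators to the number of rounds actually used
cv_result = xgb.cv(clf.get_xgb_params(), dtrain,
                   num_boost_round=clf.get_params()['n_estimators'],
                   nfold=5, metrics='auc', early_stopping_rounds=20)
clf.set_params(n_estimators=cv_result.shape[0])
clf.fit(X, y)
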
github JianWenJun / MLDemo / ML / DecisionTree / xgboost_demo.py
def modelfit(alg, dtrain_x, dtrain_y, useTrainCV=True, cv_folds=5, early_stopping_rounds=50):
    """
    :param alg: the initial model
    :param dtrain_x: training data X
    :param dtrain_y: training labels y
    :param useTrainCV: whether to use xgb.cv to determine the best n_estimators
    :param cv_folds: number of cross-validation folds
    :param early_stopping_rounds: stop if eval_metric has not improved within this many rounds
    """
    if useTrainCV:
        xgb_param = alg.get_xgb_params()
        xgtrain = xgb.DMatrix(dtrain_x,dtrain_y)
        cv_result = xgb.cv(xgb_param, xgtrain, num_boost_round=alg.get_params()['n_estimators'],
                           nfold=cv_folds, metrics='auc', early_stopping_rounds=early_stopping_rounds)

        # print(cv_result)
        alg.set_params(n_estimators=cv_result.shape[0])

    # train on the data passed in
    alg.fit(dtrain_x, dtrain_y, eval_metric='auc')

    # predict on the training data
    train_y_pre = alg.predict(dtrain_x)

    print("\nModel Report")
    print("Accuracy : %.4g" % metrics.accuracy_score(dtrain_y, train_y_pre))

    feat_imp = pd.Series(alg.get_booster().get_fscore()).sort_values(ascending=False)
    feat_imp.plot(kind = 'bar',title='Feature Importance')
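
The manual bar plot can also be produced with xgboost's built-in helper; a brief sketch, assuming matplotlib is available:

import matplotlib.pyplot as plt
import xgboost as xgb

# plot_importance accepts either a fitted scikit-learn wrapper model or a Booster
xgb.plot_importance(alg, max_num_features=20)
plt.show()
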
github gatapia / py_ml_utils / XGBoostClassifier.py
'eta': self.eta,
      'gamma': self.gamma,
      'max_depth': self.max_depth,
      'min_child_weight': self.min_child_weight,
      'subsample': self.subsample,
      'colsample_bytree': self.colsample_bytree,
      'max_delta_step': self.max_delta_step,
      'l': self.l,
      'alpha': self.alpha,
      'lambda_bias': self.lambda_bias,
      'objective': self.objective,
      'eval_metric': self.eval_metric,
      'seed': self.seed,
      'num_class': self.num_class,
    }    
    results = xgb.cv(param, X, self.num_round, cv)
    return results
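
Note that the last two positional arguments here map to num_boost_round and nfold in xgb.cv's signature, and the second argument is expected to be an xgboost DMatrix.
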
github apple / turicreate / src / external / xgboost / demo / guide-python / cross_validation.py
#!/usr/bin/python
import numpy as np
import xgboost as xgb

### load data and do training
dtrain = xgb.DMatrix('../data/agaricus.txt.train')
param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic'}
num_round = 2

print ('running cross validation')
# do cross validation, this will print result out as
# [iteration]  metric_name:mean_value+std_value
# std_value is standard deviation of the metric
xgb.cv(param, dtrain, num_round, nfold=5,
       metrics={'error'}, seed = 0)

print ('running cross validation, disable standard deviation display')
# do cross validation, this will print result out as
# [iteration]  metric_name:mean_value
# (the standard deviation is not shown when show_stdv=False)
xgb.cv(param, dtrain, num_round, nfold=5,
       metrics={'error'}, seed = 0, show_stdv = False)

print ('running cross validation, with preprocessing function')
# define the preprocessing function
# used to return the preprocessed training, test data, and parameter
# we can use this to do weight rescale, etc.
# as a example, we try to set scale_pos_weight
def fpreproc(dtrain, dtest, param):
    label = dtrain.get_label()
github MizioAnd / PortfolioTimeSeriesAnalysis / two_sigma_financial_modelling.py
            xgb_params = {
                'seed': 0,
                'colsample_bytree': 0.8,
                'silent': 1,
                'subsample': 0.6,
                'learning_rate': 0.01,
                # 'booster': 'gblinear',  # default is gbtree
                'objective': 'reg:linear',
                'max_depth': 1,
                'num_parallel_tree': 1,
                'min_child_weight': 1,
                'eval_metric': 'rmse',
            }

            res = xgb.cv(xgb_params, dtrain, num_boost_round=10000, nfold=5, seed=seed, stratified=False,
                         early_stopping_rounds=100, verbose_eval=10, show_stdv=True)

            best_nrounds = res.shape[0] - 1
            cv_mean = res.iloc[-1, 0]
            cv_std = res.iloc[-1, 1]

            print('Ensemble-CV: {0}+{1}'.format(cv_mean, cv_std))
            title_name = 'xgb.cv'
            two_sigma_fin_mod_tools.predicted_vs_actual_y_xgb(xgb, best_nrounds, xgb_params, x_train, test_data,
                                                              y_train, y_test_data, title_name)
            # plt.show()
            gbdt = xgb.train(xgb_params, dtrain, best_nrounds)
            output_xgb_cv = gbdt.predict(dtest)

        # Averaging the output using four different machine learning estimators
        output = (output_feature_selection_lasso + output_xgb_cv) / 2.0
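
Reading the score with positional indexing (res.iloc[-1, 0]) relies on the column order of the returned DataFrame; selecting the columns by name is more robust, for example:

cv_mean = res['test-rmse-mean'].iloc[-1]
cv_std = res['test-rmse-std'].iloc[-1]
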
github apple / turicreate / src / external / xgboost / demo / kaggle-higgs / higgs-cv.py
    ratio = float(np.sum(label == 0)) / np.sum(label == 1)
    param['scale_pos_weight'] = ratio
    wtrain = dtrain.get_weight()
    wtest = dtest.get_weight()
    sum_weight = sum(wtrain) + sum(wtest)
    wtrain *= sum_weight / sum(wtrain)
    wtest *= sum_weight / sum(wtest)
    dtrain.set_weight(wtrain)
    dtest.set_weight(wtest)
    return (dtrain, dtest, param)

# do cross validation, for each fold
# the dtrain, dtest, param will be passed into fpreproc
# then the return value of fpreproc will be used to generate
# results of that fold
xgb.cv(param, dtrain, num_round, nfold=5,
       metrics={'ams@0.15', 'auc'}, seed = 0, fpreproc = fpreproc)
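
Here fpreproc receives each fold's dtrain, dtest and the parameter dict, and must return the (possibly modified) triple; the snippet uses it to set scale_pos_weight from the fold's own label balance and to rescale the instance weights so that the train and test folds each sum to the full dataset's total weight.
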
github ternaus / kaggle_allstate / src / xgb_bayesian.py
def xgb_evaluate(min_child_weight,
                 colsample_bytree,
                 max_depth,
                 subsample,
                 gamma,
                 alpha):

    params['min_child_weight'] = int(min_child_weight)
    params['colsample_bytree'] = max(min(colsample_bytree, 1), 0)
    params['max_depth'] = int(max_depth)
    params['subsample'] = max(min(subsample, 1), 0)
    params['gamma'] = max(gamma, 0)
    params['alpha'] = max(alpha, 0)

    cv_result = xgb.cv(params, xgtrain, num_boost_round=num_rounds, nfold=5,
                         seed=random_state,
                         feval=evalerror,
                         callbacks=[xgb.callback.early_stop(50)])

    return -cv_result['test-mae-mean'].values[-1]
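
This objective is meant to be maximized by a Bayesian optimizer, which is why it returns the negated cross-validated MAE. A minimal sketch of wiring it up, assuming the bayes_opt package and illustrative parameter bounds:

from bayes_opt import BayesianOptimization

# bounds are illustrative assumptions, not taken from the original project
optimizer = BayesianOptimization(
    f=xgb_evaluate,
    pbounds={'min_child_weight': (1, 20),
             'colsample_bytree': (0.1, 1),
             'max_depth': (5, 15),
             'subsample': (0.5, 1),
             'gamma': (0, 10),
             'alpha': (0, 10)})
optimizer.maximize(init_points=5, n_iter=25)
print(optimizer.max)
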
github Speedml / speedml / speedml / xgb.py
def cv(self, grid_params):
        """
        Calculate the Cross-Validation (CV) score for XGBoost model based on ``grid_params`` parameters. Sets xgb.cv_results variable to the resulting dataframe.
        """
        xgdmat = xgb.DMatrix(Base.train_X, Base.train_y)
        self.cv_results = xgb.cv(
            params = grid_params, dtrain = xgdmat,
            num_boost_round = 1000, nfold = 5,
            metrics = ['error'], early_stopping_rounds = 20)
        self.error = self.cv_results.get_value(len(self.cv_results) - 1, 'test-error-mean')
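
DataFrame.get_value has since been removed from pandas; the same lookup can be written with plain indexing, for example:

self.error = self.cv_results['test-error-mean'].iloc[-1]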