How to use the xgboost.train function in xgboost

To help you get started, we’ve selected a few xgboost.train examples based on popular ways it is used in public projects.

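At its core, xgboost.train takes a parameter dict, a training DMatrix, and a number of boosting rounds, and returns a trained Booster. A minimal sketch with synthetic data (the values here are only for illustration):

import numpy as np
import xgboost as xgb

# Synthetic binary classification data, for illustration only
X = np.random.rand(100, 10)
y = np.random.randint(2, size=100)
dtrain = xgb.DMatrix(X, label=y)

params = {'max_depth': 3, 'eta': 0.3, 'objective': 'binary:logistic'}
bst = xgb.train(params, dtrain, num_boost_round=20,
                evals=[(dtrain, 'train')])  # reports the training metric each round
preds = bst.predict(dtrain)

The examples below show the same call pattern in real projects: evaluation lists, GPU training, training continuation via xgb_model, custom evaluation functions, and hyperparameter search.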

github yew1eb / machine-learning / xgboost / DataCastle / testt.py View on Github external
import numpy as np
import xgboost as xgb

def test():
    data = np.random.rand(5, 10)  # 5 entities, each contains 10 features
    label = np.random.randint(2, size=5)  # binary target
    dtrain = xgb.DMatrix(data, label=label)
    # Held-out evaluation data, generated the same way as the training set
    test_data = np.random.rand(5, 10)
    dtest = xgb.DMatrix(test_data, label=np.random.randint(2, size=5))
    param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
    evallist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 10
    bst = xgb.train(param, dtrain, num_round, evallist)
github dmlc / xgboost / tests / python-gpu / test_gpu_training_continuation.py View on Github external
def run_training_continuation(self, use_json):
        kRows = 64
        kCols = 32
        X = np.random.randn(kRows, kCols)
        y = np.random.randn(kRows)
        dtrain = xgb.DMatrix(X, y)
        params = {'tree_method': 'gpu_hist', 'max_depth': '2',
                  'gamma': '0.1', 'alpha': '0.01',
                  'enable_experimental_json_serialization': use_json}
        bst_0 = xgb.train(params, dtrain, num_boost_round=64)
        dump_0 = bst_0.get_dump(dump_format='json')

        bst_1 = xgb.train(params, dtrain, num_boost_round=32)
        bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
        dump_1 = bst_1.get_dump(dump_format='json')

        def recursive_compare(obj_0, obj_1):
            if isinstance(obj_0, float):
                assert np.isclose(obj_0, obj_1, atol=1e-6)
            elif isinstance(obj_0, str):
                assert obj_0 == obj_1
            elif isinstance(obj_0, int):
                assert obj_0 == obj_1
            elif isinstance(obj_0, dict):
                keys_0 = list(obj_0.keys())
                keys_1 = list(obj_1.keys())
github dmlc / xgboost / tests / python-gpu / test_monotonic_constraints.py View on Github external
def assert_constraint(constraint, tree_method):
    n = 1000
    X, y = make_regression(n, random_state=rng, n_features=1, n_informative=1)
    dtrain = xgb.DMatrix(X, y)
    param = {}
    param['tree_method'] = tree_method
    param['monotone_constraints'] = "(" + str(constraint) + ")"
    bst = xgb.train(param, dtrain)
    dpredict = xgb.DMatrix(X[X[:, 0].argsort()])
    pred = bst.predict(dpredict)
    if constraint > 0:
        assert non_decreasing(pred)
    elif constraint < 0:
        assert non_increasing(pred)
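The non_decreasing and non_increasing helpers used in the assertions are defined elsewhere in the test module; a minimal sketch of what they check could look like this:

def non_decreasing(values):
    # every prediction is >= the previous one
    return all(a <= b for a, b in zip(values, values[1:]))

def non_increasing(values):
    # every prediction is <= the previous one
    return all(a >= b for a, b in zip(values, values[1:]))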
github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgboost.py View on Github external
# Convert input data from numpy to XGBoost format
    dtrain = xgb.DMatrix(X_train, label=y_train, nthread=-1)
    dtest = xgb.DMatrix(X_test, label=y_test, nthread=-1)

    gpu_res = {}  # Store accuracy result
    tmp = time.time()
    # Train model
    xgb.train(param, dtrain, num_round, evals=[
              (dtest, 'test')], evals_result=gpu_res)
    print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))

    # Repeat for CPU algorithm
    tmp = time.time()
    param['tree_method'] = 'hist'
    cpu_res = {}
    xgb.train(param, dtrain, num_round, evals=[
              (dtest, 'test')], evals_result=cpu_res)
    print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))
github dmlc / xgboost / tests / python / test_tree_regularization.py View on Github external
def test_alpha_and_lambda(self):
        params = {
            'tree_method': 'exact', 'verbosity': 1,
            'objective': 'reg:squarederror',
            'eta': 1,
            'lambda': 1,
            'alpha': 0.1
        }

        model = xgb.train(params, train_data, 1)
        preds = model.predict(train_data)

        # Default prediction (with no trees) is 0.5
        # sum_grad = (0.5 - 1.0)
        # sum_hess = 1.0
        # 0.7 = 0.5 - (sum_grad - alpha * sgn(sum_grad)) / (sum_hess + lambda)
        assert_approx_equal(preds[0], 0.7)
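train_data is a DMatrix built at module scope in the original test. For the arithmetic in the comment to work out (base score 0.5, one label of 1.0, a single Hessian of 1.0), a one-row dataset like the following would be consistent, though the exact feature value is an assumption:

import numpy as np
import xgboost as xgb

# A single example with label 1.0: sum_grad = 0.5 - 1.0 = -0.5, sum_hess = 1.0
train_data = xgb.DMatrix(np.array([[1.0]]), label=np.array([1.0]))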
github dmlc / xgboost / tests / python / test_training_continuation.py View on Github external
X_2class = digits_2class['data']
        y_2class = digits_2class['target']

        X_5class = digits_5class['data']
        y_5class = digits_5class['target']

        dtrain_2class = xgb.DMatrix(X_2class, label=y_2class)
        dtrain_5class = xgb.DMatrix(X_5class, label=y_5class)

        gbdt_01 = xgb.train(xgb_params_01, dtrain_2class,
                            num_boost_round=10)
        ntrees_01 = len(gbdt_01.get_dump())
        assert ntrees_01 == 10

        gbdt_02 = xgb.train(xgb_params_01, dtrain_2class,
                            num_boost_round=0)
        gbdt_02.save_model('xgb_tc.model')

        gbdt_02a = xgb.train(xgb_params_01, dtrain_2class,
                             num_boost_round=10, xgb_model=gbdt_02)
        gbdt_02b = xgb.train(xgb_params_01, dtrain_2class,
                             num_boost_round=10, xgb_model="xgb_tc.model")
        ntrees_02a = len(gbdt_02a.get_dump())
        ntrees_02b = len(gbdt_02b.get_dump())
        assert ntrees_02a == 10
        assert ntrees_02b == 10

        res1 = mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class))
        res2 = mean_squared_error(y_2class, gbdt_02a.predict(dtrain_2class))
        assert res1 == res2
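Note the two forms of xgb_model above: an in-memory Booster (gbdt_02) or the path of a previously saved model ("xgb_tc.model"). Both continue boosting from the existing trees; because gbdt_02 was trained for zero rounds, ten continued rounds produce the same ten-tree model, and the same predictions, as training gbdt_01 from scratch.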
github dmlc / xgboost / tests / python / regression_test_utilities.py View on Github external
X = scale(dataset.X, with_mean=isinstance(dataset.X, np.ndarray))
    else:
        X = dataset.X

    if dataset.use_external_memory:
        np.savetxt('tmptmp_1234.csv', np.hstack((dataset.y.reshape(len(dataset.y), 1), X)),
                   delimiter=',')
        dtrain = xgb.DMatrix('tmptmp_1234.csv?format=csv&label_column=0#tmptmp_',
                             weight=dataset.w)
    else:
        dtrain = xgb.DMatrix(X, dataset.y, weight=dataset.w)

    print("Training on dataset: " + dataset.name, file=sys.stderr)
    print("Using parameters: " + str(param), file=sys.stderr)
    res = {}
    bst = xgb.train(param, dtrain, num_rounds, [(dtrain, 'train')],
                    evals_result=res, verbose_eval=False)

    # Free the booster and dmatrix so we can delete temporary files
    bst_copy = bst.copy()
    del bst
    del dtrain

    # Cleanup temporary files
    if dataset.use_external_memory:
        for f in glob.glob("tmptmp_*"):
            os.remove(f)

    return {"dataset": dataset, "bst": bst_copy, "param": param.copy(),
            "eval": res['train'][dataset.metric]}
github mratsim / home-credit-default-risk / m100_predictions.py View on Github external
###############################

# Quick validation to get a unique name
logger.info("   ===> Validation")
x_trn, x_val, y_trn, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
val_score = xgb_validate(x_trn, x_val, y_trn, y_val, xgb_params, seed_val = 0)

# Cross validation
logger.info("   ===> Cross-Validation")
n_stop = xgb_cross_val(xgb_params, X, y, folds)
n_stop = int(n_stop * 1.1) # Full dataset is 25% bigger, so we want a bit of leeway on the stopping round to avoid overfitting.

# Training
logger.info("   ===> Training")
xgtrain = xgb.DMatrix(X, y)
classifier = xgb.train(xgb_params, xgtrain, n_stop)

# Output
logger.info("   ===> Start predictions")
xgb_output(X_test, X_test['SK_ID_CURR'], classifier, n_stop, val_score)

# Cleanup
db_conn.close()

end_time = timer()
logger.info("   ===>  Success")
logger.info("         Total elapsed time: %s" % (end_time - start_time))
logging.shutdown()

final_logfile = os.path.join('./outputs/', f'{str_timerun}--valid{val_score:.4f}.log')
os.rename(tmp_logfile, final_logfile)
github doncat99 / StockRecommendSystem / Source / StockPrediction / Stock_Prediction_Model_XgBoost.py View on Github external
'subsample' : subsample,
            #'min_child_weight': min_child_weight,
            'objective': "multi:softmax",
            'num_class':7,
            "eval_metric":'merror',
            'silent':False,

            # 'gpu_id':1,
            # 'max_bin':16,
            # 'tree_method': "gpu_exact",
            # 'updater':'grow_gpu',
            # 'n_gpus':-1,
            # 'predictor': "gpu_predictor",

        }
        model = xgb.train(params, self.train, self.paras.epoch, self.watchlist, feval=Xg_iter_precision)
        return model
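Xg_iter_precision is a project-specific metric passed through feval; its implementation is not shown in the excerpt. Any function with the following signature, returning a (name, value) pair, can be used as a custom evaluation metric for xgb.train (a generic sketch, not the project's code):

import numpy as np

def custom_precision(preds, dtrain):
    # preds: predictions for the evaluated DMatrix; with 'multi:softmax' these are class indices
    labels = dtrain.get_label()
    # stand-in metric: share of exactly matched class labels
    return 'precision', float(np.mean(preds == labels))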
github optuna / optuna / examples / xgboost_simple.py View on Github external
'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
        'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0)
    }

    if param['booster'] == 'gbtree' or param['booster'] == 'dart':
        param['max_depth'] = trial.suggest_int('max_depth', 1, 9)
        param['eta'] = trial.suggest_loguniform('eta', 1e-8, 1.0)
        param['gamma'] = trial.suggest_loguniform('gamma', 1e-8, 1.0)
        param['grow_policy'] = trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide'])
    if param['booster'] == 'dart':
        param['sample_type'] = trial.suggest_categorical('sample_type', ['uniform', 'weighted'])
        param['normalize_type'] = trial.suggest_categorical('normalize_type', ['tree', 'forest'])
        param['rate_drop'] = trial.suggest_loguniform('rate_drop', 1e-8, 1.0)
        param['skip_drop'] = trial.suggest_loguniform('skip_drop', 1e-8, 1.0)

    bst = xgb.train(param, dtrain)
    preds = bst.predict(dtest)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
    return accuracy
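This function is the study objective from the Optuna example; dtrain, dtest and test_y are prepared earlier in the script. Running the search would look roughly like this (a standard Optuna driver, sketched rather than copied from the example):

import optuna

study = optuna.create_study(direction='maximize')  # maximize the returned accuracy
study.optimize(objective, n_trials=100)
print(study.best_trial.params)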