How to use the xgboost.DMatrix class in xgboost

To help you get started, we've selected a few xgboost examples based on popular ways it is used in public projects.

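As a quick orientation before the real-world snippets, here is a minimal sketch of constructing a DMatrix from NumPy arrays and training on it (the data and parameter values are illustrative, not taken from any of the projects below):

import numpy as np
import xgboost as xgb

X = np.random.rand(100, 4)           # feature matrix
y = np.random.randint(2, size=100)   # binary labels

# DMatrix is xgboost's internal data container; labels, weights and a
# missing-value marker can all be attached at construction time
dtrain = xgb.DMatrix(X, label=y, missing=np.nan)

bst = xgb.train({'objective': 'binary:logistic'}, dtrain, num_boost_round=10)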

github Gofinge/Analysis-of-Stock-High-Frequent-Data-with-LSTM: tests/test_xgboost.py
import numpy as np
import xgboost as xgb

# project-specific helpers used below: extract_feature_and_label,
# divide_train_and_test, data_transform_for_xgboost, sign, find_all_indices

# step 2: Select Feature
data = extract_feature_and_label(data, feature_name_list=conf['feature_name'], label_name_list=conf['label_name'])

# step 3: Preprocess
train, test = divide_train_and_test(data, conf['training_set_proportion'])
train_x, train_y = data_transform_for_xgboost(train)
test_x, test_y = data_transform_for_xgboost(test)
train_y = sign(train_y)
test_y = sign(test_y)
indices = find_all_indices(train_y, 1)
indices.extend(find_all_indices(train_y, -1))
train_x = np.array(train_x)[indices]
train_y = np.array(train_y)[indices]

dtrain = xgb.DMatrix(train_x, train_y)

param = {
    'booster': 'gbtree',
    'silent': True,  # deprecated in newer xgboost releases; 'verbosity' replaces it
    'eta': 0.01,
    'max_depth': 5,
    'gamma': 0.1,
    'objective': 'multi:softmax',
    'num_class': 3,  # note: only two label values ({-1, 1}) survive the filtering above
    'seed': 1000,
    'scale_pos_weight': 1
}

# the Booster-style param dict is splatted into the sklearn wrapper here;
# unrecognised keys are passed through to the underlying booster
clf = xgb.XGBClassifier(**param)
if conf['use_previous_model'] is False:
    clf.fit(train_x, train_y)
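Note that the snippet builds dtrain but then fits through the scikit-learn wrapper, which takes raw arrays. A sketch of the native-API equivalent, reusing the param dict above (the round count is illustrative, and multi:softmax requires labels in [0, num_class), so the {-1, 1} labels would need remapping first):

train_y_mapped = np.where(train_y == -1, 0, train_y)  # hypothetical remap
dtrain = xgb.DMatrix(train_x, train_y_mapped)
bst = xgb.train(param, dtrain, num_boost_round=100)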
github dmlc/xgboost: tests/python/test_monotone_constraints.py
import numpy as np
import xgboost as xgb

# is_increasing / is_decreasing are helper predicates defined elsewhere in the test module
def is_correctly_constrained(learner):
    n = 100
    variable_x = np.linspace(0, 1, n).reshape((n, 1))
    fixed_xs_values = np.linspace(0, 1, n)

    for i in range(n):
        fixed_x = fixed_xs_values[i] * np.ones((n, 1))
        monotonically_increasing_x = np.column_stack((variable_x, fixed_x))
        monotonically_increasing_dset = xgb.DMatrix(monotonically_increasing_x)
        monotonically_increasing_y = learner.predict(
            monotonically_increasing_dset
        )

        monotonically_decreasing_x = np.column_stack((fixed_x, variable_x))
        monotonically_decreasing_dset = xgb.DMatrix(monotonically_decreasing_x)
        monotonically_decreasing_y = learner.predict(
            monotonically_decreasing_dset
        )

        if not (
            is_increasing(monotonically_increasing_y) and
            is_decreasing(monotonically_decreasing_y)
        ):
            return False

    return True
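For context, a learner passed to this check would typically have been trained with xgboost's monotone_constraints parameter; a minimal sketch (data and values are illustrative):

import numpy as np
import xgboost as xgb

rng = np.random.RandomState(0)
X = rng.uniform(size=(500, 2))
# y increases with feature 0 and decreases with feature 1
y = 5 * X[:, 0] - 5 * X[:, 1] + rng.normal(scale=0.1, size=500)

dtrain = xgb.DMatrix(X, label=y)
params = {
    'tree_method': 'hist',
    # +1 enforces an increasing relationship, -1 a decreasing one
    'monotone_constraints': '(1,-1)',
}
learner = xgb.train(params, dtrain, num_boost_round=50)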
github dmlc/xgboost: tests/python/test_fast_hist.py
        for j in range(X2.shape[1]):
            for i in np.random.choice(X2.shape[0], size=10, replace=False):
                X2[i, j] = 2

        dtrain3 = xgb.DMatrix(X2, label=y2)
        res = {}
        xgb.train(param, dtrain3, 10, [(dtrain3, 'train')], evals_result=res)
        assert self.non_decreasing(res['train']['auc'])
        assert res['train']['auc'][0] >= 0.85

        for j in range(X2.shape[1]):
            for i in np.random.choice(X2.shape[0], size=10, replace=False):
                X2[i, j] = 3

        dtrain4 = xgb.DMatrix(X2, label=y2)
        res = {}
        xgb.train(param, dtrain4, 10, [(dtrain4, 'train')], evals_result=res)
        assert self.non_decreasing(res['train']['auc'])
        assert res['train']['auc'][0] >= 0.85

        # fail-safe test for max_bin=2
        param = {'objective': 'binary:logistic',
                 'tree_method': 'hist',
                 'grow_policy': 'depthwise',
                 'max_depth': 2,
                 'eval_metric': 'auc',
                 'max_bin': 2}
        res = {}
        xgb.train(param, dtrain2, 10, [(dtrain2, 'train')], evals_result=res)
        assert self.non_decreasing(res['train']['auc'])
        assert res['train']['auc'][0] >= 0.85
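For reference, max_bin caps the number of histogram buckets the hist method builds per feature; the fail-safe above checks that even max_bin=2 still learns on this data. A standalone sketch of the same idea (synthetic data, illustrative values):

import numpy as np
import xgboost as xgb

X = np.random.rand(200, 3)
y = (X[:, 0] > 0.5).astype(int)

dtrain = xgb.DMatrix(X, label=y)
param = {'objective': 'binary:logistic', 'tree_method': 'hist',
         'eval_metric': 'auc', 'max_bin': 2}
res = {}
xgb.train(param, dtrain, 10, [(dtrain, 'train')], evals_result=res)
print(res['train']['auc'])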
github h2oai/h2o4gpu: tests/python/xgboost/01_airline_GPU.py
def test_xgboost(clf, X, y):
    ddata = xgb.DMatrix(data=X, label=y, nthread=-1)
    with Timer() as t:
        y_pred = clf.predict(ddata)
    return y_pred, t.interval
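Timer here is assumed to be a small context manager from the surrounding project that stores the elapsed time in t.interval. A hypothetical call site, where bst is a trained Booster and X, y are test arrays:

y_pred, elapsed = test_xgboost(bst, X, y)
print("prediction took %.3f s" % elapsed)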
github h2oai/h2o4gpu: tests/python/open_data/gbm/test_gpu_prediction_pickledmodel.py
    # makeXy and n_estimators come from the surrounding test module
    def test_predict_nopickle(self):
        X, y = makeXy()

        dm = xgb.DMatrix(X, label=y)
        watchlist = [(dm, 'train')]
        res = {}
        param = {
            "objective": "binary:logistic",
            "predictor": "gpu_predictor",
            'eval_metric': 'auc',
        }
        bst = xgb.train(param, dm, n_estimators,
                        evals=watchlist, evals_result=res)
        assert self.non_decreasing(res["train"]["auc"])

        print("Before model.predict on GPU")
        sys.stdout.flush()
        tmp = time.time()
        gpu_pred = bst.predict(dm, output_margin=True)
        print(gpu_pred)
github NVIDIA/gbm-bench: algorithms.py
    def test(self, data):
        if isinstance(data.X_test, np.ndarray):
            data.X_test = pd.DataFrame(data=data.X_test, columns=np.arange(0,
                                                                           data.X_test.shape[1]),
                                       index=np.arange(0, data.X_test.shape[0]))
        data.X_test.columns = [str(i) for i in range(0, data.X_test.shape[1])]
        dtest = xgb.DMatrix(data.X_test, data.y_test)
        return self.model.predict(dtest)
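The string column names are not cosmetic: when given a DataFrame, DMatrix picks up feature names from the columns, and prediction later validates them against the names seen at training time. A quick illustration:

import pandas as pd
import xgboost as xgb

df = pd.DataFrame({'0': [1.0, 2.0], '1': [3.0, 4.0]})
dm = xgb.DMatrix(df)
print(dm.feature_names)  # ['0', '1']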
github dmlc/xgboost: demo/gpu_acceleration/bosch.py
import time

import xgboost as xgb
from sklearn.model_selection import StratifiedKFold

# X, y and an initial `param` dict are prepared earlier in the demo script
param['eval_metric'] = 'auc'
param['max_depth'] = 5
param['eta'] = 0.3
param['silent'] = 0
param['tree_method'] = 'gpu_exact'  # legacy method; removed in later xgboost releases

num_round = 20

skf = StratifiedKFold(n_splits=5)

for i, (train, test) in enumerate(skf.split(X, y)):
    dtrain = xgb.DMatrix(X[train], label=y[train])
    tmp = time.time()
    bst = xgb.train(param, dtrain, num_round)
    boost_time = time.time() - tmp
    res = bst.eval(xgb.DMatrix(X[test], label=y[test]))
    print("Fold {}: {}, Boost Time {}".format(i, res, str(boost_time)))
    del bst
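The gpu_exact method used above was deprecated and later removed from xgboost; on a current release the equivalent setup selects GPU training via the device parameter (a sketch, version-dependent):

# for xgboost >= 2.0
param = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'max_depth': 5,
    'eta': 0.3,
    'tree_method': 'hist',
    'device': 'cuda',
}
bst = xgb.train(param, dtrain, num_boost_round=20)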
github dmlc/xgboost: demo/guide-python/generalized_linear_model.py
#!/usr/bin/python
import xgboost as xgb
##
#  this script demonstrates how to fit a generalized linear model in xgboost
#  basically, we use a linear model instead of trees as the booster
##
dtrain = xgb.DMatrix('../data/agaricus.txt.train')
dtest = xgb.DMatrix('../data/agaricus.txt.test')
# change booster to gblinear, so that we are fitting a linear model
# alpha is the L1 regularizer
# lambda is the L2 regularizer
# you can also set lambda_bias which is L2 regularizer on the bias term
param = {'silent':1, 'objective':'binary:logistic', 'booster':'gblinear',
         'alpha': 0.0001, 'lambda': 1}

# normally, you do not need to set eta (step size)
# XGBoost uses a parallel coordinate descent algorithm (shotgun);
# parallelization can affect convergence in certain cases
# setting eta to a smaller value, e.g. 0.5, can make the optimization more stable
# param['eta'] = 1

##
# the rest of the settings are the same
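The demo then trains and evaluates in the usual way; a sketch of that continuation (the round count is illustrative):

watchlist = [(dtest, 'eval'), (dtrain, 'train')]
num_round = 4
bst = xgb.train(param, dtrain, num_round, watchlist)
preds = bst.predict(dtest)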
github hsperr/machine-learning: XGBoostClassifier.py
    # assumes numpy as np and xgboost as xgb are imported at module level;
    # X may be a pandas object, a numpy array, or already a DMatrix
    def convert(self, X, y=None):
        if y is None:
            if isinstance(X, xgb.DMatrix):
                return X
            if hasattr(X,'values'):
                X = xgb.DMatrix(X.values)
                return X
            return xgb.DMatrix(X)
        else:
            # assumes X and y are both pandas objects here (both expose .values)
            if hasattr(X,'values'):
                X = xgb.DMatrix(X.values, y.values, missing=np.nan)
                return X
            return xgb.DMatrix(X, y, missing=np.nan)
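A hypothetical usage of this helper, assuming `model` is an instance of the wrapper class it belongs to:

import numpy as np
import pandas as pd

X = pd.DataFrame({'f0': [0.1, 0.2], 'f1': [1.0, np.nan]})
y = pd.Series([0, 1])

dtrain = model.convert(X, y)  # DMatrix with labels, NaN marked missing
dpred = model.convert(X)      # DMatrix without labels, for prediction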
github wepe/efficient-decision-tree-notes: tgboost/example/higgs_xgb.py
import numpy as np
import pandas as pd
import xgboost as xgb

train = pd.read_csv('../../data/train.csv').drop(["EventId", "Weight"], axis=1)
val = pd.read_csv('../../data/test.csv').drop(["EventId", "Weight"], axis=1)
train.replace(to_replace=-999., value=np.nan, inplace=True)
train.replace(to_replace='s', value=1, inplace=True)
train.replace(to_replace='b', value=0, inplace=True)
val.replace(to_replace=-999., value=np.nan, inplace=True)
val.replace(to_replace='s', value=1, inplace=True)
val.replace(to_replace='b', value=0, inplace=True)

train_y = train.Label
train_X = train.drop('Label', axis=1)
val_y = val.Label
val_X = val.drop('Label', axis=1)

dtrain = xgb.DMatrix(train_X, label=train_y)
dval = xgb.DMatrix(val_X, label=val_y)

params = {'booster':'gbtree',
          'objective': 'binary:logistic',
          'eta': 0.3,
          'max_depth': 6,
          'num_boost_round': 200,  # note: not a booster param; normally passed to xgb.train directly
          'scale_pos_weight': 1.0,
          'subsample': 0.8,
          'colsample_bytree': 0.8,
          'colsample_bylevel': 1.0,
          'min_sample_split': 50,  # note: not an xgboost parameter (sklearn naming); ignored by xgboost
          'min_child_weight': 1,
          'lambda': 10,
          'gamma': 1,
          'eval_metric': "auc",