How to use xgboost - 9 common examples

To help you get started, we’ve selected a few xgboost examples based on popular ways it is used in public projects.

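The snippets below use both of xgboost's Python APIs: the native interface (xgb.DMatrix plus xgb.train) and the scikit-learn wrappers (XGBClassifier, XGBRegressor, XGBRanker). As a point of reference before the project code, here is a minimal end-to-end sketch of both, with synthetic data and placeholder parameter values:

import numpy as np
import xgboost as xgb

# Synthetic binary-classification data (placeholder sizes).
X = np.random.rand(100, 10)
y = np.random.randint(2, size=100)

# Native API: wrap the data in a DMatrix and call xgb.train.
dtrain = xgb.DMatrix(X, label=y)
bst = xgb.train({'max_depth': 3, 'eta': 0.1, 'objective': 'binary:logistic'},
                dtrain, num_boost_round=10)
probs = bst.predict(dtrain)  # probabilities under binary:logistic

# scikit-learn API: the same model behind a fit/predict interface.
clf = xgb.XGBClassifier(max_depth=3, learning_rate=0.1, n_estimators=10)
clf.fit(X, y)
labels = clf.predict(X)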

github Gofinge / Analysis-of-Stock-High-Frequent-Data-with-LSTM / tests / test_xgboost.py
# Imports assumed by this excerpt; the helper functions
# (extract_feature_and_label, divide_train_and_test, data_transform_for_xgboost,
# sign, find_all_indices) come from the project's own utility module.
import numpy as np
import xgboost as xgb

# step 2: select features and labels
data = extract_feature_and_label(data, feature_name_list=conf['feature_name'], label_name_list=conf['label_name'])

# step 3: preprocess
train, test = divide_train_and_test(data, conf['training_set_proportion'])
train_x, train_y = data_transform_for_xgboost(train)
test_x, test_y = data_transform_for_xgboost(test)
train_y = sign(train_y)
test_y = sign(test_y)

# keep only the rows whose label is +1 or -1
indices = find_all_indices(train_y, 1)
indices.extend(find_all_indices(train_y, -1))
train_x = np.array(train_x)[indices]
train_y = np.array(train_y)[indices]

dtrain = xgb.DMatrix(train_x, train_y)

param = {
    'booster': 'gbtree',
    'silent': True,
    'eta': 0.01,
    'max_depth': 5,
    'gamma': 0.1,
    'objective': 'multi:softmax',
    'num_class': 3,
    'seed': 1000,
    'scale_pos_weight': 1
}

clf = xgb.XGBClassifier(**param)
if conf['use_previous_model'] is False:
    clf.fit(train_x, train_y)
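Note that the snippet builds dtrain but then trains through the scikit-learn wrapper, so the DMatrix goes unused on that code path. Note also that 'multi:softmax' expects class labels in the range [0, num_class), so the -1/0/1 labels produced by sign() would need remapping (for example train_y + 1) before fitting. If the native API were intended instead, the equivalent call would look roughly like this (a sketch; the round count is a placeholder):

# Native-API equivalent of the fit above (num_boost_round is a placeholder).
bst = xgb.train(param, dtrain, num_boost_round=100)
pred = bst.predict(xgb.DMatrix(np.array(test_x)))  # predicted class ids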
github yew1eb / machine-learning / xgboost / DataCastle / testt.py
import numpy as np
import xgboost as xgb

def test():
    data = np.random.rand(5, 10)           # 5 entities, each with 10 features
    label = np.random.randint(2, size=5)   # binary target
    dtrain = xgb.DMatrix(data, label=label)
    # 'test' was undefined in the original; synthetic held-out data is added
    # here so the watchlist below can actually be evaluated.
    test_data = np.random.rand(2, 10)
    test_label = np.random.randint(2, size=2)
    dtest = xgb.DMatrix(test_data, label=test_label)
    param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
    evallist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 10
    bst = xgb.train(param, dtrain, num_round, evallist)
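With a watchlist, xgb.train prints the eval and train metrics every round. Adding early_stopping_rounds stops training when the metric on the last entry in the list stops improving, so the held-out set should be placed last. A hedged variant of the call above:

# Early stopping watches the last entry in evals, so put the held-out set last.
bst = xgb.train(param, dtrain, num_round,
                evals=[(dtrain, 'train'), (dtest, 'eval')],
                early_stopping_rounds=3)
print(bst.best_iteration)  # the best round found before stopping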
github dmlc / xgboost / tests / python-gpu / test_gpu_prediction.py
# Excerpt from a test method; X, y and tr_size are defined earlier in the test.
X_train, y_train = X[:tr_size, :], y[:tr_size]
X_test, y_test = X[tr_size:, :], y[tr_size:]

# First with cpu_predictor
params = {'tree_method': 'gpu_hist',
          'predictor': 'cpu_predictor',
          'n_jobs': -1,
          'seed': 123}
m = xgb.XGBRegressor(**params).fit(X_train, y_train)
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)

# Now with gpu_predictor
params['predictor'] = 'gpu_predictor'

m = xgb.XGBRegressor(**params).fit(X_train, y_train)
gpu_train_score = m.score(X_train, y_train)
gpu_test_score = m.score(X_test, y_test)

# The two predictors must produce numerically equal scores.
assert np.allclose(cpu_train_score, gpu_train_score)
assert np.allclose(cpu_test_score, gpu_test_score)
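To run the fragment standalone, a minimal stand-in for the missing setup could look like this (the names match the fragment; the sizes and random seed are placeholder assumptions):

import numpy as np
import xgboost as xgb

rng = np.random.RandomState(123)
X = rng.randn(1000, 10)   # 1000 rows, 10 features
y = rng.randn(1000)
tr_size = 800             # first 800 rows train, the rest test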
github slundberg / shap / tests / explainers / test_tree.py
def test_xgboost_direct():
    try:
        import xgboost
    except ImportError:
        print("Skipping test_xgboost_direct!")
        return
    import shap

    N = 100
    M = 4
    X = np.random.randn(N, M)
    y = np.random.randn(N)

    model = xgboost.XGBRegressor()
    model.fit(X, y)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # _brute_force_tree_shap is a reference implementation defined in the same test file.
    assert np.allclose(shap_values[0, :], _brute_force_tree_shap(explainer.model, X[0, :]))
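For a regression model, shap_values has one row per sample, and each row sums to the model's raw prediction minus explainer.expected_value. Once the values are computed, a common next step (standard shap usage, not part of this test) is a summary plot:

shap.summary_plot(shap_values, X)  # global view of feature importance and direction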
github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgb_sklearn_wrapper.py
def test_xgboost_classifier(output_margin):
    import xgboost as xgb
    # pd and np are imported at module level in the source file.
    df = pd.read_csv("./open_data/creditcard.csv")
    # All columns but the last are features; the last column is the target.
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')

    gbm = xgb.sklearn.XGBClassifier()

    gbm.fit(X, y)

    gbm.predict(X, output_margin=output_margin)

    # Note: passing output_margin to predict_proba relies on this project's
    # xgboost build; stock xgboost's predict_proba does not take that argument.
    gbm.predict_proba(X, output_margin=output_margin)
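For a binary classifier, output_margin=True makes predict return the raw, untransformed scores; applying the logistic sigmoid to those margins recovers the positive-class probability. A quick sanity check along those lines (a sketch using stock xgboost, assuming the fitted model above):

from scipy.special import expit  # the logistic sigmoid

margins = gbm.predict(X, output_margin=True)
proba = gbm.predict_proba(X)[:, 1]
assert np.allclose(expit(margins), proba, atol=1e-6)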
github lyft / flytekit / tests / flytekit / common / workflows / sagemaker.py
# Excerpt from a Flyte workflow task; the beginning of the function
# signature is truncated in the source listing.
        custom_input1,
        custom_input2,
        model,
        custom_output1,
    ):

    with train as reader:
        train_df = reader.read(concat=True)
        dtrain_x = xgb.DMatrix(train_df[:-1])
        dtrain_y = xgb.DMatrix(train_df[-1])
    with validation as reader:
        validation_df = reader.read(concat=True)
        dvalidation_x = xgb.DMatrix(validation_df[:-1])
        dvalidation_y = xgb.DMatrix(validation_df[-1])

    my_model = xgb.XGBModel(**static_hyperparameters)

    my_model.fit(dtrain_x,
                 dtrain_y,
                 eval_set=[(dvalidation_x, dvalidation_y)],
                 eval_metric=sample_eval_function)

    model.set(my_model)
    custom_output1.set(my_model.evals_result())
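Two details here are worth flagging: xgboost's scikit-learn-style XGBModel.fit expects array-like X and y rather than DMatrix objects, and train_df[:-1] slices off rows rather than columns. If the intent is "last column is the label", a column-wise split would look roughly like this (a sketch; the column layout is an assumption):

# Features are all columns but the last; the label is the last column (assumed).
X_train = train_df.iloc[:, :-1]
y_train = train_df.iloc[:, -1]
my_model.fit(X_train, y_train,
             eval_set=[(validation_df.iloc[:, :-1], validation_df.iloc[:, -1])],
             eval_metric=sample_eval_function)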
github slundberg / shap / tests / explainers / test_tree.py
def test_xgboost_ranking():
    try:
        import xgboost
    except ImportError:
        print("Skipping test_xgboost_ranking!")
        return
    import shap

    # train an xgboost ranker model
    x_train, y_train, x_test, y_test, q_train, q_test = shap.datasets.rank()
    params = {'objective': 'rank:pairwise', 'learning_rate': 0.1,
              'gamma': 1.0, 'min_child_weight': 0.1,
              'max_depth': 4, 'n_estimators': 4}
    model = xgboost.sklearn.XGBRanker(**params)
    model.fit(x_train, y_train, q_train.astype(int),
              eval_set=[(x_test, y_test)], eval_group=[q_test.astype(int)])
    _validate_shap_values(model, x_test)
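The third positional argument to XGBRanker.fit is the per-query group sizes; shap.datasets.rank() returns them ready-made as q_train and q_test. When starting instead from a per-row query-id column, the sizes can be derived along these lines (a sketch; qid is a hypothetical array of query ids, and the rows are assumed to be sorted by it):

import numpy as np

# Count rows per query; with rows sorted by qid, the counts line up
# with the order the groups appear in the data.
_, group_sizes = np.unique(qid, return_counts=True)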
github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgb_sklearn_wrapper.py
def test_xgboost_regression(output_margin):
    import xgboost as xgb
    # pd and np are imported at module level in the source file.
    df = pd.read_csv("./open_data/creditcard.csv")
    # All columns but the last are features; the last column is the target.
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')

    gbm = xgb.sklearn.XGBRegressor()

    gbm.fit(X, y)

    gbm.predict(X, output_margin=output_margin)
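For a regressor, XGBRegressor.score returns R² rather than an error metric; an explicit error such as RMSE can be computed directly from the predictions (a short sketch using the fitted model above):

preds = gbm.predict(X)
rmse = float(np.sqrt(np.mean((preds - y) ** 2)))  # root-mean-squared error on the training set
print(rmse)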
github dmlc / xgboost / tests / distributed / distributed_gpu.py
def run_test(name, params_fun):
    """Runs a distributed GPU test."""
    # Always call this before using the distributed module.
    xgb.rabit.init()
    rank = xgb.rabit.get_rank()
    world = xgb.rabit.get_world_size()

    # Load the files; they are sharded automatically in distributed mode.
    dtrain = xgb.DMatrix('../../demo/data/agaricus.txt.train')
    dtest = xgb.DMatrix('../../demo/data/agaricus.txt.test')

    params, n_rounds = params_fun(rank)

    # Specify a validation set to watch performance.
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]

    # Run training; all features of the training API are available.
    # Currently this script only supports calling train once, for fault-recovery purposes.
    bst = xgb.train(params, dtrain, n_rounds, watchlist, early_stopping_rounds=2)

    # Have each worker save its model under a rank-specific name.
    model_name = "test.model.%s.%d" % (name, rank)
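The excerpt stops just before the save. What typically follows in a script like this (a sketch of an assumed continuation, not shown in the excerpt) is each worker writing its model and shutting the rabit tracker down:

bst.save_model(model_name)  # assumed continuation: each worker saves under its own name
xgb.rabit.finalize()        # pair every rabit.init() with a finalize()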