How to use the xgboost.dask.train function in xgboost

To help you get started, we’ve selected a few xgboost examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dmlc / xgboost / tests / python-gpu / test_gpu_with_dask.py View on Github external
def test_dask_dataframe(self):
        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                X, y = generate_array()

                X = dd.from_dask_array(X)
                y = dd.from_dask_array(y)

                X = X.map_partitions(cudf.from_pandas)
                y = y.map_partitions(cudf.from_pandas)

                dtrain = dxgb.DaskDMatrix(client, X, y)
                out = dxgb.train(client, {'tree_method': 'gpu_hist'},
                                 dtrain=dtrain,
                                 evals=[(dtrain, 'X')],
                                 num_boost_round=2)

                assert isinstance(out['booster'], dxgb.Booster)
                assert len(out['history']['X']['rmse']) == 2

                predictions = dxgb.predict(client, out, dtrain).compute()
                assert isinstance(predictions, np.ndarray)
github dmlc / xgboost / tests / python-gpu / test_gpu_with_dask.py View on Github external
assert isinstance(out['booster'], dxgb.Booster)
            assert len(out['history']['validation']['rmse']) == 2
            assert isinstance(predictions, np.ndarray)
            assert predictions.shape[0] == 1

        parameters = {'tree_method': 'gpu_hist', 'verbosity': 3,
                      'debug_synchronize': True}

        with LocalCUDACluster() as cluster:
            with Client(cluster) as client:
                kRows, kCols = 1, 97
                X = dd.from_array(np.random.randn(kRows, kCols))
                y = dd.from_array(np.random.rand(kRows))
                dtrain = dxgb.DaskDMatrix(client, X, y)

                out = dxgb.train(client, parameters,
                                 dtrain=dtrain,
                                 evals=[(dtrain, 'validation')],
                                 num_boost_round=2)
                predictions = dxgb.predict(client=client, model=out,
                                           data=dtrain).compute()
                _check_outputs(out, predictions)

                # train has more rows than evals
                valid = dtrain
                kRows += 1
                X = dd.from_array(np.random.randn(kRows, kCols))
                y = dd.from_array(np.random.rand(kRows))
                dtrain = dxgb.DaskDMatrix(client, X, y)

                out = dxgb.train(client, parameters,
                                 dtrain=dtrain,
github dmlc / xgboost / tests / python / test_with_dask.py View on Github external
X = dd.from_dask_array(X)
    y = dd.from_dask_array(y)

    dtrain = DaskDMatrix(client, X, y)
    booster = xgb.dask.train(
        client, {}, dtrain, num_boost_round=2)['booster']

    prediction = xgb.dask.predict(client, model=booster, data=dtrain)

    assert prediction.ndim == 1
    assert isinstance(prediction, da.Array)
    assert prediction.shape[0] == kRows

    with pytest.raises(ValueError):
        # evals_result is not supported in dask interface.
        xgb.dask.train(
            client, {}, dtrain, num_boost_round=2, evals_result={})

    prediction = prediction.compute()  # force prediction to be computed
github dmlc / xgboost / demo / dask / gpu_training.py View on Github external
def main(client):
    # generate some random data for demonstration
    m = 100000
    n = 100
    X = da.random.random(size=(m, n), chunks=100)
    y = da.random.random(size=(m, ), chunks=100)

    # DaskDMatrix acts like normal DMatrix, works as a proxy for local
    # DMatrix scatter around workers.
    dtrain = DaskDMatrix(client, X, y)

    # Use train method from xgboost.dask instead of xgboost.  This
    # distributed version of train returns a dictionary containing the
    # resulting booster and evaluation history obtained from
    # evaluation metrics.
    output = xgb.dask.train(client,
                            {'verbosity': 2,
                             'nthread': 1,
                             # Golden line for GPU training
                             'tree_method': 'gpu_hist'},
                            dtrain,
                            num_boost_round=4, evals=[(dtrain, 'train')])
    bst = output['booster']
    history = output['history']

    # you can pass output directly into `predict` too.
    prediction = xgb.dask.predict(client, bst, dtrain)
    prediction = prediction.compute()
    print('Evaluation history:', history)
    return prediction