How to use the xgboost.dask module in xgboost

To help you get started, we’ve selected a few xgboost.dask examples based on popular ways it is used in public projects.

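Before the project snippets below, here is a minimal sketch of the typical xgboost.dask workflow. The data shapes, parameter values, and cluster setup are illustrative assumptions rather than code taken from the projects listed here.

import dask.array as da
import xgboost as xgb
import xgboost.dask  # make the xgb.dask submodule available
from dask.distributed import Client

if __name__ == '__main__':
    client = Client()  # spins up a small local cluster for illustration

    # Random dask collections standing in for real training data.
    X = da.random.random((1000, 10), chunks=(100, 10))
    y = da.random.random(1000, chunks=100)

    # DaskDMatrix holds references to the distributed partitions rather than
    # copying the data to the client.
    dtrain = xgb.dask.DaskDMatrix(client, X, y)

    # train() returns a dict with the trained booster and the evaluation history.
    output = xgb.dask.train(client, {'tree_method': 'hist'}, dtrain,
                            num_boost_round=10, evals=[(dtrain, 'train')])

    # predict() returns a lazy dask array; compute() materializes it locally.
    prediction = xgb.dask.predict(client, output['booster'], dtrain)
    print(prediction.compute()[:5])

    client.close()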

github dmlc / xgboost / tests / python / test_with_dask.py
def test_from_dask_dataframe(client):
    X, y = generate_array()

    X = dd.from_dask_array(X)
    y = dd.from_dask_array(y)

    dtrain = DaskDMatrix(client, X, y)
    booster = xgb.dask.train(
        client, {}, dtrain, num_boost_round=2)['booster']

    prediction = xgb.dask.predict(client, model=booster, data=dtrain)

    assert prediction.ndim == 1
    assert isinstance(prediction, da.Array)
    assert prediction.shape[0] == kRows

    with pytest.raises(ValueError):
        # evals_result is not supported in dask interface.
        xgb.dask.train(
            client, {}, dtrain, num_boost_round=2, evals_result={})

    prediction = prediction.compute()  # force prediction to be computed
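
The test snippets on this page are excerpts and omit the module-level imports and helpers they rely on. A rough sketch of the names assumed above and below follows; the helper body and constants are illustrative guesses, not the project's actual code.

import numpy as np
import pytest
import dask.array as da
import dask.dataframe as dd
import xgboost as xgb
from xgboost import dask as dxgb        # alias used in the GPU test snippet
from xgboost.dask import DaskDMatrix

kRows = 1000   # assumed row count; the real tests define their own constants
kCols = 10

def generate_array():
    # Hypothetical stand-in for the test helper: random dask arrays of a
    # fixed shape, split into ten partitions.
    X = da.random.random((kRows, kCols), chunks=(kRows // 10, kCols))
    y = da.random.random(kRows, chunks=kRows // 10)
    return X, y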

github dmlc / xgboost / tests / python-gpu / test_gpu_with_dask.py
def _check_outputs(out, predictions):
    assert isinstance(out['booster'], dxgb.Booster)
    assert len(out['history']['validation']['rmse']) == 2
    assert isinstance(predictions, np.ndarray)
    assert predictions.shape[0] == 1

github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgboost.py
dtrain = DaskDMatrix(
    client=client, data=dask_X_train, label=dask_label_train)

dask_X_test = da.from_array(X_test, partition_size)
dask_X_test = dask_X_test.persist()
client.rebalance(dask_X_test)
dask_label_test = da.from_array(y_test, partition_size)
dask_label_test = dask_label_test.persist()
client.rebalance(dask_label_test)

dtest = DaskDMatrix(
    client=client, data=dask_X_test, label=dask_label_test)

gpu_res = {}  # Store accuracy result
tmp = time.time()
# Train model
xgb.dask.train(client, param, dtrain, num_boost_round=num_round,
               evals=[(dtest, 'test')])
print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))

# TODO: https://github.com/dmlc/xgboost/issues/4518
dtrain = xgb.DMatrix(X_train, label=y_train, nthread=-1)
dtest = xgb.DMatrix(X_test, label=y_test, nthread=-1)
# Repeat for CPU algorithm
tmp = time.time()
param['tree_method'] = 'hist'
cpu_res = {}
xgb.train(param, dtrain, num_round,
          evals=[(dtest, 'test')], evals_result=cpu_res)
print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))

github NVIDIA / gbm-bench / algorithms.py
def fit(self, data, args):
        params = self.configure(data, args)
        n_workers = None if args.gpus < 0 else args.gpus
        cluster = LocalCUDACluster(n_workers=n_workers,
                                   local_directory=args.root)
        client = Client(cluster)
        n_partitions = len(client.scheduler_info()['workers'])
        X_sliced, y_sliced = self.get_slices(n_partitions,
                                             data.X_train, data.y_train)
        X = da.concatenate([da.from_array(sub_array) for sub_array in X_sliced])
        X = X.rechunk((X_sliced[0].shape[0], data.X_train.shape[1]))
        y = da.concatenate([da.from_array(sub_array) for sub_array in y_sliced])
        y = y.rechunk(X.chunksize[0])
        dtrain = xgb.dask.DaskDMatrix(client, X, y)
        with Timer() as t:
            output = xgb.dask.train(client, params, dtrain, num_boost_round=args.ntrees)
        self.model = output['booster']
        client.close()
        cluster.close()
        return t.interval

github dmlc / xgboost / demo / dask / sklearn_cpu_training.py
def main(client):
    # generate some random data for demonstration
    n = 100
    m = 10000
    partition_size = 100
    X = da.random.random((m, n), chunks=partition_size)
    y = da.random.random(m, chunks=partition_size)

    regressor = xgboost.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
    regressor.set_params(tree_method='hist')
    # assigning client here is optional
    regressor.client = client

    regressor.fit(X, y, eval_set=[(X, y)])
    prediction = regressor.predict(X)

    bst = regressor.get_booster()
    history = regressor.evals_result()

    print('Evaluation history:', history)
    # returned prediction is always a dask array.
    assert isinstance(prediction, da.Array)
    return bst                  # returning the trained model

github dmlc / xgboost / demo / dask / sklearn_gpu_training.py
def main(client):
    # generate some random data for demonstration
    n = 100
    m = 1000000
    partition_size = 10000
    X = da.random.random((m, n), chunks=partition_size)
    y = da.random.random(m, chunks=partition_size)

    regressor = xgboost.dask.DaskXGBRegressor(verbosity=1)
    regressor.set_params(tree_method='gpu_hist')
    # assigning client here is optional
    regressor.client = client

    regressor.fit(X, y, eval_set=[(X, y)])
    prediction = regressor.predict(X)

    bst = regressor.get_booster()
    history = regressor.evals_result()

    print('Evaluation history:', history)
    # returned prediction is always a dask array.
    assert isinstance(prediction, da.Array)
    return bst                  # returning the trained model
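
main(client) above expects an already-connected dask client. Below is a hedged sketch of a driver for this GPU demo, assuming dask_cuda is installed; the actual demo's entry point may differ.

import dask.array as da
import xgboost
import xgboost.dask  # make xgboost.dask available
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

if __name__ == '__main__':
    # One worker per visible GPU; the cluster sizing here is an assumption.
    with LocalCUDACluster() as cluster:
        with Client(cluster) as client:
            main(client)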

github dmlc / xgboost / demo / dask / cpu_training.py
def main(client):
    # generate some random data for demonstration
    m = 100000
    n = 100
    X = da.random.random(size=(m, n), chunks=100)
    y = da.random.random(size=(m, ), chunks=100)

    # DaskDMatrix acts like a normal DMatrix: it works as a proxy for the
    # local DMatrix objects scattered across the workers.
    dtrain = DaskDMatrix(client, X, y)

    # Use the train method from xgboost.dask instead of xgboost. This
    # distributed version of train returns a dictionary containing the
    # resulting booster and the evaluation history from the supplied
    # evaluation metrics.
    output = xgb.dask.train(client,
                            {'verbosity': 1,
                             'nthread': 1,
                             'tree_method': 'hist'},
                            dtrain,
                            num_boost_round=4, evals=[(dtrain, 'train')])
    bst = output['booster']
    history = output['history']

    # you can pass output directly into `predict` too.
    prediction = xgb.dask.predict(client, bst, dtrain)
    print('Evaluation history:', history)
    return prediction
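
Below is a hedged sketch of how the demo above might be driven; it shows that the returned prediction is a lazy dask array that still needs compute(). The cluster sizing is an assumption.

from dask.distributed import Client, LocalCluster

if __name__ == '__main__':
    # Small local CPU cluster; single-threaded workers match 'nthread': 1 above.
    with LocalCluster(n_workers=4, threads_per_worker=1) as cluster:
        with Client(cluster) as client:
            prediction = main(client)        # lazy dask array
            print(prediction.compute()[:5])  # materialize on the client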

github h2oai / h2o4gpu / src / interface_py / h2o4gpu / solvers / xgboost.py
            warm_start=warm_start,
            presort=presort)  # h2o4gpu)

        if random_state is None:
            random_state = 0

        self.distributed = False

        import xgboost as xgb
        from ..util.gpu import device_count
        n_gpus, _ = device_count(n_gpus)
        if n_gpus > 1 and CUDA_DASK_INSTALLED:
            self.distributed = True
            from dask_cuda import LocalCUDACluster
            cluster = LocalCUDACluster(n_workers=n_gpus, threads_per_worker=1)
            self.model_h2o4gpu = xgb.dask.DaskXGBClassifier(
                learning_rate=learning_rate,  # h2o4gpu
                n_estimators=n_estimators,  # h2o4gpu
                subsample=subsample,  # h2o4gpu
                max_depth=max_depth,  # h2o4gpu
                random_state=random_state,  # h2o4gpu
                verbose=verbose,  # h2o4gpu
                colsample_bytree=colsample_bytree,  # h2o4gpu
                colsample_bylevel=colsample_bylevel,
                colsample_bynode=colsample_bynode,
                num_parallel_tree=num_parallel_tree,  # h2o4gpu
                tree_method=tree_method,  # h2o4gpu
                predictor=predictor,  # h2o4gpu
                objective=objective,
                booster=booster,
                n_jobs=n_jobs,
                gamma=gamma,