dtrain = dxgb.DaskDMatrix(client, X, y)
out = dxgb.train(client, parameters,
                 dtrain=dtrain,
                 evals=[(dtrain, 'validation')],
                 num_boost_round=2)
predictions = dxgb.predict(client=client, model=out,
                           data=dtrain).compute()
_check_outputs(out, predictions)

# train has more rows than evals
valid = dtrain
kRows += 1
X = dd.from_array(np.random.randn(kRows, kCols))
y = dd.from_array(np.random.rand(kRows))
dtrain = dxgb.DaskDMatrix(client, X, y)
out = dxgb.train(client, parameters,
                 dtrain=dtrain,
                 evals=[(valid, 'validation')],
                 num_boost_round=2)
predictions = dxgb.predict(client=client, model=out,
                           data=valid).compute()
_check_outputs(out, predictions)
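# The fragment above comes from a test suite and assumes names defined
# elsewhere in its module (`parameters`, `kRows`, `kCols`, `_check_outputs`,
# plus a running `client`).  A minimal, hypothetical sketch of that setup;
# the values here are illustrative, not the original test's:
import numpy as np
import dask.dataframe as dd
from dask.distributed import Client, LocalCluster
from xgboost import dask as dxgb

kRows, kCols = 1000, 10
parameters = {'tree_method': 'hist'}

def _check_outputs(out, predictions):
    # Mirrors the assertions used by the surrounding tests.
    assert isinstance(out['booster'], dxgb.Booster)
    assert len(out['history']['validation']['rmse']) == 2
    assert isinstance(predictions, np.ndarray)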
def test_dask_dataframe(self):
    with LocalCUDACluster() as cluster:
        with Client(cluster) as client:
            X, y = generate_array()
            X = dd.from_dask_array(X)
            y = dd.from_dask_array(y)
            # Convert the pandas partitions to cudf so the data lives in
            # GPU memory for training.
            X = X.map_partitions(cudf.from_pandas)
            y = y.map_partitions(cudf.from_pandas)
            dtrain = dxgb.DaskDMatrix(client, X, y)
            out = dxgb.train(client, {'tree_method': 'gpu_hist'},
                             dtrain=dtrain,
                             evals=[(dtrain, 'X')],
                             num_boost_round=2)
            assert isinstance(out['booster'], dxgb.Booster)
            assert len(out['history']['X']['rmse']) == 2
            predictions = dxgb.predict(client, out, dtrain).compute()
            assert isinstance(predictions, np.ndarray)
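# A hypothetical CPU-only variant of the same round trip, for environments
# without cudf/dask-cuda: keep the partitions as pandas and train with the
# 'hist' tree method.  `generate_array` is assumed to return a dask array
# pair, as in the GPU test above.
def test_dask_dataframe_cpu(self):
    with LocalCluster() as cluster:
        with Client(cluster) as client:
            X, y = generate_array()
            X = dd.from_dask_array(X)
            y = dd.from_dask_array(y)
            dtrain = dxgb.DaskDMatrix(client, X, y)
            out = dxgb.train(client, {'tree_method': 'hist'},
                             dtrain=dtrain,
                             evals=[(dtrain, 'X')],
                             num_boost_round=2)
            predictions = dxgb.predict(client, out, dtrain).compute()
            assert isinstance(predictions, np.ndarray)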
client.rebalance(dask_X_train)
dask_label_train = da.from_array(y_train, partition_size)
dask_label_train = dask_label_train.persist()
client.rebalance(dask_label_train)
dtrain = DaskDMatrix(
    client=client, data=dask_X_train, label=dask_label_train)

dask_X_test = da.from_array(X_test, partition_size)
dask_X_test = dask_X_test.persist()
client.rebalance(dask_X_test)
dask_label_test = da.from_array(y_test, partition_size)
dask_label_test = dask_label_test.persist()
client.rebalance(dask_label_test)
dtest = DaskDMatrix(
    client=client, data=dask_X_test, label=dask_label_test)

gpu_res = {}  # Store evaluation results
tmp = time.time()
# Train the model on GPU
xgb.dask.train(client, param, dtrain, num_boost_round=num_round,
               evals=[(dtest, 'test')])
print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))

# TODO: https://github.com/dmlc/xgboost/issues/4518
dtrain = xgb.DMatrix(X_train, label=y_train, nthread=-1)
dtest = xgb.DMatrix(X_test, label=y_test, nthread=-1)
# Repeat for the CPU algorithm
tmp = time.time()
param['tree_method'] = 'hist'
cpu_res = {}
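# The snippet ends before the CPU training call itself.  A plausible
# continuation, mirroring the GPU timing above (plain single-node
# xgb.train, since dtrain/dtest were rebuilt as regular DMatrix objects):
xgb.train(param, dtrain, num_boost_round=num_round,
          evals=[(dtest, 'test')], evals_result=cpu_res)
print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))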
def fit(self, data, args):
    params = self.configure(data, args)
    n_workers = None if args.gpus < 0 else args.gpus
    cluster = LocalCUDACluster(n_workers=n_workers,
                               local_directory=args.root)
    client = Client(cluster)
    n_partitions = len(client.scheduler_info()['workers'])
    X_sliced, y_sliced = self.get_slices(n_partitions,
                                         data.X_train, data.y_train)
    X = da.concatenate([da.from_array(sub_array) for sub_array in X_sliced])
    X = X.rechunk((X_sliced[0].shape[0], data.X_train.shape[1]))
    y = da.concatenate([da.from_array(sub_array) for sub_array in y_sliced])
    y = y.rechunk(X.chunksize[0])
    dtrain = xgb.dask.DaskDMatrix(client, X, y)
    with Timer() as t:
        output = xgb.dask.train(client, params, dtrain,
                                num_boost_round=args.ntrees)
    self.model = output['booster']
    client.close()
    cluster.close()
    return t.interval
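# `Timer` above is not from the standard library.  A minimal sketch of a
# compatible context manager (an assumption, not the benchmark's actual
# helper):
import time

class Timer:
    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *exc):
        self.interval = time.time() - self.start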
def main(client):
    # generate some random data for demonstration
    m = 100000
    n = 100
    X = da.random.random(size=(m, n), chunks=100)
    y = da.random.random(size=(m, ), chunks=100)

    # DaskDMatrix acts like a normal DMatrix; it works as a proxy for the
    # local DMatrix pieces scattered across the workers.
    dtrain = DaskDMatrix(client, X, y)

    # Use the train method from xgboost.dask instead of xgboost.  This
    # distributed version of train returns a dictionary containing the
    # resulting booster and the evaluation history obtained from the
    # evaluation metrics.
    output = xgb.dask.train(client,
                            {'verbosity': 2,
                             'nthread': 1,
                             # Golden line for GPU training
                             'tree_method': 'gpu_hist'},
                            dtrain,
                            num_boost_round=4, evals=[(dtrain, 'train')])
    bst = output['booster']
    history = output['history']

    # you can pass output directly into `predict` too.
    prediction = xgb.dask.predict(client, bst, dtrain)
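# Typical driver boilerplate for the GPU `main` above (a sketch; the
# LocalCUDACluster defaults are illustrative and require the dask-cuda
# package):
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

if __name__ == '__main__':
    with LocalCUDACluster() as cluster:
        with Client(cluster) as client:
            main(client)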
def main(client):
    # generate some random data for demonstration
    m = 100000
    n = 100
    X = da.random.random(size=(m, n), chunks=100)
    y = da.random.random(size=(m, ), chunks=100)

    # DaskDMatrix acts like a normal DMatrix; it works as a proxy for the
    # local DMatrix pieces scattered across the workers.
    dtrain = DaskDMatrix(client, X, y)

    # Use the train method from xgboost.dask instead of xgboost.  This
    # distributed version of train returns a dictionary containing the
    # resulting booster and the evaluation history obtained from the
    # evaluation metrics.
    output = xgb.dask.train(client,
                            {'verbosity': 1,
                             'nthread': 1,
                             'tree_method': 'hist'},
                            dtrain,
                            num_boost_round=4, evals=[(dtrain, 'train')])
    bst = output['booster']
    history = output['history']

    # you can pass output directly into `predict` too.
    prediction = xgb.dask.predict(client, bst, dtrain)
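# The CPU variant can be driven the same way with a plain dask
# LocalCluster (a sketch; the worker and thread counts are illustrative):
from dask.distributed import Client, LocalCluster

if __name__ == '__main__':
    with LocalCluster(n_workers=4, threads_per_worker=1) as cluster:
        with Client(cluster) as client:
            main(client)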