Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test():
    """Smoke-test xgboost binary classification on random data.

    Builds a tiny random training set (5 rows x 10 features, binary
    labels), trains a shallow logistic booster for 10 rounds, and
    monitors both a held-out matrix and the training matrix.
    """
    data = np.random.rand(5, 10)           # 5 entities, each contains 10 features
    label = np.random.randint(2, size=5)   # binary target
    dtrain = xgb.DMatrix(data, label=label)
    # FIX: the original called xgb.DMatrix(test), passing the enclosing
    # function object itself — not valid input. Build a real held-out
    # evaluation matrix shaped like the training data instead.
    data_eval = np.random.rand(5, 10)
    label_eval = np.random.randint(2, size=5)
    dtest = xgb.DMatrix(data_eval, label=label_eval)
    param = {'max_depth': 2, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}
    # Evaluation pairs printed each round: held-out first, then train.
    evallist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 10
    bst = xgb.train(param, dtrain, num_round, evallist)
def run_training_continuation(self, use_json):
# Purpose: verify that training 32 rounds and then continuing for 32 more
# (via the `xgb_model` argument) yields the same model dump as a single
# 64-round run on identical data.
# NOTE(review): this fragment appears truncated — the dict branch of
# `recursive_compare` stops after collecting the key lists, and
# dump_0/dump_1 are never actually compared in the visible code.
kRows = 64
kCols = 32
# Random regression problem: 64 rows x 32 features.
X = np.random.randn(kRows, kCols)
y = np.random.randn(kRows)
dtrain = xgb.DMatrix(X, y)
params = {'tree_method': 'gpu_hist', 'max_depth': '2',
'gamma': '0.1', 'alpha': '0.01',
'enable_experimental_json_serialization': use_json}
# Reference booster: all 64 rounds in one call.
bst_0 = xgb.train(params, dtrain, num_boost_round=64)
dump_0 = bst_0.get_dump(dump_format='json')
# Continued booster: 32 rounds, then 32 more resuming from the first model.
bst_1 = xgb.train(params, dtrain, num_boost_round=32)
bst_1 = xgb.train(params, dtrain, num_boost_round=32, xgb_model=bst_1)
dump_1 = bst_1.get_dump(dump_format='json')
def recursive_compare(obj_0, obj_1):
# Structural comparison of two parsed JSON model dumps.
if isinstance(obj_0, float):
# Floats compared with tolerance, not exact equality.
assert np.isclose(obj_0, obj_1, atol=1e-6)
elif isinstance(obj_0, str):
assert obj_0 == obj_1
elif isinstance(obj_0, int):
assert obj_0 == obj_1
elif isinstance(obj_0, dict):
keys_0 = list(obj_0.keys())
keys_1 = list(obj_1.keys())
def assert_constraint(constraint, tree_method):
    """Train a monotone-constrained booster and check its predictions.

    Fits a single-feature regression with the requested monotone
    constraint, then asserts that predictions over the sorted feature
    values are non-decreasing (constraint > 0) or non-increasing
    (constraint < 0).
    """
    num_samples = 1000
    X, y = make_regression(num_samples, random_state=rng,
                           n_features=1, n_informative=1)
    training_matrix = xgb.DMatrix(X, y)
    booster_params = {
        'tree_method': tree_method,
        # xgboost accepts the constraint tuple as a string, e.g. "(1)".
        'monotone_constraints': "(" + str(constraint) + ")",
    }
    booster = xgb.train(booster_params, training_matrix)
    # Predict on rows ordered by the single feature so monotonicity of the
    # prediction sequence reflects monotonicity in that feature.
    sorted_matrix = xgb.DMatrix(X[X[:, 0].argsort()])
    predictions = booster.predict(sorted_matrix)
    if constraint > 0:
        assert non_decreasing(predictions)
    elif constraint < 0:
        assert non_increasing(predictions)
# Convert input data from numpy to XGBoost format
dtrain = xgb.DMatrix(X_train, label=y_train, nthread=-1)
dtest = xgb.DMatrix(X_test, label=y_test, nthread=-1)
gpu_res = {} # Store accuracy result
tmp = time.time()
# Train model
# First pass runs with whatever tree_method `param` currently holds —
# presumably a GPU method given the label below; confirm upstream setup.
xgb.train(param, dtrain, num_round, evals=[
(dtest, 'test')], evals_result=gpu_res)
print("GPU Training Time: %s seconds" % (str(time.time() - tmp)))
# Repeat for CPU algorithm
tmp = time.time()
# Switch to the CPU histogram algorithm and time the same training run.
param['tree_method'] = 'hist'
cpu_res = {}
xgb.train(param, dtrain, num_round, evals=[
(dtest, 'test')], evals_result=cpu_res)
print("CPU Training Time: %s seconds" % (str(time.time() - tmp)))
def test_alpha_and_lambda(self):
    """One-round exact-tree check of the L1/L2-regularised leaf value.

    Trains a single boosting round with eta=1, lambda=1, alpha=0.1 and
    asserts the first prediction matches the closed-form value 0.7
    (derivation in the inline comments below).
    """
    booster_params = {
        'tree_method': 'exact', 'verbosity': 1,
        'objective': 'reg:squarederror',
        'eta': 1,
        'lambda': 1,
        'alpha': 0.1,
    }
    booster = xgb.train(booster_params, train_data, 1)
    prediction = booster.predict(train_data)
    # Default prediction (with no trees) is 0.5
    # sum_grad = (0.5 - 1.0)
    # sum_hess = 1.0
    # 0.7 = 0.5 - (sum_grad - alpha * sgn(sum_grad)) / (sum_hess + lambda)
    assert_approx_equal(prediction[0], 0.7)
# Training-continuation check on two digit datasets (binary and 5-class).
X_2class = digits_2class['data']
y_2class = digits_2class['target']
X_5class = digits_5class['data']
y_5class = digits_5class['target']
dtrain_2class = xgb.DMatrix(X_2class, label=y_2class)
dtrain_5class = xgb.DMatrix(X_5class, label=y_5class)
# Baseline: 10 rounds from scratch -> expect 10 trees in the dump.
gbdt_01 = xgb.train(xgb_params_01, dtrain_2class,
num_boost_round=10)
ntrees_01 = len(gbdt_01.get_dump())
assert ntrees_01 == 10
# Empty model (0 boosting rounds), saved to disk so continuation can be
# tested both from an in-memory booster and from a model file.
gbdt_02 = xgb.train(xgb_params_01, dtrain_2class,
num_boost_round=0)
gbdt_02.save_model('xgb_tc.model')
gbdt_02a = xgb.train(xgb_params_01, dtrain_2class,
num_boost_round=10, xgb_model=gbdt_02)
gbdt_02b = xgb.train(xgb_params_01, dtrain_2class,
num_boost_round=10, xgb_model="xgb_tc.model")
ntrees_02a = len(gbdt_02a.get_dump())
ntrees_02b = len(gbdt_02b.get_dump())
assert ntrees_02a == 10
assert ntrees_02b == 10
# Continuing from an empty model must reproduce the from-scratch result.
res1 = mean_squared_error(y_2class, gbdt_01.predict(dtrain_2class))
res2 = mean_squared_error(y_2class, gbdt_02a.predict(dtrain_2class))
assert res1 == res2
# NOTE(review): this fragment begins inside an if/else whose condition is
# not visible here — L104 is the branch that standardises the feature
# matrix (centering only for dense ndarray input, since centering a sparse
# matrix would densify it).
X = scale(dataset.X, with_mean=isinstance(dataset.X, np.ndarray))
else:
X = dataset.X
if dataset.use_external_memory:
# External-memory path: write label + features to CSV and let XGBoost
# stream it; the '#tmptmp_' URI suffix names the on-disk cache files.
np.savetxt('tmptmp_1234.csv', np.hstack((dataset.y.reshape(len(dataset.y), 1), X)),
delimiter=',')
dtrain = xgb.DMatrix('tmptmp_1234.csv?format=csv&label_column=0#tmptmp_',
weight=dataset.w)
else:
dtrain = xgb.DMatrix(X, dataset.y, weight=dataset.w)
print("Training on dataset: " + dataset.name, file=sys.stderr)
print("Using parameters: " + str(param), file=sys.stderr)
res = {}
bst = xgb.train(param, dtrain, num_rounds, [(dtrain, 'train')],
evals_result=res, verbose_eval=False)
# Free the booster and dmatrix so we can delete temporary files
bst_copy = bst.copy()
del bst
del dtrain
# Cleanup temporary files
if dataset.use_external_memory:
for f in glob.glob("tmptmp_*"):
os.remove(f)
return {"dataset": dataset, "bst": bst_copy, "param": param.copy(),
"eval": res['train'][dataset.metric]}
###############################
# Pipeline tail: hold-out validation, cross-validation to pick the number
# of boosting rounds, full-data training, prediction output, and cleanup.
# Quick validation to get a unique name
logger.info(" ===> Validation")
# 20% hold-out; val_score only names the final log file, it does not gate anything.
x_trn, x_val, y_trn, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
val_score = xgb_validate(x_trn, x_val, y_trn, y_val, xgb_params, seed_val = 0)
# Cross validation
logger.info(" ===> Cross-Validation")
n_stop = xgb_cross_val(xgb_params, X, y, folds)
# FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24; it was only
# an alias for the builtin int, so use that directly.
n_stop = int(n_stop * 1.1) # Full dataset is 25% bigger, so we want a bit of leeway on stopping round to avoid overfitting.
# Training
logger.info(" ===> Training")
xgtrain = xgb.DMatrix(X, y)
classifier = xgb.train(xgb_params, xgtrain, n_stop)
# Output
logger.info(" ===> Start predictions")
xgb_output(X_test, X_test['SK_ID_CURR'], classifier, n_stop, val_score)
# Cleanup
db_conn.close()
end_time = timer()
logger.info(" ===> Success")
logger.info(" Total elapsed time: %s" % (end_time - start_time))
logging.shutdown()
# Rename the temp log so the filename records this run's validation score.
final_logfile = os.path.join('./outputs/', f'{str_timerun}--valid{val_score:.4f}.log')
os.rename(tmp_logfile, final_logfile)
# NOTE(review): fragment starts mid-dict — the opening of `params` and the
# hyper-parameters above 'subsample' are outside the visible span.
'subsample' : subsample,
#'min_child_weight': min_child_weight,
# 7-class softmax classification, scored with multiclass error rate.
'objective': "multi:softmax",
'num_class':7,
"eval_metric":'merror',
'silent':False,
# GPU-related options kept here commented out for easy experimentation.
# 'gpu_id':1,
# 'max_bin':16,
# 'tree_method': "gpu_exact",
# 'updater':'grow_gpu',
# 'n_gpus':-1,
# 'predictor': "gpu_predictor",
}
# Train using the custom per-iteration precision metric; the round count
# and watchlist come from the surrounding object's configuration.
model = xgb.train(params, self.train, self.paras.epoch, self.watchlist, feval=Xg_iter_precision)
return model
# NOTE(review): fragment starts mid-dict — this is the tail of an Optuna
# objective; the `param` opening and the dtrain/dtest/test_y setup are
# outside the visible span. Also note `suggest_loguniform` is deprecated
# in recent Optuna in favour of suggest_float(..., log=True) — confirm the
# pinned Optuna version before changing.
'lambda': trial.suggest_loguniform('lambda', 1e-8, 1.0),
'alpha': trial.suggest_loguniform('alpha', 1e-8, 1.0)
}
# Tree boosters additionally search depth / learning rate / gamma / policy.
if param['booster'] == 'gbtree' or param['booster'] == 'dart':
param['max_depth'] = trial.suggest_int('max_depth', 1, 9)
param['eta'] = trial.suggest_loguniform('eta', 1e-8, 1.0)
param['gamma'] = trial.suggest_loguniform('gamma', 1e-8, 1.0)
param['grow_policy'] = trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide'])
# DART also samples its dropout behaviour.
if param['booster'] == 'dart':
param['sample_type'] = trial.suggest_categorical('sample_type', ['uniform', 'weighted'])
param['normalize_type'] = trial.suggest_categorical('normalize_type', ['tree', 'forest'])
param['rate_drop'] = trial.suggest_loguniform('rate_drop', 1e-8, 1.0)
param['skip_drop'] = trial.suggest_loguniform('skip_drop', 1e-8, 1.0)
bst = xgb.train(param, dtrain)
preds = bst.predict(dtest)
# Round predicted probabilities to 0/1 labels and report accuracy to Optuna.
pred_labels = np.rint(preds)
accuracy = sklearn.metrics.accuracy_score(test_y, pred_labels)
return accuracy