import xgboost as xgb
from pathlib import Path

def test_load_file_invalid(self):
    """Loading a model from a nonexistent path should raise XGBoostError."""
    self.assertRaises(xgb.core.XGBoostError, xgb.Booster,
                      model_file='incorrect_path')
    # Non-ASCII paths should fail the same way ("invalid path" in Japanese).
    self.assertRaises(xgb.core.XGBoostError, xgb.Booster,
                      model_file=u'不正なパス')

def test_Booster_init_invalid_path(self):
    """An invalid model_file path should raise XGBoostError."""
    self.assertRaises(xgb.core.XGBoostError, xgb.Booster,
                      model_file=Path("invalidpath"))
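# The same checks written with pytest.raises — a minimal sketch assuming
# pytest is available; the original tests use unittest's assertRaises:
import pytest

def test_load_file_invalid_pytest():
    with pytest.raises(xgb.core.XGBoostError):
        xgb.Booster(model_file='incorrect_path')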
# init learning_rate with 0 to check whether learning_rates work
param = {'max_depth': 2, 'learning_rate': 0, 'verbosity': 0,
         'objective': 'binary:logistic'}
evals_result = {}
bst = xgb.train(param, dtrain, num_round, watchlist,
                learning_rates=[0.8, 0.7, 0.6, 0.5],
                evals_result=evals_result)
eval_errors = list(map(float, evals_result['eval']['error']))
assert isinstance(bst, xgb.core.Booster)
# validation error should decrease if learning_rate > 0
assert eval_errors[0] > eval_errors[-1]

# check that learning_rates overrides the default value of eta/learning_rate
param = {'max_depth': 2, 'verbosity': 0, 'objective': 'binary:logistic'}
evals_result = {}
bst = xgb.train(param, dtrain, num_round, watchlist,
                learning_rates=[0, 0, 0, 0],
                evals_result=evals_result)
eval_errors = list(map(float, evals_result['eval']['error']))
assert isinstance(bst, xgb.core.Booster)
# validation error should not decrease if eta/learning_rate == 0
assert eval_errors[0] == eval_errors[-1]

# learning_rates as a custom decay function
def eta_decay(ithround, num_boost_round):
    return num_boost_round / (ithround + 1)

bst = xgb.train(param, dtrain, num_round, watchlist, learning_rates=eta_decay)
assert isinstance(bst, xgb.core.Booster)
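# In newer XGBoost releases the learning_rates argument was replaced by the
# callback API — a minimal sketch assuming xgboost >= 1.3, where
# xgb.callback.LearningRateScheduler accepts either a list or a function:
scheduler = xgb.callback.LearningRateScheduler([0.8, 0.7, 0.6, 0.5])
bst = xgb.train(param, dtrain, num_round, evals=watchlist, callbacks=[scheduler])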
assert dm.num_col() == 3

# integer column names are converted to strings
df = pd.DataFrame([[1, 2., 1], [2, 3., 1]], columns=[4, 5, 6])
dm = xgb.DMatrix(df, label=pd.Series([1, 2]))
assert dm.feature_names == ['4', '5', '6']
assert dm.feature_types == ['int', 'float', 'int']
assert dm.num_row() == 2
assert dm.num_col() == 3

df = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
dummies = pd.get_dummies(df)
#    B  A_X  A_Y  A_Z
# 0  1    1    0    0
# 1  2    0    1    0
# 2  3    0    0    1
result, _, _ = xgb.core._maybe_pandas_data(dummies, None, None)
exp = np.array([[1., 1., 0., 0.],
                [2., 0., 1., 0.],
                [3., 0., 0., 1.]])
np.testing.assert_array_equal(result, exp)
dm = xgb.DMatrix(dummies)
assert dm.feature_names == ['B', 'A_X', 'A_Y', 'A_Z']
assert dm.feature_types == ['int', 'int', 'int', 'int']
assert dm.num_row() == 3
assert dm.num_col() == 4

# '=' is allowed in feature names
df = pd.DataFrame({'A=1': [1, 2, 3], 'A=2': [4, 5, 6]})
dm = xgb.DMatrix(df)
assert dm.feature_names == ['A=1', 'A=2']
assert dm.feature_types == ['int', 'int']
assert dm.num_row() == 3
assert dm.num_col() == 2
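# Instead of one-hot encoding with pd.get_dummies, newer XGBoost versions can
# consume pandas categorical columns directly — a minimal sketch assuming an
# XGBoost release that supports DMatrix(..., enable_categorical=True):
df = pd.DataFrame({'A': pd.Categorical(['X', 'Y', 'Z']), 'B': [1, 2, 3]})
dm = xgb.DMatrix(df, enable_categorical=True)
assert dm.num_col() == 2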
def test_glm(self):
    param = {'silent': 1, 'objective': 'binary:logistic',
             'booster': 'gblinear', 'alpha': 0.0001, 'lambda': 1, 'nthread': 1}
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 4
    bst = xgb.train(param, dtrain, num_round, watchlist)
    assert isinstance(bst, xgb.core.Booster)
    preds = bst.predict(dtest)
    labels = dtest.get_label()
    err = sum(1 for i in range(len(preds))
              if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
    assert err < 0.2
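# A vectorized equivalent of the error-rate loop above — a minimal sketch
# assuming numpy is imported as np:
err = float(np.mean((preds > 0.5).astype(int) != labels))
assert err < 0.2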
def test_pandas_label(self):
    # label must be a single column
    df = pd.DataFrame({'A': ['X', 'Y', 'Z'], 'B': [1, 2, 3]})
    self.assertRaises(ValueError, xgb.core._maybe_pandas_label, df)

    # label must be a supported dtype
    df = pd.DataFrame({'A': np.array(['a', 'b', 'c'], dtype=object)})
    self.assertRaises(ValueError, xgb.core._maybe_pandas_label, df)

    df = pd.DataFrame({'A': np.array([1, 2, 3], dtype=int)})
    result = xgb.core._maybe_pandas_label(df)
    np.testing.assert_array_equal(result, np.array([[1.], [2.], [3.]],
                                                   dtype=float))

    dm = xgb.DMatrix(np.random.randn(3, 2), label=df)
    assert dm.num_row() == 3
    assert dm.num_col() == 2
        classes is expected from the model, or class_labels should be provided.

    Returns
    -------
    model_spec: An object of type Model_pb.
        Protobuf representation of the model.
    """
    if not _HAS_XGBOOST:
        raise RuntimeError('xgboost not found. xgboost conversion API is disabled.')

    accepted_modes = ["regressor", "classifier"]
    if mode not in accepted_modes:
        raise ValueError("mode should be in %s" % accepted_modes)

    import json
    import os

    feature_map = None
    if isinstance(model, (_xgboost.core.Booster, _xgboost.XGBRegressor, _xgboost.XGBClassifier)):
        # Testing a few corner cases that we don't support
        if isinstance(model, _xgboost.XGBRegressor):
            if mode == "classifier":
                raise ValueError("mode is classifier but provided a regressor")
            try:
                objective = model.get_xgb_params()["objective"]
            except Exception:
                objective = None
            if objective in ["reg:gamma", "reg:tweedie"]:
                raise ValueError("Regression objective '%s' not supported for export." % objective)

        if isinstance(model, _xgboost.XGBClassifier):
            if mode == "regressor":
                raise ValueError("mode is regressor but provided a classifier")
            n_classes = model.n_classes_
    force_32bit_float: bool
        If True, then the resulting CoreML model will use 32 bit floats internally.

    Returns
    -------
    model_spec: An object of type Model_pb.
        Protobuf representation of the model.
    """
    if not _HAS_XGBOOST:
        raise RuntimeError('xgboost not found. xgboost conversion API is disabled.')

    import json
    import os

    feature_map = None
    if isinstance(model, (_xgboost.core.Booster, _xgboost.XGBRegressor)):
        # Testing a few corner cases that we don't support
        if isinstance(model, _xgboost.XGBRegressor):
            try:
                objective = model.get_xgb_params()["objective"]
            except Exception:
                objective = None
            if objective in ["reg:gamma", "reg:tweedie"]:
                raise ValueError("Regression objective '%s' not supported for export." % objective)

        # Now use the booster API.
        if isinstance(model, _xgboost.XGBRegressor):
            # Name change in 0.7: get_booster() replaced booster()
            if hasattr(model, 'get_booster'):
                model = model.get_booster()
            else:
                model = model.booster()
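# Hedged usage sketch for the converter these fragments come from: coremltools
# exposes the xgboost entry point as coremltools.converters.xgboost.convert.
# Argument names and supported modes vary across coremltools versions, and
# bst here is an illustrative placeholder for a trained Booster:
import coremltools

mlmodel = coremltools.converters.xgboost.convert(bst, mode="regressor")
mlmodel.save("model.mlmodel")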
    interaction effects between all pairs of features for that sample. For models with vector outputs
    this returns a list of tensors, one for each output.
    """
    assert self.model_output == "margin", "Only model_output = \"margin\" is supported for SHAP interaction values right now!"
    assert self.feature_perturbation == "tree_path_dependent", "Only feature_perturbation = \"tree_path_dependent\" is supported for SHAP interaction values right now!"
    transform = "identity"

    # see if we have a default tree_limit in place
    if tree_limit is None:
        tree_limit = -1 if self.model.tree_limit is None else self.model.tree_limit

    # shortcut using the C++ version of Tree SHAP in XGBoost
    if self.model.model_type == "xgboost":
        import xgboost
        if not isinstance(X, xgboost.core.DMatrix):
            X = xgboost.DMatrix(X)
        if tree_limit == -1:
            tree_limit = 0
        phi = self.model.original_model.predict(X, ntree_limit=tree_limit, pred_interactions=True)

        # note we pull off the last column and keep it as our expected_value
        if len(phi.shape) == 4:
            self.expected_value = [phi[0, i, -1, -1] for i in range(phi.shape[1])]
            return [phi[:, i, :-1, :-1] for i in range(phi.shape[1])]
        else:
            self.expected_value = phi[0, -1, -1]
            return phi[:, :-1, :-1]

    # convert dataframes
    if safe_isinstance(X, "pandas.core.series.Series"):
        X = X.values
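# Hedged usage sketch for the fragment above: shap.TreeExplainer exposes
# interaction values through shap_interaction_values(); bst and X are
# placeholders for a trained XGBoost booster and a feature matrix:
import shap

explainer = shap.TreeExplainer(bst)
interactions = explainer.shap_interaction_values(X)
# interactions has shape (n_samples, n_features, n_features)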