X_train, y_train = X[:tr_size, :], y[:tr_size]
X_test, y_test = X[tr_size:, :], y[tr_size:]
# First with cpu_predictor
params = {'tree_method': 'gpu_hist',
          'predictor': 'cpu_predictor',
          'n_jobs': -1,
          'seed': 123}
m = xgb.XGBRegressor(**params).fit(X_train, y_train)
cpu_train_score = m.score(X_train, y_train)
cpu_test_score = m.score(X_test, y_test)
# Now with gpu_predictor
params['predictor'] = 'gpu_predictor'
m = xgb.XGBRegressor(**params).fit(X_train, y_train)
gpu_train_score = m.score(X_train, y_train)
gpu_test_score = m.score(X_test, y_test)
assert np.allclose(cpu_train_score, gpu_train_score)
assert np.allclose(cpu_test_score, gpu_test_score)
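
# The snippet above assumes X, y, and tr_size already exist. A minimal,
# self-contained setup it could run against (the synthetic data and 80/20
# split below are assumptions, not the original source):
import numpy as np
import xgboost as xgb

rng = np.random.RandomState(123)
X = rng.randn(1000, 10)
y = rng.randn(1000)
tr_size = int(0.8 * X.shape[0])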
def test_xgboost_direct():
    try:
        import xgboost
    except Exception:
        print("Skipping test_xgboost_direct!")
        return
    import shap

    N = 100
    M = 4
    X = np.random.randn(N, M)
    y = np.random.randn(N)

    model = xgboost.XGBRegressor()
    model.fit(X, y)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)
    assert np.allclose(shap_values[0, :], _brute_force_tree_shap(explainer.model, X[0, :]))
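
    # Not part of the original test: a sketch of the standard SHAP additivity
    # check, i.e. per-row SHAP values plus the expected value should reproduce
    # the model's raw (margin) predictions.
    margin = model.predict(X, output_margin=True)
    assert np.allclose(shap_values.sum(axis=1) + explainer.expected_value, margin, atol=1e-4)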
# Iris: multiclass classification (assumes `iris = load_iris()` and an `rng`
# random seed were defined earlier in the script)
y = iris['target']
X = iris['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
    predictions = xgb_model.predict(X[test_index])
    actuals = y[test_index]
    print(confusion_matrix(actuals, predictions))
print("Boston Housing: regression")
boston = load_boston()
y = boston['target']
X = boston['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
predictions = xgb_model.predict(X[test_index])
actuals = y[test_index]
print(mean_squared_error(actuals, predictions))
print("Parameter optimization")
y = boston['target']
X = boston['data']
xgb_model = xgb.XGBRegressor()
clf = GridSearchCV(xgb_model,
{'max_depth': [2,4,6],
'n_estimators': [50,100,200]}, verbose=1)
clf.fit(X,y)
print(clf.best_score_)
print(clf.best_params_)
# The sklearn API models are picklable
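# A minimal sketch of that round trip (the file name "best_boston.pkl" is just
# illustrative); the reloaded estimator should give identical predictions.
import pickle

with open("best_boston.pkl", "wb") as f:
    pickle.dump(clf, f)
with open("best_boston.pkl", "rb") as f:
    clf2 = pickle.load(f)
print(np.allclose(clf.predict(X), clf2.predict(X)))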
def train(args, pandasData):
    # Split data into a labels array and a features array
    labels = pandasData[args.label_col].values
    features = pandasData[args.feat_cols].values

    # Hold out test_percent of the data for testing. We will use the rest for training.
    trainingFeatures, testFeatures, trainingLabels, testLabels = train_test_split(
        features, labels, test_size=args.test_percent)
    ntrain, ntest = len(trainingLabels), len(testLabels)
    print("Split data randomly into 2 sets: {} training and {} test instances.".format(ntrain, ntest))

    # We will use a GBT regressor model.
    xgbr = xgb.XGBRegressor(max_depth=args.m_depth, learning_rate=args.learning_rate, n_estimators=args.n_trees)

    # Train the model and keep track of how long it takes.
    start_time = time()
    xgbr.fit(trainingFeatures, trainingLabels, eval_metric=args.loss)

    # Calculate the score of the model.
    r2_score_training = xgbr.score(trainingFeatures, trainingLabels)
    r2_score_test = 0
    if args.test_percent != 0:
        r2_score_test = xgbr.score(testFeatures, testLabels)
    timed = time() - start_time

    print("Training set score:", r2_score_training)
    if args.test_percent != 0:
        print("Test set score:", r2_score_test)

    # Log the parameters for viewing later. They can be found in the mlruns/ folder.
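    # A sketch of that logging step, not the original source: assuming `import mlflow`
    # at module level, these are the standard calls that record runs under mlruns/.
    with mlflow.start_run():
        mlflow.log_param("max_depth", args.m_depth)
        mlflow.log_param("learning_rate", args.learning_rate)
        mlflow.log_param("n_trees", args.n_trees)
        mlflow.log_metric("r2_score_training", r2_score_training)
        if args.test_percent != 0:
            mlflow.log_metric("r2_score_test", r2_score_test)
        mlflow.log_metric("training_time_seconds", timed)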
def grid_search(self, xtr, ytr):
    gbm = xgb.XGBRegressor()
    reg_cv = GridSearchCV(gbm,
                          {"colsample_bytree": self.colsample_bytree,
                           "min_child_weight": self.min_child_weight,
                           'max_depth': self.max_depth,
                           'n_estimators': self.n_estimators},
                          verbose=1)
    reg_cv.fit(xtr, ytr)
    return reg_cv
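
# Hedged usage sketch for the helper above (the instance and data names are
# illustrative, not from the original code):
# reg_cv = tuner.grid_search(X_train, y_train)
# print(reg_cv.best_params_, reg_cv.best_score_)
# best_model = reg_cv.best_estimator_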
        tree.DecisionTreeClassifier(**TREE_PARAMS),
        utils.train_model_classification,
    ),
    (
        "regression", "random_forest",
        ensemble.RandomForestRegressor(**FOREST_PARAMS),
        utils.train_model_regression,
    ),
    (
        "classification", "random_forest",
        ensemble.RandomForestClassifier(**FOREST_PARAMS),
        utils.train_model_classification,
    ),
    (
        "regression", "xgboost",
        xgboost.XGBRegressor(**XGBOOST_PARAMS),
        utils.train_model_regression,
    ),
    (
        "classification", "xgboost",
        xgboost.XGBClassifier(**XGBOOST_PARAMS),
        utils.train_model_classification,
    ),
    (
        "regression", "lightgbm",
        lightgbm.LGBMRegressor(**LIGHT_GBM_PARAMS),
        utils.train_model_regression,
    ),
    (
        "classification", "lightgbm",
        lightgbm.LGBMClassifier(**LIGHT_GBM_PARAMS),
        utils.train_model_classification,
def create_model(self):
    # TODO: if learning rates are identical throughout - create a regular Classifier
    self.model_params['n_estimators'] = self.best_n_iterations
    self.model_params['learning_rate'] = self.learning_rates[0]
    self.model_params['n_jobs'] = self.model_params.pop('nthread')
    self.model_params['random_state'] = self.model_params.pop('seed')
    self.model_params['reg_lambda'] = self.model_params.pop('lambda')
    self.model_params['reg_alpha'] = self.model_params.pop('alpha')
    final_model = XGBRegressor(**self.model_params)
    # final_model = XGBRegressorLR(learning_rates=self.learning_rates, **self.model_params)
    return final_model
class XGBRegressorLR(XGBRegressor):
    def __init__(self, learning_rates=None,
                 max_depth=3, learning_rate=1, n_estimators=100,
                 verbosity=1,
                 objective="reg:squarederror", booster="gbtree", n_jobs=1, nthread=None, gamma=0,
                 min_child_weight=1, max_delta_step=0, subsample=0.8, colsample_bytree=1,
                 colsample_bylevel=1, colsample_bynode=0.8, reg_alpha=0, reg_lambda=1,
                 scale_pos_weight=1, base_score=0.5, random_state=0, seed=None,
                 missing=None, **kwargs):
        if 'learning_rates' in kwargs:
            self.learning_rates = kwargs.pop('learning_rates')
        else:
            self.learning_rates = learning_rates
        super(XGBRegressorLR, self).__init__(
            max_depth=max_depth, learning_rate=learning_rate, n_estimators=n_estimators,
def model_builder(model_dir_xgb):
    xgboost_model = xgboost.XGBRegressor(
        max_depth=max_depth,
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        gamma=gamma,
        min_child_weight=min_child_weight,
        max_delta_step=max_delta_step,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        colsample_bylevel=colsample_bylevel,
        reg_alpha=reg_alpha,
        reg_lambda=reg_lambda,
        scale_pos_weight=scale_pos_weight,
        base_score=base_score,
        seed=seed)
    return dc.models.xgboost_models.XGBoostModel(xgboost_model, model_dir_xgb,
                                                 **esr)
# Our level 0 regressors
clfs = [
    ExtraTreesRegressor(n_estimators=n_trees * 20),
    BaggingRegressor(base_estimator=xgb.XGBRegressor(**xgb_params0), n_estimators=10,
                     random_state=np.random.RandomState(2016)),
    RandomForestRegressor(n_estimators=500, max_depth=5, min_samples_leaf=6, max_features=0.9,
                          min_samples_split=1, n_jobs=-1, random_state=2014),
    AdaBoostRegressor(base_estimator=None, n_estimators=250, learning_rate=0.03, loss='linear',
                      random_state=20160703),
    BaggingRegressor(base_estimator=None, n_estimators=200, max_samples=1.0, max_features=1.0,
                     bootstrap=True, bootstrap_features=False, oob_score=False, warm_start=False,
                     n_jobs=1, random_state=None, verbose=0),
    neighbors.KNeighborsRegressor(128, weights="uniform", leaf_size=5),
    SVR(kernel='rbf', C=0.2, gamma=0.1),
    SVR(kernel='rbf', C=0.3, gamma=0.5),
    SVR(kernel='linear', C=0.2),
    SVR(kernel='poly', C=0.2, degree=2),
    GradientBoostingRegressor(n_estimators=500, max_depth=6, min_samples_split=1, min_samples_leaf=15,
                              learning_rate=0.035, loss='ls', random_state=10),
    xgb.XGBRegressor(**xgb_params0),
    xgb.XGBRegressor(**xgb_params1),
    DecisionTreeRegressor(criterion='mse', splitter='random', max_depth=4, min_samples_split=7,
                          min_samples_leaf=30, min_weight_fraction_leaf=0.0, max_features='sqrt',
                          random_state=None, max_leaf_nodes=None, presort=False)
]
# Ready for cross validation: a list of (train_index, cv_index) pairs, one per fold
skf = list(StratifiedKFold(n_splits=n_folds, shuffle=True).split(X_dev, Y_dev))
blend_train = np.zeros((X_dev.shape[0], len(clfs)))  # number of training rows x number of models
blend_test = np.zeros((X_test.shape[0], len(clfs)))  # number of test rows x number of models
print('X_test.shape = %s' % (str(X_test.shape)))
print('blend_train.shape = %s' % (str(blend_train.shape)))
print('blend_test.shape = %s' % (str(blend_test.shape)))

# For each model, we train it once per fold (= len(skf) times)
for j, clf in enumerate(clfs):
    print('Training model [%s]' % clf)
    print('Progress: %.2f' % ((j + 1.0) / len(clfs)))
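
    # Not from the original snippet: a hedged sketch of the standard blending
    # inner loop that fills blend_train / blend_test for model j. Out-of-fold
    # predictions become level-1 training features; test-set predictions are
    # averaged across folds.
    blend_test_j = np.zeros((X_test.shape[0], len(skf)))
    for i, (tr_idx, cv_idx) in enumerate(skf):
        clf.fit(X_dev[tr_idx], Y_dev[tr_idx])
        blend_train[cv_idx, j] = clf.predict(X_dev[cv_idx])
        blend_test_j[:, i] = clf.predict(X_test)
    blend_test[:, j] = blend_test_j.mean(axis=1)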
import json
import os

feature_map = None
if isinstance(model, (_xgboost.core.Booster, _xgboost.XGBRegressor)):
    # Testing a few corner cases that we don't support
    if isinstance(model, _xgboost.XGBRegressor):
        try:
            objective = model.get_xgb_params()["objective"]
        except:
            objective = None
        if objective in ["reg:gamma", "reg:tweedie"]:
            raise ValueError("Regression objective '%s' not supported for export." % objective)

    # Now use the booster API.
    if isinstance(model, _xgboost.XGBRegressor):
        # Name change in 0.7
        if hasattr(model, 'get_booster'):
            model = model.get_booster()
        else:
            model = model.booster()
    # Xgboost sometimes has feature names in there. Sometimes it does not.
    if (feature_names is None) and (model.feature_names is None):
        raise ValueError("Feature names not present in the model. Must be provided during conversion.")
    if feature_names is None:
        feature_names = model.feature_names

    xgb_model_str = model.get_dump(with_stats=True, dump_format='json')

    if model.feature_names:
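        # Sketch of the likely continuation (not the original lines): map each
        # feature name to its column index for use when parsing the dumped trees.
        feature_map = {f: i for i, f in enumerate(model.feature_names)}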