import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV


def find_best_xgb_estimator(X, y, cv, param_comb):
    # Random search over specified parameter values for XGBoost.
    # An exhaustive grid search takes many more cycles without much benefit.
    # Returns the optimized XGBoost estimator.
    # Ref: https://www.kaggle.com/tilii7/hyperparameter-grid-search-with-xgboost
    # FOLDS, PARA_COMB and RANDOM_SEED are assumed to be module-level constants.
    print('\n Finding best XGBoost estimator...')
    param_grid = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5]
    }
    init_est = xgb.XGBClassifier(learning_rate=0.02, n_estimators=600,
                                 objective='multi:softprob', verbosity=1, nthread=1)
    # The deprecated `iid` argument has been removed from RandomizedSearchCV in
    # recent scikit-learn releases, so it is no longer passed here.
    random_search = RandomizedSearchCV(estimator=init_est, param_distributions=param_grid,
                                       n_iter=param_comb, n_jobs=4, cv=cv,
                                       verbose=1, random_state=RANDOM_SEED)
    random_search.fit(X, y)
    # print('\n All results:')
    # print(random_search.cv_results_)
    print('\n Best estimator:')
    print(random_search.best_estimator_)
    print('\n Best score for %d-fold search with %d parameter combinations:' %
          (FOLDS, PARA_COMB))
    print(random_search.best_score_)
    print('\n Best hyperparameters:')
    print(random_search.best_params_)
    return random_search.best_estimator_
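# A hedged usage sketch for the helper above. `X_train`, `y_train` and `X_test`
# are assumed to exist; the constants below are placeholders standing in for the
# module-level values the function references.
from sklearn.model_selection import StratifiedKFold

FOLDS = 5
PARA_COMB = 20
RANDOM_SEED = 42

skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=RANDOM_SEED)
best_xgb = find_best_xgb_estimator(X_train, y_train, cv=skf, param_comb=PARA_COMB)
class_probabilities = best_xgb.predict_proba(X_test)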
def _xgboost_predict(self, train_x, train_y, test_x, test_y):
    model = xgb.XGBClassifier(seed=self._seed, n_estimators=100, max_depth=3, learning_rate=0.1)
    self._xgboost_model = model  # keep a handle on the fitted model (original "hack")
    eval_metric = self._eval_metric or 'error'
    if test_y is None:
        # No held-out labels available, so fit on the training data alone.
        model.fit(train_x, train_y, eval_metric=eval_metric, verbose=10)
    else:
        # Monitor both the training and test sets while boosting.
        eval_set = [(train_x, train_y), (test_x, test_y)]
        model.fit(train_x, train_y, eval_set=eval_set, eval_metric=eval_metric,
                  verbose=10)
    if self._eval_metric == 'mlogloss':
        # Multiclass log-loss consumers expect class probabilities.
        return model.predict_proba(test_x)
    else:
        return model.predict(test_x)
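# A standalone, hedged sketch of the same fit-with-eval_set pattern, using
# synthetic data and assuming xgboost >= 1.6 (where eval_metric is passed to
# the constructor rather than to fit()).
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)

model = xgb.XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1,
                          eval_metric='error')
# Track train and test error during boosting; print progress every 10 rounds.
model.fit(X_tr, y_tr, eval_set=[(X_tr, y_tr), (X_te, y_te)], verbose=10)
print(model.predict_proba(X_te)[:5])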
import pickle  # cPickle in the original Python 2 version of this snippet

from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score


def classification(labels, testLabels):
    with open("../Labels/feature_csltp", 'rb') as f:
        X_train = pickle.load(f)
    with open("../Labels/feature_test_csltp", "rb") as f:
        X_test = pickle.load(f)
    y_train = labels
    y_test = testLabels
    # split data into train and test sets
    # seed = 7
    # test_size = 0.33
    # X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)
    model = XGBClassifier(n_estimators=400)
    model.fit(X_train, y_train)
    # make predictions for test data
    y_pred = model.predict(X_test)
    predictions = [round(value) for value in y_pred]
    # evaluate predictions
    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy: %.2f%%" % (accuracy * 100.0))
import numpy as np
import xgboost
from sklearn import model_selection
from sklearn.metrics import accuracy_score

# load the CSV file as a numpy matrix
# (`raw_data` is assumed to be a file path or open handle defined earlier)
dataset = np.loadtxt(raw_data, delimiter=",")
print(dataset.shape)
X = dataset[:, 0:8]
Y = dataset[:, 8]
# split data into train and test sets
seed = 7
test_size = 0.33
# sklearn.cross_validation was removed in scikit-learn 0.20; use model_selection instead
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, Y, test_size=test_size, random_state=seed)
model = xgboost.XGBClassifier()
model.fit(X_train, y_train)
# make predictions for test data
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
# (Fragment: the lines below run inside enclosing loops over candidate `params`
#  and validation splits `r`; the surrounding loop headers are not shown.)
clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=params, random_state=0)
clf.fit(train_val_pairs['train_X'][r],
        train_val_pairs['train_Y'][r].reshape(train_val_pairs['train_Y'][r].shape[0],))
res = clf.predict(train_val_pairs['val_X'][r])
res = res.reshape((-1, 1))
error = error + (np.sum(res[:, 0] != train_val_pairs['val_Y'][r][:, 0]) / float(res.shape[0]))
error = error / 10.0
print("parameter", params, ", val error =", error)
if error < best_error:
    best_error = error
    best_param = params
print("Best parameter:", best_param)
if model == 'xgb':
    clf = xgb.XGBClassifier(max_depth=best_param[0], n_estimators=best_param[1])
elif model == 'ann':
    clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes=best_param, random_state=0)
clf.fit(all_data['training_X'], new_training_Y_all.reshape(new_training_Y_all.shape[0],))
best_models[model] = clf
# uncomment this to see XGBoost feature importance for timing classification
"""
if model == 'xgb':
    print("XGB feature importance for timing classification:")
    score = clf.get_booster().get_fscore()
    sorted_score = sorted(score.items(), key=lambda kv: (kv[1], kv[0]))
    for key, value in sorted_score:
        fea_id = int(key.replace('f', ''))
        print(selected_feature_names[fea_id], value)
    print("\n")
"""
def _xgb_n_targets(xgb):
    # type: (...) -> int
    if isinstance(xgb, XGBClassifier):
        # Binary problems produce a single target column; multiclass keeps one per class.
        return 1 if xgb.n_classes_ == 2 else xgb.n_classes_
    elif isinstance(xgb, XGBRegressor):
        return 1
    else:
        raise TypeError
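# A hedged usage sketch for the helper above: the target width is 1 for binary
# classification and for regression, and n_classes_ for multiclass models.
# The iris data is used purely for illustration.
from sklearn.datasets import load_iris
from xgboost import XGBClassifier, XGBRegressor

X, y = load_iris(return_X_y=True)
clf = XGBClassifier(n_estimators=10).fit(X, y)
print(_xgb_n_targets(clf))             # three classes -> 3
print(_xgb_n_targets(XGBRegressor()))  # regressors always map to 1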
import xgboost
from sklearn.ensemble import (AdaBoostClassifier, BaggingClassifier,
                              GradientBoostingClassifier, RandomForestClassifier,
                              VotingClassifier)
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier


def model_create(model_name):
    if model_name == 'ada55':
        classifier = AdaBoostClassifier(DecisionTreeClassifier(max_depth=5),
                                        algorithm="SAMME",
                                        n_estimators=5)
    elif model_name == 'xgb':
        classifier = xgboost.XGBClassifier(n_estimators=800, seed=0)
    elif model_name == 'gb':
        classifier = GradientBoostingClassifier(n_estimators=1000)
    elif model_name == 'rf':
        classifier = RandomForestClassifier()
    elif model_name == 'vot':
        param_grid = {"base_estimator__criterion": ["gini"],
                      "base_estimator__splitter": ["best"],
                      "n_estimators": [3, 5, 6]}
        DTC = DecisionTreeClassifier(max_depth=5)
        ABC = AdaBoostClassifier(base_estimator=DTC, algorithm="SAMME", learning_rate=1, n_estimators=5)
        clf1 = GridSearchCV(ABC, param_grid=param_grid, scoring='roc_auc')
        clf2 = GradientBoostingClassifier(n_estimators=1000)
        clf3 = BaggingClassifier()
        classifier = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('rtf', clf3)], voting='soft')
    elif model_name == 'gs':
        clf1 = AdaBoostClassifier(DecisionTreeClassifier(max_depth=5), algorithm="SAMME")
        param_grid = {'n_estimators': [4, 5, 6]}
        classifier = GridSearchCV(clf1, param_grid=param_grid, scoring='roc_auc')
    return classifier
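# A hedged usage sketch: build one of the classifiers defined above and score
# it on a held-out split. X_train, y_train, X_test and y_test are assumed to
# be prepared elsewhere.
clf = model_create('xgb')
clf.fit(X_train, y_train)
print("Held-out accuracy: %.3f" % clf.score(X_test, y_test))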
def execute():
    env = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params=dict(n_splits=5, n_repeats=2, random_state=32),
        runs=2,
        # Just instantiate `Environment` with your list of callbacks, and go about business as usual
        experiment_callbacks=[printer_callback(), confusion_matrix_oof()],
        # In addition to `printer_callback` made above, we're also adding the `confusion_matrix_oof` callback
        # This, and other callbacks, can be found in `hyperparameter_hunter.callbacks.recipes`
    )
    experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params={},
        model_extra_params=dict(fit=dict(verbose=False)),
    )
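# A hedged entry-point sketch; get_toy_classification_data, printer_callback and
# confusion_matrix_oof are assumed to be imported or defined elsewhere in this
# HyperparameterHunter example.
if __name__ == "__main__":
    execute()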
def model_builder(model_dir_xgb):
    # All hyperparameters below, and the `esr` keyword dict, are taken from the
    # enclosing scope of the original script.
    import xgboost
    xgboost_model = xgboost.XGBClassifier(
        max_depth=max_depth,
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        gamma=gamma,
        min_child_weight=min_child_weight,
        max_delta_step=max_delta_step,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        colsample_bylevel=colsample_bylevel,
        reg_alpha=reg_alpha,
        reg_lambda=reg_lambda,
        scale_pos_weight=scale_pos_weight,
        base_score=base_score,
        seed=seed)
    return deepchem.models.xgboost_models.XGBoostModel(
        xgboost_model, model_dir_xgb, **esr)