How to use the xgboost.XGBClassifier class in xgboost

To help you get started, we’ve selected a few xgboost examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github goruck / edge-tpu-servers / train.py View on Github external
def find_best_xgb_estimator(X, y, cv, param_comb):
    """Run a randomized hyperparameter search and return the best XGBoost estimator.

    A random search over the parameter values below is used because an
    exhaustive search takes many more cycles without much benefit.
    Ref: https://www.kaggle.com/tilii7/hyperparameter-grid-search-with-xgboost

    Args:
        X: Training features, passed unchanged to ``RandomizedSearchCV.fit``.
        y: Training targets, passed unchanged to ``RandomizedSearchCV.fit``.
        cv: Cross-validation setting forwarded as the search's ``cv`` argument.
        param_comb: Number of parameter combinations to sample; forwarded as
            the search's ``n_iter`` argument.

    Returns:
        The ``best_estimator_`` attribute of the fitted search.
    """
    print('\n Finding best XGBoost estimator...')
    param_grid = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [3, 4, 5]
        }
    init_est = xgb(learning_rate=0.02, n_estimators=600, objective='multi:softprob',
        verbose=1, nthread=1)
    random_search = RandomizedSearchCV(estimator=init_est, param_distributions=param_grid,
        n_iter=param_comb, n_jobs=4, iid=False, cv=cv,
        verbose=1, random_state=RANDOM_SEED)
    random_search.fit(X, y)
    #print('\n All results:')
    #print(random_search.cv_results_)
    print('\n Best estimator:')
    print(random_search.best_estimator_)
    # Report the number of combinations this call actually searched
    # (``param_comb``) rather than a module-level constant that may differ.
    # NOTE(review): no ``scoring=`` is passed to the search above, so whether
    # ``best_score_`` really is a "normalized gini" score is not established
    # here -- confirm or reword the message. FOLDS is a module-level value and
    # is assumed to describe ``cv``; verify against the caller.
    print('\n Best normalized gini score for %d-fold search with %d parameter combinations:' %
        (FOLDS, param_comb))
    print(random_search.best_score_)
    print('\n Best hyperparameters:')
    print(random_search.best_params_)
    return random_search.best_estimator_
github ankane / quirk / quirk / classifier.py View on Github external
def _xgboost_predict(self, train_x, train_y, test_x, test_y):
        """Fit an XGBoost classifier on the training data and predict for ``test_x``.

        The fitted model is also stored on ``self._xgboost_model``. When
        ``test_y`` is provided, the train and test pairs are passed to ``fit``
        as an evaluation set. Class probabilities are returned when the
        configured metric is ``'mlogloss'``; otherwise plain predictions.
        """
        classifier = xgb.XGBClassifier(
            seed=self._seed, n_estimators=100, max_depth=3, learning_rate=0.1
        )
        self._xgboost_model = classifier  # hack

        # Fall back to 'error' when no evaluation metric has been configured.
        fit_kwargs = {
            'eval_metric': self._eval_metric or 'error',
            'verbose': 10,
        }
        if test_y is not None:
            # Labels are available for the test split, so track both splits.
            fit_kwargs['eval_set'] = [(train_x, train_y), (test_x, test_y)]
        classifier.fit(train_x, train_y, **fit_kwargs)

        wants_probabilities = self._eval_metric == 'mlogloss'
        return (
            classifier.predict_proba(test_x)
            if wants_probabilities
            else classifier.predict(test_x)
        )
github vdevmcitylp / local-descriptors-for-image-classification / csltp.py View on Github external
def classification(labels, testLabels):
	"""Train an XGBoost classifier on pickled features and print its accuracy.

	Training features are read from ``../Labels/feature_csltp`` and test
	features from ``../Labels/feature_test_csltp``.

	Args:
		labels: Training targets, used as ``y_train``.
		testLabels: Test targets, used as ``y_test`` when scoring.

	Returns:
		None. The accuracy is only printed.
	"""
	# NOTE(review): unpickling can execute arbitrary code; these files must
	# come from a trusted source.
	with open("../Labels/feature_csltp", 'rb') as f:
		X_train = cPickle.load(f)

	with open("../Labels/feature_test_csltp", "rb") as f:
		X_test = cPickle.load(f)

	y_train = labels
	y_test = testLabels
	# split data into train and test sets
	# seed = 7
	# test_size = 0.33
	# X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)

	model = XGBClassifier(n_estimators=400)
	model.fit(X_train, y_train)

	# make predictions for test data
	y_pred = model.predict(X_test)
	predictions = [round(value) for value in y_pred]

	# evaluate predictions
	accuracy = accuracy_score(y_test, predictions)
	# A single parenthesised argument prints identically under the Python 2
	# print statement and is also valid as a Python 3 print() call.
	print("Accuracy: %.2f%%" % (accuracy * 100.0))
github braz / pycon2016_talk / code / xgboost_ex2.py View on Github external
# Parse the CSV payload into a numpy array and report its dimensions.
dataset = np.loadtxt(raw_data, delimiter=",")
print(dataset.shape)

# Columns 0-7 are used as the features, column 8 as the target.
X = dataset[:, :8]
Y = dataset[:, 8]

# Hold out 33% of the rows for evaluation, using a fixed seed.
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, Y, test_size=test_size, random_state=seed
)

# Fit an XGBoost classifier constructed with no explicit arguments.
model = xgboost.XGBClassifier()
model.fit(X_train, y_train)

# Predict for the held-out rows and round each prediction before scoring.
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

# Score the rounded predictions against the held-out targets.
accuracy = accuracy_score(y_test, predictions)

print("Accuracy: %.2f%%" % (accuracy * 100.0))
github cornell-zhang / quickest / hls / main.py View on Github external
clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes = params, random_state = 0)

          clf.fit(train_val_pairs['train_X'][r], train_val_pairs['train_Y'][r].reshape(train_val_pairs['train_Y'][r].shape[0],))
          res = clf.predict(train_val_pairs['val_X'][r])
          res = res.reshape((-1, 1))
          error = error + (np.sum(res[:, 0] != train_val_pairs['val_Y'][r][:, 0]) / float(res.shape[0]))

        error = error / 10.0
        print "parameter", params, ", val error =", error
        if error < best_error:
          best_error = error
          best_param = params

      print "Best parameter:", best_param
      if model == 'xgb':
        clf = xgb.XGBClassifier(max_depth=best_param[0], n_estimators=best_param[1])
      elif model == 'ann':
        clf = MLPClassifier(solver='lbfgs', hidden_layer_sizes = best_param, random_state = 0)
      clf.fit(all_data['training_X'], new_training_Y_all.reshape(new_training_Y_all.shape[0],))
      best_models[model] = clf

      # uncomment this to see XGBoost feature importance for timing classification
      """
      if model == 'xgb':
        print "XGB feature importance for timing classification:"
        score = clf.get_booster().get_fscore()
        sorted_score = sorted(score.iteritems(), key = lambda (k, v): (v, k))
        for key, value in sorted_score:
          fea_id = int(key.replace('f', ''))
          print selected_feature_names[fea_id], value
        print "\n"
      """
github TeamHG-Memex / eli5 / eli5 / xgboost.py View on Github external
def _xgb_n_targets(xgb):
    # type: (...) -> int
    """Return the number of targets for an XGBoost estimator.

    A classifier with exactly two classes counts as one target; any other
    classifier yields its ``n_classes_``. A regressor always yields 1.

    Raises:
        TypeError: if ``xgb`` is neither an XGBClassifier nor an XGBRegressor.
    """
    if isinstance(xgb, XGBClassifier):
        n_classes = xgb.n_classes_
        # Binary classification is collapsed into a single target.
        return n_classes if n_classes != 2 else 1
    if isinstance(xgb, XGBRegressor):
        return 1
    raise TypeError
github oleg-panichev / Melbourne-University-Seizure-Prediction-2016 / code / train_models.py View on Github external
def model_create(model_name):
    """Build the classifier selected by ``model_name``.

    Names handled in the visible code: 'ada55', 'xgb', 'gb', 'rf', 'vot'
    and 'gs'. Each branch binds the constructed estimator to ``classifier``.

    NOTE(review): no ``return`` statement and no fallback branch for unknown
    names are visible here -- this excerpt is presumably truncated; confirm
    against the full file before relying on the return value.
    """
    if model_name == 'ada55':
        # AdaBoost (SAMME) over depth-5 decision trees, 5 estimators.
        classifier = AdaBoostClassifier(DecisionTreeClassifier(max_depth=5),
                         algorithm="SAMME",
                         n_estimators=5)
    elif model_name == 'xgb':
        # XGBoost with 800 estimators and a fixed seed.
        classifier = xgboost.XGBClassifier(n_estimators=800, seed=0)
    elif model_name == 'gb':
        classifier = GradientBoostingClassifier(n_estimators=1000) 
    elif model_name == 'rf':
        classifier = RandomForestClassifier() 
    elif model_name == 'vot':
        # Soft-voting ensemble of three members: a grid-searched AdaBoost,
        # a gradient-boosting classifier and a bagging classifier.
        param_grid = {"base_estimator__criterion" : ["gini"], "base_estimator__splitter" :   ["best"],  "n_estimators": [3,5, 6]}
        DTC = DecisionTreeClassifier(max_depth=5)
        ABC = AdaBoostClassifier(base_estimator = DTC, algorithm="SAMME", learning_rate=1, n_estimators=5)
        clf1 = GridSearchCV(ABC, param_grid=param_grid, scoring = 'roc_auc')
        clf2 = GradientBoostingClassifier(n_estimators=1000)
        clf3 = BaggingClassifier()
        # NOTE(review): the labels 'lr', 'rf' and 'rtf' are only names for the
        # ensemble members; they do not match the estimator types they label.
        classifier = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('rtf', clf3)], voting='soft') 
    elif model_name == 'gs':
        # Grid search over n_estimators for AdaBoost (SAMME), scored by ROC AUC.
        clf1 = AdaBoostClassifier(DecisionTreeClassifier(max_depth=5), algorithm="SAMME")
        param_grid = {'n_estimators': [4, 5, 6]}
        classifier = GridSearchCV(clf1, param_grid=param_grid, scoring='roc_auc')
github HunterMcGushion / hyperparameter_hunter / examples / advanced_examples / lambda_callback_example.py View on Github external
def execute():
    """Set up an Environment with experiment callbacks and create an XGBoost CVExperiment."""
    # Callbacks are supplied when the `Environment` is instantiated; after
    # that, experiments are created as usual. Besides `printer_callback`,
    # the `confusion_matrix_oof` callback is added as well. It and other
    # callbacks can be found in `hyperparameter_hunter.callbacks.recipes`.
    environment = Environment(
        train_dataset=get_toy_classification_data(),
        results_path="HyperparameterHunterAssets",
        metrics=["roc_auc_score"],
        cv_type=RepeatedStratifiedKFold,
        cv_params={"n_splits": 5, "n_repeats": 2, "random_state": 32},
        runs=2,
        experiment_callbacks=[printer_callback(), confusion_matrix_oof()],
    )

    # XGBClassifier with no init params; `verbose=False` is forwarded as an
    # extra parameter for `fit`.
    cv_experiment = CVExperiment(
        model_initializer=XGBClassifier,
        model_init_params={},
        model_extra_params={"fit": {"verbose": False}},
    )
github deepchem / deepchem / deepchem / molnet / run_benchmark_models.py View on Github external
def model_builder(model_dir_xgb):
      """Create an XGBClassifier and wrap it in a deepchem XGBoostModel.

      The hyperparameters and ``esr`` are read from the enclosing scope;
      ``model_dir_xgb`` is forwarded to the wrapper together with ``**esr``.
      """
      import xgboost

      # Gather the classifier settings in one mapping, then expand them.
      classifier_kwargs = {
          'max_depth': max_depth,
          'learning_rate': learning_rate,
          'n_estimators': n_estimators,
          'gamma': gamma,
          'min_child_weight': min_child_weight,
          'max_delta_step': max_delta_step,
          'subsample': subsample,
          'colsample_bytree': colsample_bytree,
          'colsample_bylevel': colsample_bylevel,
          'reg_alpha': reg_alpha,
          'reg_lambda': reg_lambda,
          'scale_pos_weight': scale_pos_weight,
          'base_score': base_score,
          'seed': seed,
      }
      xgboost_model = xgboost.XGBClassifier(**classifier_kwargs)

      wrapper_cls = deepchem.models.xgboost_models.XGBoostModel
      return wrapper_cls(xgboost_model, model_dir_xgb, **esr)