How to use the xgboost.sklearn.XGBClassifier class in xgboost

To help you get started, we’ve selected a few examples of xgboost.sklearn.XGBClassifier, based on popular ways it is used in public projects.

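Before the project excerpts, here is a minimal sketch of the typical workflow: construct an XGBClassifier, fit it on a feature matrix and label vector, then call predict or predict_proba. The synthetic data and parameter values below are illustrative assumptions, not drawn from the projects.

import numpy as np
from xgboost import XGBClassifier

# Illustrative synthetic binary-classification data.
X = np.random.rand(100, 5)
y = np.random.randint(0, 2, size=100)

clf = XGBClassifier(n_estimators=50, max_depth=3, learning_rate=0.1)
clf.fit(X, y)

labels = clf.predict(X)        # hard class labels
probas = clf.predict_proba(X)  # per-class probabilities, shape (n_samples, 2)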

github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgb_sklearn_wrapper.py View on GitHub
def test_xgboost_classifier(output_margin):
    import numpy as np
    import pandas as pd
    import xgboost as xgb

    # The last column of the credit-card dataset is the label; the rest are features.
    df = pd.read_csv("./open_data/creditcard.csv")
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')

    gbm = xgb.sklearn.XGBClassifier()

    gbm.fit(X, y)

    # output_margin=True requests raw, untransformed margin scores instead of
    # class labels / probabilities.
    gbm.predict(X, output_margin=output_margin)
    gbm.predict_proba(X, output_margin=output_margin)
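The excerpt omits whatever supplies output_margin to the test; in a pytest suite this is commonly done with parametrize. The decorator below is an assumption about the harness, not part of the excerpt:

import pytest

@pytest.mark.parametrize("output_margin", [True, False])
def test_xgboost_classifier(output_margin):
    ...  # body as shown above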

github lytforgood / MachineLearningTrick / Xgboost_Feature.py View on GitHub
def fit_model(self, X_train, y_train, X_test, y_test):
    clf = XGBClassifier(
        learning_rate=self.learning_rate,
        n_estimators=self.n_estimators,
        max_depth=self.max_depth,
        min_child_weight=self.min_child_weight,
        gamma=self.gamma,
        subsample=self.subsample,
        colsample_bytree=self.colsample_bytree,
        objective=self.objective,
        nthread=self.nthread,
        scale_pos_weight=self.scale_pos_weight,
        reg_alpha=self.reg_alpha,
        reg_lambda=self.reg_lambda,
        seed=self.seed)
    clf.fit(X_train, y_train)
    y_pre = clf.predict(X_test)
    y_pro = clf.predict_proba(X_test)[:, 1]
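Stripped of the wrapper class, the same configuration can be passed straight to XGBClassifier. The sketch below uses synthetic data and illustrative values, with the current parameter names n_jobs and random_state standing in for the excerpt's nthread and seed:

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Illustrative data in place of the wrapper's X_train / X_test splits.
X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = XGBClassifier(
    learning_rate=0.1, n_estimators=200, max_depth=5, min_child_weight=1,
    gamma=0, subsample=0.8, colsample_bytree=0.8,
    objective='binary:logistic', scale_pos_weight=1,
    reg_alpha=0, reg_lambda=1, n_jobs=4, random_state=27)
clf.fit(X_train, y_train)
pos_proba = clf.predict_proba(X_test)[:, 1]  # probability of the positive class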

github blerim153 / kaggle_airbnb_new_user_bookings / prediction.py View on GitHub
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier


def perform_prediction(training, labels, testing, xgb_votes, rf_votes):
    """ Perform prediction using a combination of XGB and RandomForests. """
    predictions = np.zeros((len(testing), len(set(labels))))
    # Predictions using xgboost.
    for i in range(xgb_votes):
        print('XGB vote %d' % i)
        xgb = XGBClassifier(
            max_depth=DEPTH_XGB, learning_rate=LEARNING_XGB,
            n_estimators=ESTIMATORS_XGB, objective='multi:softprob',
            subsample=SUBSAMPLE_XGB, colsample_bytree=COLSAMPLE_XGB)
        xgb.fit(training, labels)
        predictions += xgb.predict_proba(testing)
    # Predictions using RandomForestClassifier.
    for i in range(rf_votes):
        print('RandomForest vote %d' % i)
        rand_forest = RandomForestClassifier(
            n_estimators=ESTIMATORS_RF, criterion=CRITERION_RF, n_jobs=JOBS_RF,
            max_depth=DEPTH_RF, min_samples_leaf=MIN_LEAF_RF, bootstrap=True)
        rand_forest.fit(training, labels)
        predictions += rand_forest.predict_proba(testing)
    return predictions
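Each vote adds a full predict_proba matrix, so perform_prediction returns summed class probabilities rather than averages; taking an argmax (or dividing by the number of votes) recovers the ensemble decision. A minimal usage sketch, assuming the module-level constants (DEPTH_XGB, LEARNING_XGB, ESTIMATORS_XGB, and the RF counterparts) are defined elsewhere in prediction.py and that training, labels, and testing arrays are already prepared:

import numpy as np

summed = perform_prediction(training, labels, testing, xgb_votes=3, rf_votes=3)
avg_proba = summed / 6.0                 # average of the six votes
final_class = np.argmax(summed, axis=1)  # argmax is unchanged by the scaling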

github pjpan / Practice / kaggle-yelp-restaurant-photo-classification-u1234x1234 / bis_avg.py View on GitHub
#for i in range(9):
#    q, _ = np.histogram(y[:, i].ravel(), bins=[0, 0.5, 1])
#    print(i + 1, q, q[0] / q[1])

clf1 = sklearn.linear_model.LogisticRegression(C=200)
clf1vlad = sklearn.linear_model.LogisticRegression(C=1)

clf2 = sklearn.svm.LinearSVR(C=5)
#clf2vlad = sklearn.svm.LinearSVR(C=1)

#clf2 = sklearn.svm.SVR(C=0.1, kernel='linear')
#clf1 = sklearn.linear_model.LogisticRegressionCV(Cs=100)
#clf1 = sklearn.ensemble.RandomForestClassifier(n_estimators=100)
#clf1 = sklearn.neighbors.KNeighborsClassifier(n_neighbors=50)
#clf1 = sklearn.svm.SVC(C=10, gamma=0.03, kernel='linear', probability=True)
clf3 = xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=200, nthread=8,
                                 max_depth=5, subsample=0.9, colsample_bytree=0.9)
clf3vlad = xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=200, nthread=8,
                                     max_depth=5, subsample=0.9, colsample_bytree=0.9)

#kf = cross_validation.KFold(x.shape[0], n_folds=5, shuffle=True, random_state=0)
#res = 0
#for i in range(9):
#    res = 0
#    for train_index, test_index in kf:
#        X_train, X_val = x[train_index], x[test_index]
#        y_train, y_val = y[train_index], y[test_index]
#        rrr = np.zeros((X_val.shape[0], 9), dtype=np.int32)
#
#        clf.fit(X_train, y_train[:, i])
#        preds = clf.predict(X_val)
#        rrr[:, i] = preds
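The commented-out block above sketches per-attribute cross-validation for the multi-label Yelp task: one classifier fitted per column of y. A runnable version of that idea on synthetic data, using the modern sklearn.model_selection.KFold in place of the deprecated cross_validation module (the array shapes and parameter values are assumptions, not code from the repository):

import numpy as np
from sklearn.model_selection import KFold
from xgboost import XGBClassifier

x = np.random.rand(200, 20)                  # stand-in feature matrix
y = np.random.randint(0, 2, size=(200, 9))   # 9 binary attribute labels

kf = KFold(n_splits=5, shuffle=True, random_state=0)
for i in range(9):                           # one model per label column
    for train_index, test_index in kf.split(x):
        clf = XGBClassifier(learning_rate=0.1, n_estimators=50, max_depth=5)
        clf.fit(x[train_index], y[train_index, i])
        preds = clf.predict(x[test_index])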

github ixarchakos / nba-games / src / machine_learning / classification.py View on GitHub
intercept_scaling=1, random_state=None, max_iter=3000),
                       "knn": KNeighborsClassifier(n_neighbors=100, weights='distance', leaf_size=30, n_jobs=n_jobs),
                       "random_forests": RandomForestClassifier(n_estimators=350, criterion='entropy', min_samples_split=2,
                                                                min_samples_leaf=1, max_leaf_nodes=600, n_jobs=n_jobs),
                       "logistic_regression": LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=2.4, fit_intercept=True, intercept_scaling=1,
                                                                 random_state=None, solver='liblinear', max_iter=1000, multi_class='ovr',
                                                                 warm_start=False, n_jobs=n_jobs),
                       "decision_trees": DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None, min_samples_split=2,
                                                                min_samples_leaf=100, min_weight_fraction_leaf=0.0, max_features=None,
                                                                random_state=None, max_leaf_nodes=None, presort=False),
                       "sgd": SGDClassifier(alpha=.0001, n_iter=500, penalty="elasticnet", n_jobs=n_jobs),
                       "neural_network": Classifier(layers=[Layer("Sigmoid", units=14), Layer("Sigmoid", units=13), Layer("Sigmoid", units=12),
                                                            Layer("Sigmoid", units=10), Layer("Softmax")], learning_rate=0.01, n_iter=200,
                                                    batch_size=10, regularize='L1', n_stable=50, dropout_rate=0, verbose=True),
                       "GBC": GradientBoostingClassifier(max_depth=10, max_leaf_nodes=850, min_samples_leaf=15, learning_rate=0.1),
                       "XGB": XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
                                            max_depth=10, min_child_weight=2, missing=None, n_estimators=100, nthread=n_jobs, reg_alpha=0,
                                            objective='binary:logistic', reg_lambda=1, scale_pos_weight=1, seed=0, silent=True, subsample=1)}
    return classifier_list[classifier_name].fit(train_set, train_labels)
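The excerpt above starts mid-dictionary (the first estimator's arguments are cut off) and ends by fitting whichever entry classifier_name selects. The dispatch pattern itself reduces to the following self-contained sketch; the two-entry dictionary and synthetic data are illustrative, not taken from the repository:

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

train_set = np.random.rand(300, 8)
train_labels = np.random.randint(0, 2, size=300)

classifier_list = {
    "random_forests": RandomForestClassifier(n_estimators=350, criterion='entropy'),
    "XGB": XGBClassifier(max_depth=10, min_child_weight=2, n_estimators=100,
                         objective='binary:logistic', learning_rate=0.1),
}
model = classifier_list["XGB"].fit(train_set, train_labels)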

github pjpan / Practice / kaggle-yelp-restaurant-photo-classification-u1234x1234 / weights.py View on GitHub
#           ('21k_1024.npy', sklearn.linear_model.LogisticRegression(C=100)),
#           ('v3_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
#           ('res_full_l2.npy', sklearn.linear_model.LogisticRegression(C=1)),
            ('21k_50k_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
           ('21k_v3_3072.npy', sklearn.linear_model.LogisticRegression(C=100)),
            ('21k_v3_128.npy', sklearn.linear_model.LogisticRegression(C=50)),
#            ('21k.npy', sklearn.linear_model.LogisticRegression(C=50)),
           ('fisher.npy', sklearn.linear_model.LogisticRegression(C=2)),
            ('v3_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
            ('21k_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
#            ('vlad_2_21k_full.npy', sklearn.linear_model.LogisticRegression(C=1)),
#           ('21k_v3_128.npy', xgb_wrapper()),
#           ('fisher_21k_1024.npy', sklearn.linear_model.LogisticRegression(C=2))
#           ('v3.npy', sklearn.linear_model.LogisticRegression(C=100)),
            
            ('vlad_2_21k_full.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
                                                              max_depth=3, subsample=0.8, colsample_bytree=0.8)),
#            ('jo.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
#                                                 max_depth=4, subsample=0.9, colsample_bytree=0.9))
            ]


def f_real(weights):
    kf = cross_validation.KFold(2000, n_folds=10, shuffle=True, random_state=0)
    re = np.array([])
    fold = 0
    for train_index, test_index in kf:
        y = np.load('y_train.npy')
        y_val = y[test_index]
        preds = np.array([])
        for feature, clf in features:

github Erakhsha / ICU72hReadmissionMIMICIII / generate_datasets / create_dataset.py View on GitHub
def grid_search(model_type, X, y, num_of_folds, verbose, first_dim, second_dim=None, third_dim=None, return_auc_values=False):
    best_auc = 0
    best_auc_setting = None

    if model_type == 'XGB':
        auc_matrix = np.zeros((len(first_dim), len(second_dim), len(third_dim)))
        for max_depth_index, max_depth in enumerate(first_dim):
            for n_estimator_index, n_estimator in enumerate(second_dim):
                for learning_rate_index, learning_rate in enumerate(third_dim):
                    model = XGBClassifier(max_depth=int(max_depth), n_estimators=int(n_estimator), learning_rate=learning_rate)
                    auc = auc_calculator(model, X, y, num_of_folds)
                    auc_matrix[max_depth_index, n_estimator_index, learning_rate_index] = auc
                    if auc > best_auc:
                        best_auc = auc
                        best_auc_setting = [max_depth, n_estimator, learning_rate]
                    if verbose:
                        sys.stdout.write('\rGRID SEARCHING XGB: progress: {0:.3f} % ...'.format(
                            (max_depth_index * (len(second_dim) * len(third_dim)) +
                             n_estimator_index * len(third_dim) +
                             learning_rate_index + 1) / (len(first_dim) * len(second_dim) * len(third_dim)) * 100))
    if model_type == 'LR+LASSO':
        auc_matrix = np.zeros(len(first_dim))
        for index, regularization_strength in enumerate(first_dim):
            model = LogisticRegression(penalty='l1', C=regularization_strength)
            auc = auc_calculator(model, X, y, num_of_folds)
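A hedged sketch of how grid_search might be invoked for the XGB branch, based only on the signature above; the grid values are illustrative, and it assumes X, y and the module's auc_calculator helper are already available:

# Illustrative search grids (not the values used in create_dataset.py).
max_depths = [3, 5, 7]
n_estimators = [100, 200]
learning_rates = [0.05, 0.1]

grid_search('XGB', X, y, num_of_folds=5, verbose=True,
            first_dim=max_depths, second_dim=n_estimators, third_dim=learning_rates)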

github aarshayj / AJ_ML_Library / models_classification.py View on GitHub
def __init__(self, data_train, data_test, target, predictors, cv_folds=10, scoring_metric_skl='accuracy', scoring_metric_xgb='error'):
        
        GenericModelClass.__init__(self, alg=XGBClassifier(), data_train=data_train, 
                                   data_test=data_test, target=target, predictors=predictors,cv_folds=cv_folds,scoring_metric=scoring_metric_skl)
        
        #Define default parameters on your own:
        self.default_parameters = { 
                                 'max_depth':3, 'learning_rate':0.1,
                                 'n_estimators':100, 'silent':True,
                                 'objective':"binary:logistic",
                                 'nthread':1, 'gamma':0, 'min_child_weight':1,
                                 'max_delta_step':0, 'subsample':1, 'colsample_bytree':1, 'colsample_bylevel':1,
                                 'reg_alpha':0, 'reg_lambda':1, 'scale_pos_weight':1,
                                 'base_score':0.5, 'seed':0, 'missing':None
                            }
        self.model_output = pd.Series(self.default_parameters)

        #create DMatrix with no missing:
        self.xgtrain = xgb.DMatrix(self.data_train[self.predictors].values, label=self.data_train[self.target].values)
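The DMatrix built at the end is what the native xgboost API consumes, presumably so the class can run xgb.cv for tuning or early stopping. A minimal sketch of that usage, with xgtrain standing in for self.xgtrain and illustrative parameters rather than the class's actual defaults:

import xgboost as xgb

params = {'objective': 'binary:logistic', 'max_depth': 3, 'eta': 0.1}
cv_results = xgb.cv(params, xgtrain, num_boost_round=100, nfold=10,
                    metrics='error', early_stopping_rounds=20)
print(cv_results.shape[0])  # number of boosting rounds kept after early stopping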