How to use the xgboost.sklearn module in xgboost

To help you get started, we’ve selected a few xgboost examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github slundberg / shap / tests / explainers / test_tree.py View on Github external
def test_xgboost_ranking():
    """Smoke-test SHAP value computation for an xgboost XGBRanker.

    Skips (with a message) when xgboost is not installed; otherwise trains
    a small pairwise ranker and validates its SHAP values.
    """
    try:
        import xgboost
    except ImportError:  # only skip when xgboost itself is missing, not on other errors
        print("Skipping test_xgboost_ranking!")
        return
    import shap

    # train an xgboost ranker model on the shap ranking dataset
    x_train, y_train, x_test, y_test, q_train, q_test = shap.datasets.rank()
    params = {'objective': 'rank:pairwise', 'learning_rate': 0.1,
              'gamma': 1.0, 'min_child_weight': 0.1,
              'max_depth': 4, 'n_estimators': 4}
    model = xgboost.sklearn.XGBRanker(**params)
    # group sizes must be integers (documents per query group)
    model.fit(x_train, y_train, q_train.astype(int),
              eval_set=[(x_test, y_test)], eval_group=[q_test.astype(int)])
    _validate_shap_values(model, x_test)
github h2oai / h2o4gpu / tests / python / open_data / gbm / test_xgb_sklearn_wrapper.py View on Github external
def test_xgboost_regression(output_margin):
    """Fit an XGBRegressor on the credit-card data and predict with the
    given ``output_margin`` setting."""
    import xgboost as xgb

    frame = pd.read_csv("./open_data/creditcard.csv")
    # every column but the last is a feature; the last column is the target
    last_col = frame.shape[1] - 1
    X = np.array(frame.iloc[:, :last_col], dtype='float32', order='C')
    y = np.array(frame.iloc[:, last_col], dtype='float32', order='C')

    model = xgb.sklearn.XGBRegressor()
    model.fit(X, y)
    model.predict(X, output_margin=output_margin)
github slundberg / shap / tests / explainers / test_tree.py View on Github external
def test_xgboost_ranking():
    """Smoke-test SHAP value computation for an xgboost XGBRanker.

    Skips (with a message) when xgboost is not installed; otherwise trains
    a small pairwise ranker and validates its SHAP values.
    """
    try:
        import xgboost
    except ImportError:  # only skip when xgboost itself is missing, not on other errors
        print("Skipping test_xgboost_ranking!")
        return
    import shap

    # train an xgboost ranker model on the shap ranking dataset
    x_train, y_train, x_test, y_test, q_train, q_test = shap.datasets.rank()
    params = {'objective': 'rank:pairwise', 'learning_rate': 0.1,
              'gamma': 1.0, 'min_child_weight': 0.1,
              'max_depth': 4, 'n_estimators': 4}
    model = xgboost.sklearn.XGBRanker(**params)
    # group sizes must be integers (documents per query group)
    model.fit(x_train, y_train, q_train.astype(int),
              eval_set=[(x_test, y_test)], eval_group=[q_test.astype(int)])
    _validate_shap_values(model, x_test)
github u1234x1234 / kaggle-yelp-restaurant-photo-classification / test_weighting.py View on Github external
#           ('21k_1024.npy', sklearn.linear_model.LogisticRegression(C=100)),
#           ('v3_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
#           ('res_full_l2.npy', sklearn.linear_model.LogisticRegression(C=1)),
            ('21k_50k_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
           ('21k_v3_3072.npy', sklearn.linear_model.LogisticRegression(C=100)),
            ('21k_v3_128.npy', sklearn.linear_model.LogisticRegression(C=50)),
#            ('21k.npy', sklearn.linear_model.LogisticRegression(C=50)),
           ('fisher.npy', sklearn.linear_model.LogisticRegression(C=2)),
            ('v3_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
            ('21k_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
#            ('vlad_2_21k_full.npy', sklearn.linear_model.LogisticRegression(C=1)),
#           ('21k_v3_128.npy', xgb_wrapper()),
#           ('fisher_21k_1024.npy', sklearn.linear_model.LogisticRegression(C=2))
#           ('v3.npy', sklearn.linear_model.LogisticRegression(C=100)),
            
            ('vlad_2_21k_full.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
                                max_depth=3, subsample=0.8, colsample_bytree=0.8)),
#            ('jo.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
#                max_depth=4, subsample=0.9, colsample_bytree=0.9))
            ]

def f(weights):
   preds = np.array([])
   for feature, clf in features:    
 
       preds_br = np.load('test/' + feature + '_br.npy')
       preds_nn = np.load('test/' + feature + '_nn.npy')
       preds_cc = np.load('test/' + feature + '_cc.npy')       
#       preds_br = (1*preds_br + 3*preds_nn + 2*preds_cc) / 6
       
       preds = np.concatenate((preds, preds_br[..., np.newaxis]), axis=2) \
           if preds.size else preds_br[..., np.newaxis]
github pjpan / Practice / kaggle-yelp-restaurant-photo-classification-u1234x1234 / test_weighting.py View on Github external
#           ('21k_1024.npy', sklearn.linear_model.LogisticRegression(C=100)),
#           ('v3_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
#           ('res_full_l2.npy', sklearn.linear_model.LogisticRegression(C=1)),
            ('21k_50k_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
           ('21k_v3_3072.npy', sklearn.linear_model.LogisticRegression(C=100)),
            ('21k_v3_128.npy', sklearn.linear_model.LogisticRegression(C=50)),
#            ('21k.npy', sklearn.linear_model.LogisticRegression(C=50)),
           ('fisher.npy', sklearn.linear_model.LogisticRegression(C=2)),
            ('v3_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
            ('21k_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
#            ('vlad_2_21k_full.npy', sklearn.linear_model.LogisticRegression(C=1)),
#           ('21k_v3_128.npy', xgb_wrapper()),
#           ('fisher_21k_1024.npy', sklearn.linear_model.LogisticRegression(C=2))
#           ('v3.npy', sklearn.linear_model.LogisticRegression(C=100)),
            
            ('vlad_2_21k_full.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
                                max_depth=3, subsample=0.8, colsample_bytree=0.8)),
#            ('jo.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
#                max_depth=4, subsample=0.9, colsample_bytree=0.9))
            ]

def f(weights):
   preds = np.array([])
   for feature, clf in features:    
 
       preds_br = np.load('test/' + feature + '_br.npy')
       preds_nn = np.load('test/' + feature + '_nn.npy')
       preds_cc = np.load('test/' + feature + '_cc.npy')       
#       preds_br = (1*preds_br + 3*preds_nn + 2*preds_cc) / 6
       
       preds = np.concatenate((preds, preds_br[..., np.newaxis]), axis=2) \
           if preds.size else preds_br[..., np.newaxis]
github pjpan / Practice / kaggle-yelp-restaurant-photo-classification-u1234x1234 / bis_avg.py View on Github external
#    print(i + 1, q, q[0] / q[1])

# Per-label classifiers used for the binary-relevance predictions.
clf1 = sklearn.linear_model.LogisticRegression(C=200)
# Separate, much more strongly regularised model — presumably for the VLAD
# features (note C=1 vs C=200 above); TODO confirm against callers.
clf1vlad = sklearn.linear_model.LogisticRegression(C=1)

# Linear support-vector regressor (second estimator in the blend).
clf2 = sklearn.svm.LinearSVR(C=5)
#clf2vlad = sklearn.svm.LinearSVR(C=1)

# Earlier experiments kept for reference (commented out):
#clf2 = sklearn.svm.SVR(C=0.1, kernel='linear')
#clf1 = sklearn.linear_model.LogisticRegressionCV(Cs=100)
#clf1 = sklearn.ensemble.RandomForestClassifier(n_estimators=100)
#clf1 = sklearn.neighbors.KNeighborsClassifier(n_neighbors=50)
#clf1 = sklearn.svm.SVC(C=10, gamma=0.03, kernel='linear', probability=True)
# Gradient-boosted tree classifiers (xgboost sklearn wrapper), one for the
# regular features and an identically-configured one for the VLAD features.
clf3 = xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=200, nthread=8,
                                max_depth=5, subsample=0.9, colsample_bytree=0.9)
clf3vlad = xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=200, nthread=8,
                                max_depth=5, subsample=0.9, colsample_bytree=0.9)

#kf = cross_validation.KFold(x.shape[0], n_folds=5, shuffle=True, random_state=0)
#res = 0
#for i in range(9):
#    res = 0
#    for train_index, test_index in kf:
#        X_train, X_val = x[train_index], x[test_index]
#        y_train, y_val = y[train_index], y[test_index]
#        rrr = np.zeros((X_val.shape[0], 9), dtype=np.int32)
#
#        clf.fit(X_train, y_train[:, i])
#        preds = clf.predict(X_val)
#        rrr[:, i] = preds
##        print (i, metrics.f1_score(y_val[:, i], preds))
##    score = metrics.f1_score(y_val, rrr, average='samples')
github pjpan / Practice / kaggle-yelp-restaurant-photo-classification-u1234x1234 / save_folds.py View on Github external
#           ('21k_1024.npy', sklearn.linear_model.LogisticRegression(C=100)),
#           ('v3_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
#           ('res_full_l2.npy', sklearn.linear_model.LogisticRegression(C=1)),
            ('21k_50k_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
           ('21k_v3_3072.npy', sklearn.linear_model.LogisticRegression(C=100)),
            ('21k_v3_128.npy', sklearn.linear_model.LogisticRegression(C=50)),
#            ('21k.npy', sklearn.linear_model.LogisticRegression(C=50)),
           ('fisher.npy', sklearn.linear_model.LogisticRegression(C=2)),
            ('v3_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
            ('21k_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
#            ('vlad_2_21k_full.npy', sklearn.linear_model.LogisticRegression(C=1)),
#           ('21k_v3_128.npy', xgb_wrapper()),
#           ('fisher_21k_1024.npy', sklearn.linear_model.LogisticRegression(C=2))
#           ('v3.npy', sklearn.linear_model.LogisticRegression(C=100)),
            
            ('vlad_2_21k_full.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
                                max_depth=3, subsample=0.8, colsample_bytree=0.8)),
#            ('jo.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
#                max_depth=4, subsample=0.9, colsample_bytree=0.9))
            ]

def train_predict(fold, feature, clf, X_train, y_train, X_test, y_test):
    preds_br = np.zeros((X_test.shape[0], 9))
    for i in range(0, 9):
        clf.fit(X_train, y_train[:, i])
        preds_br[:, i] = clf.predict_proba(X_test)[:, 1]
    np.save('val3/' + str(fold) + '_' + feature + '_br', preds_br)

    nn_preds = np.array([])
    n_iter = 10
    for i in range(n_iter):
        nn_clf.fit(X_train, y_train)
github pjpan / Practice / kaggle-yelp-restaurant-photo-classification-u1234x1234 / ensemble.py View on Github external
#           ('21k_1024.npy', sklearn.linear_model.LogisticRegression(C=100)),
#           ('v3_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
#           ('res_full_l2.npy', sklearn.linear_model.LogisticRegression(C=1)),
            ('21k_50k_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
           ('21k_v3_3072.npy', sklearn.linear_model.LogisticRegression(C=100)),
            ('21k_v3_128.npy', sklearn.linear_model.LogisticRegression(C=50)),
#            ('21k.npy', sklearn.linear_model.LogisticRegression(C=50)),
           ('fisher.npy', sklearn.linear_model.LogisticRegression(C=2)),
            ('v3_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
            ('21k_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
#            ('vlad_2_21k_full.npy', sklearn.linear_model.LogisticRegression(C=1)),
#           ('21k_v3_128.npy', xgb_wrapper()),
#           ('fisher_21k_1024.npy', sklearn.linear_model.LogisticRegression(C=2))
#           ('v3.npy', sklearn.linear_model.LogisticRegression(C=100)),
            
            ('vlad_2_21k_full.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
                                max_depth=3, subsample=0.8, colsample_bytree=0.8)),
#            ('jo.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
#                max_depth=4, subsample=0.9, colsample_bytree=0.9))
            ]

def train_predict(clf, X_train, y_train, X_test):
    preds_br = np.zeros((X_test.shape[0], 9))
    for i in range(0, 9):
        clf.fit(X_train, y_train[:, i])
        preds_br[:, i] = clf.predict_proba(X_test)[:, 1]   

    nn_preds = np.array([])
    n_iter = 20
    for i in range(n_iter):
        nn_clf.fit(X_train, y_train)
        s_preds = nn_clf.predict_proba(X_test)
github pcyin / tranX / components / reranker.py View on Github external
def __init__(self, features, transition_system=None):
        """Construct the reranker and its underlying XGBRanker model.

        Args:
            features: feature definitions forwarded to the parent class.
            transition_system: optional transition system forwarded to the
                parent class.
        """
        super(XGBoostReranker, self).__init__(features, transition_system=transition_system)

        # Small rank:ndcg model; 5 trees of depth 4 with a high gamma (5.0),
        # i.e. aggressive pruning of low-gain splits.
        params = {'objective': 'rank:ndcg', 'learning_rate': .1,
                  'gamma': 5.0, 'min_child_weight': 0.1,
                  'max_depth': 4, 'n_estimators': 5}

        self.ranker = xgb.sklearn.XGBRanker(**params)
github Koziev / chatbot / ruchatbot / experiments / train_synonymy_detector_xgb_pairwise_ranking.py View on Github external
def create_estimator(model_params):
    """Build a ranking estimator from a config dict.

    Args:
        model_params: dict with an 'engine' key ('XGBRanker' or 'LGBMRanker')
            plus the hyper-parameters read below ('learning_rate',
            'max_depth', 'n_estimators').

    Returns:
        An unfitted ranker for the requested engine.
        NOTE(review): falls through (returning None) for any other engine
        value — unless further branches follow below this view; confirm.
    """
    if model_params['engine'] == 'XGBRanker':
        # Pairwise ranking objective; gamma/min_child_weight left at defaults.
        params = {'objective': 'rank:pairwise',
                  'learning_rate': model_params['learning_rate'],
                  #'gamma': 1.0,
                  #'min_child_weight': 0.1,
                  'max_depth': model_params['max_depth'],
                  'n_estimators': model_params['n_estimators']}
        model = xgb.sklearn.XGBRanker(**params)
        return model
    elif model_params['engine'] == 'LGBMRanker':
        # LightGBM's LambdaRank; max_depth=-1 means no depth limit.
        params = {'objective': 'lambdarank',
                  'learning_rate': model_params['learning_rate'],
                  'max_depth': -1,
                  'n_estimators': model_params['n_estimators']}
        model = lgb.sklearn.LGBMRanker(**params)
        return model