def test_xgboost_ranking():
    try:
        import xgboost
    except ImportError:
        print("Skipping test_xgboost_ranking!")
        return
    import shap

    # train an xgboost ranker model on the shap ranking dataset
    x_train, y_train, x_test, y_test, q_train, q_test = shap.datasets.rank()
    params = {'objective': 'rank:pairwise', 'learning_rate': 0.1,
              'gamma': 1.0, 'min_child_weight': 0.1,
              'max_depth': 4, 'n_estimators': 4}
    model = xgboost.sklearn.XGBRanker(**params)
    model.fit(x_train, y_train, group=q_train.astype(int),
              eval_set=[(x_test, y_test)], eval_group=[q_test.astype(int)])
    _validate_shap_values(model, x_test)
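# NOTE: _validate_shap_values is defined elsewhere in the test module. A
# minimal sketch of what such a helper typically checks (an assumption, not
# the original implementation): SHAP values plus the expected value should
# reproduce the model's raw margin output for every row.
def _validate_shap_values(model, x_test):
    import numpy as np
    import shap
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(x_test)
    margin = model.predict(x_test, output_margin=True)
    assert np.allclose(shap_values.sum(axis=1) + explainer.expected_value,
                       margin, atol=1e-4)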
def test_xgboost_regression(output_margin):
    import numpy as np
    import pandas as pd
    import xgboost as xgb

    # load the credit-card dataset as C-ordered float32 arrays
    df = pd.read_csv("./open_data/creditcard.csv")
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    gbm = xgb.sklearn.XGBRegressor()
    gbm.fit(X, y)
    gbm.predict(X, output_margin=output_margin)
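# output_margin=True returns the raw, untransformed booster scores; for a
# squared-error regressor these match the regular predictions, so either
# flag value exercises the same prediction path. Example calls:
test_xgboost_regression(output_margin=True)
test_xgboost_regression(output_margin=False)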
features = [
    # ('21k_1024.npy', sklearn.linear_model.LogisticRegression(C=100)),
    # ('v3_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
    # ('res_full_l2.npy', sklearn.linear_model.LogisticRegression(C=1)),
    ('21k_50k_2048.npy', sklearn.linear_model.LogisticRegression(C=100)),
    ('21k_v3_3072.npy', sklearn.linear_model.LogisticRegression(C=100)),
    ('21k_v3_128.npy', sklearn.linear_model.LogisticRegression(C=50)),
    # ('21k.npy', sklearn.linear_model.LogisticRegression(C=50)),
    ('fisher.npy', sklearn.linear_model.LogisticRegression(C=2)),
    ('v3_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
    ('21k_full.npy', sklearn.linear_model.LogisticRegression(C=100)),
    # ('vlad_2_21k_full.npy', sklearn.linear_model.LogisticRegression(C=1)),
    # ('21k_v3_128.npy', xgb_wrapper()),
    # ('fisher_21k_1024.npy', sklearn.linear_model.LogisticRegression(C=2))
    # ('v3.npy', sklearn.linear_model.LogisticRegression(C=100)),
    ('vlad_2_21k_full.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
                                                      max_depth=3, subsample=0.8, colsample_bytree=0.8)),
    # ('jo.npy', xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=100, nthread=8,
    #                                      max_depth=4, subsample=0.9, colsample_bytree=0.9))
]

def f(weights):
    # stack the per-feature '_br' prediction matrices along a new third
    # axis, so preds ends up with shape (n_samples, n_labels, n_models)
    preds = np.array([])
    for feature, clf in features:
        preds_br = np.load('test/' + feature + '_br.npy')
        preds_nn = np.load('test/' + feature + '_nn.npy')
        preds_cc = np.load('test/' + feature + '_cc.npy')
        # preds_br = (1*preds_br + 3*preds_nn + 2*preds_cc) / 6
        preds = np.concatenate((preds, preds_br[..., np.newaxis]), axis=2) \
            if preds.size else preds_br[..., np.newaxis]
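# f(weights) is cut off above; a common way such a blending objective is
# finished and minimized is sketched below (an assumption, not the original
# code: y_val and the stacked preds array are taken as given, and weights
# are constrained to the probability simplex).
import numpy as np
from scipy.optimize import minimize

def blend_loss(weights, preds, y_val):
    # weighted average of the per-model probability stacks, then log loss
    blended = np.tensordot(preds, np.asarray(weights), axes=([2], [0]))
    blended = np.clip(blended, 1e-15, 1 - 1e-15)
    return -np.mean(y_val * np.log(blended) + (1 - y_val) * np.log(1 - blended))

n_models = preds.shape[2]
res = minimize(blend_loss, np.ones(n_models) / n_models, args=(preds, y_val),
               method='SLSQP', bounds=[(0, 1)] * n_models,
               constraints={'type': 'eq', 'fun': lambda w: np.sum(w) - 1})
# res.x then holds the optimized blending weights.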
# print(i + 1, q, q[0] / q[1])

clf1 = sklearn.linear_model.LogisticRegression(C=200)
clf1vlad = sklearn.linear_model.LogisticRegression(C=1)
clf2 = sklearn.svm.LinearSVR(C=5)
# clf2vlad = sklearn.svm.LinearSVR(C=1)
# clf2 = sklearn.svm.SVR(C=0.1, kernel='linear')
# clf1 = sklearn.linear_model.LogisticRegressionCV(Cs=100)
# clf1 = sklearn.ensemble.RandomForestClassifier(n_estimators=100)
# clf1 = sklearn.neighbors.KNeighborsClassifier(n_neighbors=50)
# clf1 = sklearn.svm.SVC(C=10, gamma=0.03, kernel='linear', probability=True)
clf3 = xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=200, nthread=8,
                                 max_depth=5, subsample=0.9, colsample_bytree=0.9)
clf3vlad = xgb.sklearn.XGBClassifier(learning_rate=0.1, n_estimators=200, nthread=8,
                                     max_depth=5, subsample=0.9, colsample_bytree=0.9)
# kf = cross_validation.KFold(x.shape[0], n_folds=5, shuffle=True, random_state=0)
# res = 0
# for i in range(9):
#     res = 0
#     for train_index, test_index in kf:
#         X_train, X_val = x[train_index], x[test_index]
#         y_train, y_val = y[train_index], y[test_index]
#         rrr = np.zeros((X_val.shape[0], 9), dtype=np.int32)
#
#         clf.fit(X_train, y_train[:, i])
#         preds = clf.predict(X_val)
#         rrr[:, i] = preds
#         # print(i, metrics.f1_score(y_val[:, i], preds))
#     # score = metrics.f1_score(y_val, rrr, average='samples')
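# The commented-out loop above uses the legacy sklearn.cross_validation API,
# which was removed in scikit-learn 0.20; the modern equivalent (assuming x
# and y as in the surrounding code) is:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=0)
for train_index, test_index in kf.split(x):
    X_train, X_val = x[train_index], x[test_index]
    y_train, y_val = y[train_index], y[test_index]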
def train_predict(fold, feature, clf, X_train, y_train, X_test, y_test):
    # binary-relevance scheme: fit one binary classifier per label column
    # and keep the positive-class probability for each of the nine labels
    preds_br = np.zeros((X_test.shape[0], 9))
    for i in range(0, 9):
        clf.fit(X_train, y_train[:, i])
        preds_br[:, i] = clf.predict_proba(X_test)[:, 1]
    np.save('val3/' + str(fold) + '_' + feature + '_br', preds_br)

    # nn_clf is defined elsewhere in the source file; refit it n_iter times
    nn_preds = np.array([])
    n_iter = 10
    for i in range(n_iter):
        nn_clf.fit(X_train, y_train)
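# Hypothetical driver for the function above (kf, x and y are assumptions
# carried over from the cross-validation setup earlier in the file):
for fold, (train_index, test_index) in enumerate(kf.split(x)):
    for feature, clf in features:
        train_predict(fold, feature, clf,
                      x[train_index], y[train_index],
                      x[test_index], y[test_index])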
def train_predict(clf, X_train, y_train, X_test):
    # same binary-relevance scheme as above, without saving to disk
    preds_br = np.zeros((X_test.shape[0], 9))
    for i in range(0, 9):
        clf.fit(X_train, y_train[:, i])
        preds_br[:, i] = clf.predict_proba(X_test)[:, 1]

    # nn_clf is defined elsewhere; collect predictions over n_iter refits
    nn_preds = np.array([])
    n_iter = 20
    for i in range(n_iter):
        nn_clf.fit(X_train, y_train)
        s_preds = nn_clf.predict_proba(X_test)
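# The snippet ends mid-loop; a plausible continuation (an assumption, not
# the original code) accumulates the per-restart probabilities and averages
# them, smoothing out the randomness of neural-network initialization:
#         nn_preds = nn_preds + s_preds if nn_preds.size else s_preds
# and, after the loop:
#     nn_preds /= n_iter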
def __init__(self, features, transition_system=None):
    super(XGBoostReranker, self).__init__(features, transition_system=transition_system)
    # small NDCG-objective ranker used to rerank candidates
    params = {'objective': 'rank:ndcg', 'learning_rate': 0.1,
              'gamma': 5.0, 'min_child_weight': 0.1,
              'max_depth': 4, 'n_estimators': 5}
    self.ranker = xgb.sklearn.XGBRanker(**params)
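# For reference, XGBRanker needs per-query group sizes at fit time. A
# minimal, self-contained sketch (illustrative data, not from this class):
import numpy as np
import xgboost as xgb

ranker = xgb.sklearn.XGBRanker(objective='rank:ndcg', n_estimators=5)
X = np.random.rand(8, 3)                 # 8 candidates, 3 features each
y = np.array([2, 1, 0, 0, 1, 0, 2, 0])   # graded relevance labels
ranker.fit(X, y, group=[4, 4])           # two queries of 4 candidates each
scores = ranker.predict(X)               # higher score = better candidate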
def create_estimator(model_params):
    if model_params['engine'] == 'XGBRanker':
        params = {'objective': 'rank:pairwise',
                  'learning_rate': model_params['learning_rate'],
                  # 'gamma': 1.0,
                  # 'min_child_weight': 0.1,
                  'max_depth': model_params['max_depth'],
                  'n_estimators': model_params['n_estimators']}
        model = xgb.sklearn.XGBRanker(**params)
        return model
    elif model_params['engine'] == 'LGBMRanker':
        params = {'objective': 'lambdarank',
                  'learning_rate': model_params['learning_rate'],
                  'max_depth': -1,  # -1 means no depth limit in LightGBM
                  'n_estimators': model_params['n_estimators']}
        model = lgb.sklearn.LGBMRanker(**params)
        return model
    raise ValueError("Unknown engine: %s" % model_params['engine'])
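# Example call with an illustrative hyperparameter dict (the keys follow
# create_estimator above; the values are placeholders, not tuned settings):
model = create_estimator({'engine': 'XGBRanker',
                          'learning_rate': 0.1,
                          'max_depth': 4,
                          'n_estimators': 100})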