import numpy as np
import lightgbm as lgb
from sklearn.datasets import load_boston
from sklearn.model_selection import TimeSeriesSplit


def test_cv(self):
    X_train, y_train = load_boston(return_X_y=True)
    params = {'verbose': -1}
    lgb_train = lgb.Dataset(X_train, y_train)
    # shuffle = False, override metric in params
    params_with_metric = {'metric': 'l2', 'verbose': -1}
    cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
                    nfold=3, stratified=False, shuffle=False,
                    metrics='l1', verbose_eval=False)
    self.assertIn('l1-mean', cv_res)
    self.assertNotIn('l2-mean', cv_res)
    self.assertEqual(len(cv_res['l1-mean']), 10)
    # shuffle = True, callbacks
    cv_res = lgb.cv(params, lgb_train, num_boost_round=10, nfold=3, stratified=False, shuffle=True,
                    metrics='l1', verbose_eval=False,
                    callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
    self.assertIn('l1-mean', cv_res)
    self.assertEqual(len(cv_res['l1-mean']), 10)
    # enable display of training loss
    cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
                    nfold=3, stratified=False, shuffle=False,
                    metrics='l1', verbose_eval=False, eval_train_metric=True)
    self.assertIn('train l1-mean', cv_res)
    self.assertIn('valid l1-mean', cv_res)
    self.assertNotIn('train l2-mean', cv_res)
    self.assertNotIn('valid l2-mean', cv_res)
    self.assertEqual(len(cv_res['train l1-mean']), 10)
    self.assertEqual(len(cv_res['valid l1-mean']), 10)
    # self-defined folds
    tss = TimeSeriesSplit(3)
    folds = tss.split(X_train)
    cv_res_gen = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=folds,
                        verbose_eval=False)
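    # (Added sketch, not part of the original test: lgb.cv also accepts the
    # scikit-learn splitter object itself via folds=, which should produce the
    # same folds as the generator above; cv_res_obj is an illustrative name.)
    cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
                        folds=tss, verbose_eval=False)
    np.testing.assert_almost_equal(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])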


def test_lightgbm_ranking():
    try:
        import lightgbm
    except ImportError:
        print("Skipping test_lightgbm_ranking!")
        return
    import shap

    # train LightGBM ranker model
    x_train, y_train, x_test, y_test, q_train, q_test = shap.datasets.rank()
    model = lightgbm.LGBMRanker()
    model.fit(x_train, y_train, group=q_train, eval_set=[(x_test, y_test)],
              eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=5, verbose=False,
              callbacks=[lightgbm.reset_parameter(learning_rate=lambda x: 0.95 ** x * 0.1)])
    _validate_shap_values(model, x_test)
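
# (Added sketch, not part of the original test: the attributions that
# _validate_shap_values checks can also be computed directly with shap's
# TreeExplainer, using the model and x_test from the test above.)
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(x_test)
print(shap_values.shape)  # one attribution row per sample, one column per feature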


import os

from sklearn.datasets import load_svmlight_file


def test_lambdarank(self):
    dir_path = os.path.dirname(os.path.realpath(__file__))
    X_train, y_train = load_svmlight_file(os.path.join(dir_path, '../../examples/lambdarank/rank.train'))
    X_test, y_test = load_svmlight_file(os.path.join(dir_path, '../../examples/lambdarank/rank.test'))
    q_train = np.loadtxt(os.path.join(dir_path, '../../examples/lambdarank/rank.train.query'))
    q_test = np.loadtxt(os.path.join(dir_path, '../../examples/lambdarank/rank.test.query'))
    gbm = lgb.LGBMRanker(n_estimators=50)
    gbm.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
            eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=10, verbose=False,
            callbacks=[lgb.reset_parameter(learning_rate=lambda x: max(0.01, 0.1 - 0.01 * x))])
    self.assertLessEqual(gbm.best_iteration_, 24)
    self.assertGreater(gbm.best_score_['valid_0']['ndcg@1'], 0.6333)
    self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6048)
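    # (Added usage sketch, not in the original test: scoring the held-out
    # queries at the early-stopped best iteration found above.)
    test_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration_)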


# Fragment of a k-fold training loop; clf, oof, val, val_idx, fold_, t0,
# features, target, train_df, test_df, predictions and feature_importance_df
# are defined earlier in the original script (imports assumed: numpy/pandas,
# time, lightgbm as lgb, sklearn.metrics.roc_auc_score, and the custom GBDT_LR).
best_iteration = -1
oof[val_idx] = clf.predict(val[features], num_iteration=best_iteration)
else:
    gLR = GBDT_LR(clf)
    gLR.fit(X_train, Y_train, eval_set=[(X_test, Y_test)], eval_metric="auc", verbose=1000)
    feat_importance = gLR.feature_importance()
    best_iteration = -1
    clf = gLR
    oof[val_idx] = clf.predict(train_df.iloc[val_idx][features], y_=target.iloc[val_idx],
                               num_iteration=best_iteration)
else:  # lambda ranker
    gbr = lgb.LGBMRanker()
    gbr.fit(X_train, y_train, group=q_train, eval_set=[(X_test, y_test)],
            eval_group=[q_test], eval_at=[1, 3], early_stopping_rounds=5, verbose=False,
            callbacks=[lgb.reset_parameter(learning_rate=lambda x: 0.95 ** x * 0.1)])
fold_importance_df = pd.DataFrame()
fold_importance_df["feature"] = features
fold_importance_df["importance"] = feat_importance
fold_importance_df["fold"] = fold_ + 1
feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
predictions += clf.predict(test_df[features], num_iteration=best_iteration) / 5
fold_score = roc_auc_score(Y_test, oof[val_idx])
print("fold n°{} time={} score={}".format(fold_, time.time() - t0, fold_score))
# break

cv_score = roc_auc_score(target, oof)
print("CV score: {:<8.5f}".format(cv_score))
if feature_importance_df.size > 0:
# if False:
    ...  # (importance display truncated in this excerpt)


gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                learning_rates=lambda iter: 0.05 * (0.99 ** iter),
                valid_sets=lgb_eval)
print('Finished 20 - 30 rounds with decay learning rates...')

# change other parameters during training
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                valid_sets=lgb_eval,
                callbacks=[lgb.reset_parameter(bagging_fraction=[0.7] * 5 + [0.6] * 5)])
print('Finished 30 - 40 rounds with changing bagging_fraction...')

# self-defined objective function
# f(preds: array, train_data: Dataset) -> grad: array, hess: array
# log likelihood loss
def loglikelihood(preds, train_data):
    labels = train_data.get_label()
    preds = 1. / (1. + np.exp(-preds))
    grad = preds - labels
    hess = preds * (1. - preds)
    return grad, hess

# self-defined eval metric
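# (Added sketch: the eval-metric example is cut off in this excerpt. In the
# same style as the objective above, and using the pre-4.0 fobj/feval
# arguments of lgb.train, it would look roughly like this; binary_error is
# an illustrative name.)
# f(preds: array, train_data: Dataset) -> name: str, eval_result: float, is_higher_better: bool
def binary_error(preds, train_data):
    labels = train_data.get_label()
    preds = 1. / (1. + np.exp(-preds))
    return 'error', np.mean(labels != (preds > 0.5)), False

gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                fobj=loglikelihood,
                feval=binary_error,
                valid_sets=lgb_eval)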


print(f'{i}:{attr} FOLD:{fold}')
X_train, X_valid = X[train_index], X[valid_index]
y_train, y_valid = y[train_index], y[valid_index]
ys.append(y_valid)
# print(X_train.shape, y_train.shape)
fit_params = {'early_stopping_rounds': 300,
              'eval_metric': evaluate_macroF1_lgb,
              'eval_set': [(X_valid, y_valid)],
              'eval_names': ['valid'],
              # 'callbacks': [lgb.reset_parameter(learning_rate=learning_rate_010_decay_power_099)],
              'verbose': False,
              'categorical_feature': 'auto'}
fit_params['callbacks'] = [lgb.reset_parameter(learning_rate=learning_rate_power_0997)]
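# (Added usage sketch: applying the prepared keyword arguments when fitting
# the fold model; clf is assumed to be an lgb.LGBMClassifier built earlier.)
clf.fit(X_train, y_train, **fit_params)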

opt_parameters = {
    # 'colsample_bytree': 0.9221304051471293,
    'min_child_samples': 150,
    'num_leaves': 2,
    # 'subsample': 0.9510118790770111,
    'class_weight': 'balanced',
    'lambda_l1': 1.79,
    'lambda_l2': 1.71,
    'num_trees': 2000
}
# clf_final = lgb.LGBMClassifier(**clf.get_params())
# clf_final.set_params(**opt_parameters)
clf_final = lgb.LGBMClassifier(bagging_fraction=0.9957236684465528, boosting_type='gbdt',
                               class_weight='balanced', colsample_bytree=0.7953949538181928,
                               feature_fraction=0.7333800304661316, lambda_l1=1.79753950286893,