How to use lightgbm - 10 common examples

To help you get started, we’ve selected a few lightgbm examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github C-rawler / DCIC-2019-Credit-intelligence-score-2th-Place / lgb_mse.py View on Github external
"verbosity": -1}

# "seed": 8888
# folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=8888)
# idx = y_train.argsort()
# y_lab = np.repeat(list(range(50000 // 20)), 20)
# y_lab = np.asarray(sorted(list(zip(idx, y_lab))))[:, -1].astype(np.int32)
# splits = folds.split(X_train, y_lab)

folds = KFold(n_splits=5, shuffle=True, random_state=2019)
splits = folds.split(X_train, y_train)

for fold_, (trn_idx, val_idx) in enumerate(splits):
    print("fold n°{}".format(fold_ + 1))
    trn_data = lgb.Dataset(X_train[trn_idx], y_train[trn_idx])
    val_data = lgb.Dataset(X_train[val_idx], y_train[val_idx])

    num_round = 20000
    clf = lgb.train(param, trn_data, num_round, valid_sets=[trn_data, val_data], verbose_eval=100,
                    early_stopping_rounds=100)
    oof_lgb[val_idx] = clf.predict(X_train[val_idx], num_iteration=clf.best_iteration)
    predictions_lgb += clf.predict(X_test, num_iteration=clf.best_iteration) / folds.n_splits


print("MAE CV score: {:<8.8f}".format(1/(mean_absolute_error(oof_lgb, y_train)+1)))
print(predictions_lgb)

np.save('val.mse_lgb.npy',oof_lgb)
np.save('test.mse_lgb.npy',predictions_lgb)
github MetaLearners / NIPS-2018-AutoML-Challenge / src / boosting.py View on Github external
def suggest_learning_rate(self, X, y, max_boost_round):
        
        lr = [0.01, 0.02, 0.03, 0.04, 0.05]
        
        X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=0)

        params = self.setParams(self.default_hyper_param)

        max_round = max_boost_round // 500
        auc = np.zeros([len(lr), max_round])
        for i in range(len(lr)):
            print ('learning rate: %.2f' %(lr[i]))
            params['learning_rate'] = lr[i]
            train_data = lgb.Dataset(X_train, y_train, free_raw_data=False)
            clf = None
            for j in range(max_round):
                clf = lgb.train(params, train_data, num_boost_round=500, init_model=clf, keep_training_booster=True)
                # score with regularization
                auc[i, j] = roc_auc_score(y_valid, clf.predict(X_valid)) - lr[i] * 0.1 + j * 0.001

        print (auc)
        idx = np.argmax(auc)
        best_lr = lr[idx // max_round]
        best_boost_round = (idx % max_round + 1) * 500
        return best_lr, best_boost_round
github dmitryikh / leaves / testdata / lg_dart_breast_cancer.py View on Github external
import lightgbm as lgb
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

X, y = datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)

n_estimators = 10
d_train = lgb.Dataset(X_train, label=y_train)
params = {
    'boosting_type': 'dart',
    'objective': 'binary',
}
clf = lgb.train(params, d_train, n_estimators)
y_pred = clf.predict(X_test)

clf.save_model('lg_dart_breast_cancer.model')  # save the model in txt format
np.savetxt('lg_dart_breast_cancer_true_predictions.txt', y_pred)
np.savetxt('breast_cancer_test.tsv', X_test, delimiter='\t')
d = clf.dump_model()
import json
with open('lg_dart_breast_cancer.json', 'w') as fout:
    json.dump(d, fout, indent=1)
github microsoft / LightGBM / tests / python_package_test / test_plotting.py View on Github external
def test_plot_split_value_histogram(self):
        gbm0 = lgb.train(self.params, self.train_data, num_boost_round=10)
        ax0 = lgb.plot_split_value_histogram(gbm0, 27)
        self.assertIsInstance(ax0, matplotlib.axes.Axes)
        self.assertEqual(ax0.get_title(), 'Split value histogram for feature with index 27')
        self.assertEqual(ax0.get_xlabel(), 'Feature split value')
        self.assertEqual(ax0.get_ylabel(), 'Count')
        self.assertLessEqual(len(ax0.patches), 2)

        gbm1 = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, silent=True)
        gbm1.fit(self.X_train, self.y_train)

        ax1 = lgb.plot_split_value_histogram(gbm1, gbm1.booster_.feature_name()[27], figsize=(10, 5),
                                             title='Histogram for feature @index/name@ @feature@',
                                             xlabel='x', ylabel='y', color='r')
        self.assertIsInstance(ax1, matplotlib.axes.Axes)
        self.assertEqual(ax1.get_title(),
                         'Histogram for feature name {}'.format(gbm1.booster_.feature_name()[27]))
        self.assertEqual(ax1.get_xlabel(), 'x')
        self.assertEqual(ax1.get_ylabel(), 'y')
        self.assertLessEqual(len(ax1.patches), 2)
        for patch in ax1.patches:
            self.assertTupleEqual(patch.get_facecolor(), (1., 0, 0, 1.))  # red

        ax2 = lgb.plot_split_value_histogram(gbm0, 27, bins=10, color=['r', 'y', 'g', 'b'],
                                             title=None, xlabel=None, ylabel=None)
github ankane / eps / test / support / python / lightgbm_classification.py View on Github external
if binary:
  data["drv"] = data["drv"].replace("r", "4")

numeric_features = ["displ", "year", "cyl"]
categorical_features = ["class"]
text_features = []

mapper = DataFrameMapper(
  [(numeric_features, [ContinuousDomain()])] +
  [([f], [CategoricalDomain(), PMMLLabelEncoder()]) for f in categorical_features] +
  [(f, [CategoricalDomain(), CountVectorizer(tokenizer=Splitter())]) for f in text_features]
)

pipeline = PMMLPipeline([
  ("mapper", mapper),
  ("model", LGBMClassifier(n_estimators=1000))
])
pipeline.fit(data, data["drv"], model__categorical_feature=[3])

suffix = "binary" if binary else "multiclass"
sklearn2pmml(pipeline, "test/support/python/lightgbm_" + suffix + ".pmml")

print(list(pipeline.predict(data[:10])))
print(list(pipeline.predict_proba(data[0:1])[0]))
github interpretml / interpret-community / test / test_misc_explainers.py View on Github external
def _get_create_model(self, classification):
        if classification:
            model = LGBMClassifier()
        else:
            model = LGBMRegressor()

        def create_model(x, y):
            return model.fit(x, y)

        return create_model
github interpretml / interpret-community / test / common_utils.py View on Github external
def create_lightgbm_classifier(X, y):
    lgbm = LGBMClassifier(boosting_type='gbdt', learning_rate=0.1,
                          max_depth=5, n_estimators=200, n_jobs=1, random_state=777)
    model = lgbm.fit(X, y)
    return model
github microsoft / LightGBM / tests / python_package_test / test_sklearn.py View on Github external
# custom metric (disable default metric)
        gbm = lgb.LGBMRegressor(metric='None',
                                **params).fit(eval_metric=constant_metric, **params_fit)
        self.assertEqual(len(gbm.evals_result_['training']), 1)
        self.assertIn('error', gbm.evals_result_['training'])

        # default metric for non-default objective with custom metric
        gbm = lgb.LGBMRegressor(objective='regression_l1',
                                **params).fit(eval_metric=constant_metric, **params_fit)
        self.assertEqual(len(gbm.evals_result_['training']), 2)
        self.assertIn('l1', gbm.evals_result_['training'])
        self.assertIn('error', gbm.evals_result_['training'])

        # non-default metric for non-default objective with custom metric
        gbm = lgb.LGBMRegressor(objective='regression_l1', metric='mape',
                                **params).fit(eval_metric=constant_metric, **params_fit)
        self.assertEqual(len(gbm.evals_result_['training']), 2)
        self.assertIn('mape', gbm.evals_result_['training'])
        self.assertIn('error', gbm.evals_result_['training'])

        # multiple metrics for non-default objective with custom metric
        gbm = lgb.LGBMRegressor(objective='regression_l1', metric=['l1', 'gamma'],
                                **params).fit(eval_metric=constant_metric, **params_fit)
        self.assertEqual(len(gbm.evals_result_['training']), 3)
        self.assertIn('l1', gbm.evals_result_['training'])
        self.assertIn('gamma', gbm.evals_result_['training'])
        self.assertIn('error', gbm.evals_result_['training'])

        # custom metric (disable default metric for non-default objective)
        gbm = lgb.LGBMRegressor(objective='regression_l1', metric='None',
                                **params).fit(eval_metric=constant_metric, **params_fit)
github microsoft / LightGBM / tests / python_package_test / test_engine.py View on Github external
# enable display training loss
        cv_res = lgb.cv(params_with_metric, lgb_train, num_boost_round=10,
                        nfold=3, stratified=False, shuffle=False,
                        metrics='l1', verbose_eval=False, eval_train_metric=True)
        self.assertIn('train l1-mean', cv_res)
        self.assertIn('valid l1-mean', cv_res)
        self.assertNotIn('train l2-mean', cv_res)
        self.assertNotIn('valid l2-mean', cv_res)
        self.assertEqual(len(cv_res['train l1-mean']), 10)
        self.assertEqual(len(cv_res['valid l1-mean']), 10)
        # self defined folds
        tss = TimeSeriesSplit(3)
        folds = tss.split(X_train)
        cv_res_gen = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=folds,
                            verbose_eval=False)
        cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=tss,
                            verbose_eval=False)
        np.testing.assert_allclose(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])
        # lambdarank
        X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                                           '../../examples/lambdarank/rank.train'))
        q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                          '../../examples/lambdarank/rank.train.query'))
        params_lambdarank = {'objective': 'lambdarank', 'verbose': -1, 'eval_at': 3}
        lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
        # ... with l2 metric
        cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
                               metrics='l2', verbose_eval=False)
        self.assertEqual(len(cv_res_lambda), 2)
        self.assertFalse(np.isnan(cv_res_lambda['l2-mean']).any())
        # ... with NDCG (default) metric
        cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
github microsoft / LightGBM / tests / python_package_test / test_engine.py View on Github external
tss = TimeSeriesSplit(3)
        folds = tss.split(X_train)
        cv_res_gen = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=folds,
                            verbose_eval=False)
        cv_res_obj = lgb.cv(params_with_metric, lgb_train, num_boost_round=10, folds=tss,
                            verbose_eval=False)
        np.testing.assert_allclose(cv_res_gen['l2-mean'], cv_res_obj['l2-mean'])
        # lambdarank
        X_train, y_train = load_svmlight_file(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                                           '../../examples/lambdarank/rank.train'))
        q_train = np.loadtxt(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                          '../../examples/lambdarank/rank.train.query'))
        params_lambdarank = {'objective': 'lambdarank', 'verbose': -1, 'eval_at': 3}
        lgb_train = lgb.Dataset(X_train, y_train, group=q_train)
        # ... with l2 metric
        cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
                               metrics='l2', verbose_eval=False)
        self.assertEqual(len(cv_res_lambda), 2)
        self.assertFalse(np.isnan(cv_res_lambda['l2-mean']).any())
        # ... with NDCG (default) metric
        cv_res_lambda = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10, nfold=3,
                               verbose_eval=False)
        self.assertEqual(len(cv_res_lambda), 2)
        self.assertFalse(np.isnan(cv_res_lambda['ndcg@3-mean']).any())
        # self defined folds with lambdarank
        cv_res_lambda_obj = lgb.cv(params_lambdarank, lgb_train, num_boost_round=10,
                                   folds=GroupKFold(n_splits=3),
                                   verbose_eval=False)
        np.testing.assert_allclose(cv_res_lambda['ndcg@3-mean'], cv_res_lambda_obj['ndcg@3-mean'])