How to use the lightgbm.LGBMRegressor function in lightgbm

To help you get started, we’ve selected a few lightgbm.LGBMRegressor examples, drawn from popular ways the library is used in public projects.
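Before the project snippets, here is a minimal, self-contained sketch of the estimator’s scikit-learn-style interface; the synthetic dataset and parameter values are illustrative, not taken from any of the projects below:

import lightgbm as lgb
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Illustrative synthetic data
X, y = make_regression(n_samples=500, n_features=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# LGBMRegressor follows the scikit-learn estimator interface
model = lgb.LGBMRegressor(n_estimators=100, learning_rate=0.1, num_leaves=31)
model.fit(X_train, y_train)

preds = model.predict(X_test)
print(mean_squared_error(y_test, preds))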


github microsoft / LightGBM / tests / python_package_test / test_sklearn.py
# custom metric (disable default metric)
gbm = lgb.LGBMRegressor(metric='None',
                        **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 1)
self.assertIn('error', gbm.evals_result_['training'])

# default metric for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1',
                        **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('l1', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training'])

# non-default metric for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1', metric='mape',
                        **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('mape', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training'])

# multiple metrics for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1', metric=['l1', 'gamma'],
                        **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 3)
self.assertIn('l1', gbm.evals_result_['training'])
self.assertIn('gamma', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training'])

# custom metric (disable default metric for non-default objective)
gbm = lgb.LGBMRegressor(objective='regression_l1', metric='None',
                        **params).fit(eval_metric=constant_metric, **params_fit)
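The test above exercises how the metric constructor parameter interacts with a custom eval_metric passed to fit: metric='None' disables the built-in metric so only the custom one is recorded. A minimal self-contained sketch of such a custom metric (the constant metric and dataset are illustrative):

import lightgbm as lgb
from sklearn.datasets import make_regression

# For the sklearn API, a custom metric returns (name, value, is_higher_better)
def constant_metric(y_true, y_pred):
    return 'error', 0.0, False

X, y = make_regression(n_samples=200, n_features=5, random_state=0)

# metric='None' disables the default metric, leaving only the custom one
gbm = lgb.LGBMRegressor(metric='None', n_estimators=5)
gbm.fit(X, y, eval_set=[(X, y)], eval_metric=constant_metric)
print(list(gbm.evals_result_['training']))  # ['error']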
github onnx / onnxmltools / tests / sklearn / test_TreeEnsembleConverters.py
def test_lightgbm__regressor(self):
    model = LGBMRegressor(n_estimators=3, min_child_samples=1)
    self._test_single_output_core(model)
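This test hands the model to a shared conversion helper. A minimal sketch of converting a trained LGBMRegressor to ONNX directly, assuming a recent onnxmltools; the input name and tensor shape are illustrative:

import numpy as np
from lightgbm import LGBMRegressor
from onnxmltools import convert_lightgbm
from onnxmltools.convert.common.data_types import FloatTensorType

X = np.random.rand(50, 4).astype(np.float32)
y = np.random.rand(50)
model = LGBMRegressor(n_estimators=3, min_child_samples=1).fit(X, y)

# Declare the input signature, then serialize the converted graph
onnx_model = convert_lightgbm(
    model, initial_types=[('input', FloatTensorType([None, 4]))])
with open('lgbm_regressor.onnx', 'wb') as f:
    f.write(onnx_model.SerializeToString())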
github PiotrekGa / pruned-cv / tests / test_all.py
def test_prun_cv_x():

    with pytest.raises(TypeError):
        pruner = PrunedCV(cv=4, tolerance=.1)

        model = LGBMRegressor()
        x = [1, 2, 3]
        y = np.array([1, 2, 3])
        pruner.cross_val_score(model, x, y)
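The test expects a TypeError because a plain Python list is passed for x. Since LGBMRegressor is scikit-learn compatible, the unpruned equivalent with scikit-learn’s own cross_val_score looks like this (dataset and scoring choice are illustrative):

import numpy as np
from lightgbm import LGBMRegressor
from sklearn.model_selection import cross_val_score
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=10, random_state=0)
model = LGBMRegressor(n_estimators=50)

# 4-fold CV scored by negative MSE; values closer to 0 are better
scores = cross_val_score(model, X, y, cv=4, scoring='neg_mean_squared_error')
print(scores.mean())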
github Jie-Yuan / iFeature / ifeature / features_sub / FeatureSelector.py
features = np.array(features)
labels = np.array(self.labels).reshape((-1,))

# Empty array for feature importances
feature_importance_values = np.zeros(len(feature_names))

print('Training Gradient Boosting Model\n')

# Iterate through each fold
for _ in range(n_iterations):

    if task == 'classification':
        model = lgb.LGBMClassifier(n_estimators=1000, learning_rate=0.05, verbose=-1)

    elif task == 'regression':
        model = lgb.LGBMRegressor(n_estimators=1000, learning_rate=0.05, verbose=-1)

    else:
        raise ValueError('Task must be either "classification" or "regression"')

    # If training using early stopping need a validation set
    if early_stopping:

        train_features, valid_features, train_labels, valid_labels = train_test_split(
            features, labels, test_size=0.15)

        # Train the model with early stopping
        model.fit(train_features, train_labels, eval_metric=eval_metric,
                  eval_set=[(valid_features, valid_labels)],
                  early_stopping_rounds=100, verbose=-1)

        # Clean up memory
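The selector above retrains the booster several times with early stopping and accumulates feature importances. A condensed, self-contained sketch of that idea; note that recent LightGBM releases replaced the early_stopping_rounds/verbose fit arguments with callbacks, which this sketch assumes:

import numpy as np
import lightgbm as lgb
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=400, n_features=8, random_state=0)

n_iterations = 3
importances = np.zeros(X.shape[1])

for _ in range(n_iterations):
    X_tr, X_va, y_tr, y_va = train_test_split(X, y, test_size=0.15)
    model = lgb.LGBMRegressor(n_estimators=1000, learning_rate=0.05)
    model.fit(X_tr, y_tr,
              eval_set=[(X_va, y_va)], eval_metric='l2',
              callbacks=[lgb.early_stopping(100, verbose=False)])
    # Average the importances over the runs
    importances += model.feature_importances_ / n_iterations

print(importances)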
github duxuhao / Feature-Selection / example / JData2018 / S2elect.py
def main():
    sf = ss.Select(Sequence = True, Random = False, Cross = False) # initialize the selector and choose the procedure you need
    sf.ImportDF(df,label = 'nextbuy') # import the dataset and the target label
    sf.ImportLossFunction(score, direction = 'ascend') # import the scoring function and the direction of optimization
    sf.InitialNonTrainableFeatures(['buy','nextbuy','o_date','a_date','PredictDays','user_id']) # register the features that must not be trained on
    sf.InitialFeatures(['age_x', 'sex_x', 'user_lv_cd_x', 'buycnt', 'daybeforelastbuy_o_ave']) # set the starting feature combination
    sf.GenerateCol() # generate the candidate feature pool (see the readme in the repo root for this function's arguments)
    sf.SetSample(1, samplemode = 1) # set the sampling ratio and the sampling mode
    sf.SetTimeLimit(100) # set the maximum running time of the algorithm, in minutes
    sf.clf = lgbm.LGBMRegressor(random_state=1, num_leaves=6, n_estimators=1000, max_depth=3, learning_rate=0.2, n_jobs=8) # set the regression model
    sf.SetLogFile('record.log') # set the log file
    sf.run(validate) # pass in the validation function and start the run
github jrzaurin / pytorch-widedeep / tutorials / lightgbm_airbnb_benchmark.py
free_raw_data=False)

maxevals = 100
early_stop_dict = {}
objective.i = 0
trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=maxevals,
            trials=trials)

best['num_boost_round'] = early_stop_dict[trials.best_trial['tid']]
best['num_leaves'] = int(best['num_leaves'])
best['verbose'] = -1
model = lgb.LGBMRegressor(**best)
model.fit(dtrain.data,
          dtrain.label,
          feature_name=all_columns,
          categorical_feature=categorical_columns)

X_te = df_te.iloc[:,:-1].values
y_te = df_te['yield'].values
preds = model.predict(X_te)
mse = mean_squared_error(y_te, preds)
r2 = r2_score(y_te, preds)

print(np.sqrt(mse), r2)

pickle.dump(model, open("data/models/airbnb_lgb_model.p", 'wb'))
pickle.dump(best, open("data/models/airbnb_lgb_best_params.p", 'wb'))
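The benchmark above searches booster parameters with hyperopt’s fmin and refits an LGBMRegressor on the best trial. A condensed, self-contained sketch of that loop; the search space, fold count, and dataset are illustrative:

import numpy as np
import lightgbm as lgb
from hyperopt import fmin, tpe, hp, Trials
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=300, n_features=10, random_state=0)

space = {
    'num_leaves': hp.quniform('num_leaves', 8, 128, 4),
    'learning_rate': hp.loguniform('learning_rate', np.log(0.01), np.log(0.3)),
}

def objective(params):
    params['num_leaves'] = int(params['num_leaves'])
    model = lgb.LGBMRegressor(n_estimators=100, **params)
    # hyperopt minimizes, so return positive MSE
    return -cross_val_score(model, X, y, cv=3,
                            scoring='neg_mean_squared_error').mean()

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=10, trials=trials)

# fmin returns raw floats; cast integer-valued params before refitting
best['num_leaves'] = int(best['num_leaves'])
model = lgb.LGBMRegressor(n_estimators=100, **best).fit(X, y)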
github datamllab / autokaggle / autokaggle / config.py
REGRESSION_HPARAM_SPACE = {
    'extratree': {
        'model': ExtraTreesRegressor,
        'param': EXTRA_TREES_REGRESSOR_PARAMS
    },
    'ridge': {
        'model': Ridge,
        'param': RIDGE_REGRESSOR_PARAMS
    },
    'random_forest': {
        'model': RandomForestRegressor,
        'param': RANDOM_FOREST_REGRESSOR_PARAMS
    },
    'lgbm': {
        'model': LGBMRegressor,
        'param': LGBM_REGRESSOR_PARAMS
    },
    'adaboost': {
        'model': AdaBoostRegressor,
        'param': ADABOOST_REGRESSOR_PARAMS
    },
    'catboost': {
        'model': CatBoostRegressor,
        'param': CATBOOST_REGRESSOR_PARAMS
    }
}

CLASSIFICATION_HPARAM_SPACE = {
    'knn': {
        'model': KNeighborsClassifier,
        'param': KNN_CLASSIFIER_PARAMS
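REGRESSION_HPARAM_SPACE pairs each short model name with an estimator class and its hyperparameter space. A hypothetical consumer of such a table could look like the following; the inline parameter lists are illustrative stand-ins for autokaggle’s LGBM_REGRESSOR_PARAMS, not the real values:

import random
from lightgbm import LGBMRegressor

# Illustrative stand-in for the real hparam table
LGBM_REGRESSOR_PARAMS = {'n_estimators': [100, 300], 'num_leaves': [15, 31, 63]}
REGRESSION_HPARAM_SPACE = {'lgbm': {'model': LGBMRegressor,
                                    'param': LGBM_REGRESSOR_PARAMS}}

entry = REGRESSION_HPARAM_SPACE['lgbm']
params = {k: random.choice(v) for k, v in entry['param'].items()}
model = entry['model'](**params)  # i.e. LGBMRegressor(**params)
print(model)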
github duxuhao / JData-2018 / Run_11.py
#print(df.shape)

t3 = pd.read_csv('trainb_3month_3level_userid_2.csv')
t3p = pd.read_csv('testb_3month_3level_userid_2.csv')
t3 = pd.concat([t3,t3p])

notusable = ['buy','nextbuy','o_date','a_date','PredictDays','user_id','CreateGroup']
month = [i for i in t3.columns if i not in notusable]
tt = t3[['user_id','CreateGroup'] + month]
tt.columns = ['user_id','CreateGroup'] + ['{}_3month'.format(i) for i in month]
df2 = df.merge(tt, on = ['user_id','CreateGroup'], how = 'left')

clf = lgbm.LGBMClassifier(objective='binary', num_leaves=35, max_depth=-1,
                          learning_rate=0.05, seed=1, colsample_bytree=0.8, subsample=0.8, n_estimators=155)

clf2 = lgbm.LGBMRegressor(num_leaves=13, max_depth=4, learning_rate=0.05, seed=1,
                          colsample_bytree=0.8, subsample=0.8, n_estimators=98)
fa = readlog('record_seq_3month.log',0.683594)

fa2 = ['nextbuy_1','nextbuy_2','nextbuy_3','nextbuy_4','nextbuy_5','nextbuy_6','nextbuy_7','nextbuy_8','daybeforelastbuy_o_sum','o_day_series','CommentEndDateDifference']

#Res = validate2(df2, df2, [fa,fa2], [clf,clf2], score,v = True)
predictsecond(df2[df2.CreateGroup < 337].reset_index(), df2[df2.CreateGroup > 337].reset_index(), [fa,fa2],[clf,clf2],'submissionb_618_5200')
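The script above chains an LGBMClassifier (does the user buy?) with an LGBMRegressor (how soon?). A self-contained sketch of that two-stage pattern on synthetic data; the feature and target construction is illustrative, not from the JData repository:

import numpy as np
import pandas as pd
import lightgbm as lgbm

# Illustrative synthetic data standing in for the JData features
rng = np.random.RandomState(1)
X = pd.DataFrame(rng.rand(500, 5), columns=[f'f{i}' for i in range(5)])
will_buy = (X['f0'] + 0.5 * rng.rand(500) > 0.75).astype(int)
days_to_buy = (30 * X['f1'] + 5).round()

clf = lgbm.LGBMClassifier(objective='binary', num_leaves=35, learning_rate=0.05,
                          n_estimators=155, colsample_bytree=0.8, subsample=0.8)
reg = lgbm.LGBMRegressor(num_leaves=13, max_depth=4, learning_rate=0.05,
                         n_estimators=98, colsample_bytree=0.8, subsample=0.8)

clf.fit(X, will_buy)                                    # stage 1: who buys?
reg.fit(X[will_buy == 1], days_to_buy[will_buy == 1])   # stage 2: when?

buyers = clf.predict_proba(X)[:, 1] > 0.5
predicted_days = reg.predict(X[buyers])
print(predicted_days[:5])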