# custom metric (disable default metric)
gbm = lgb.LGBMRegressor(metric='None',
                        **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 1)
self.assertIn('error', gbm.evals_result_['training'])

# default metric for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1',
                        **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('l1', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training'])

# non-default metric for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1', metric='mape',
                        **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 2)
self.assertIn('mape', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training'])

# multiple metrics for non-default objective with custom metric
gbm = lgb.LGBMRegressor(objective='regression_l1', metric=['l1', 'gamma'],
                        **params).fit(eval_metric=constant_metric, **params_fit)
self.assertEqual(len(gbm.evals_result_['training']), 3)
self.assertIn('l1', gbm.evals_result_['training'])
self.assertIn('gamma', gbm.evals_result_['training'])
self.assertIn('error', gbm.evals_result_['training'])

# custom metric (disable default metric for non-default objective)
gbm = lgb.LGBMRegressor(objective='regression_l1', metric='None',
                        **params).fit(eval_metric=constant_metric, **params_fit)
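The assertions above reference constant_metric, params, and params_fit, which are fixtures defined elsewhere in the test module. A minimal sketch consistent with the 'error' key and the 'training' eval-set name the assertions check (assumed shapes, not the verbatim fixtures):

import numpy as np
import lightgbm as lgb

def constant_metric(y_true, y_pred):
    # Custom eval metric returns (name, value, is_higher_better)
    return 'error', 0.0, False

X, y = np.random.rand(100, 4), np.random.rand(100)
params = {'n_estimators': 5, 'verbose': -1}
# Evaluating on the training data itself makes lightgbm name the set 'training'
params_fit = {'X': X, 'y': y, 'eval_set': [(X, y)]}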
def test_lightgbm__regressor(self):
    model = LGBMRegressor(n_estimators=3, min_child_samples=1)
    self._test_single_output_core(model)
def test_prun_cv_x():
    with pytest.raises(TypeError):
        pruner = PrunedCV(cv=4, tolerance=.1)
        model = LGBMRegressor()
        x = [1, 2, 3]
        y = np.array([1, 2, 3])
        pruner.cross_val_score(model, x, y)
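PrunedCV here comes from the prunedcv package; the test pins down that cross_val_score rejects a plain Python list for x. For contrast, a passing call would hand it array-like data — a sketch, assuming only the API already shown in the test:

import numpy as np
from lightgbm import LGBMRegressor
from prunedcv import PrunedCV

pruner = PrunedCV(cv=4, tolerance=.1)
model = LGBMRegressor()
x = np.random.rand(40, 3)  # numpy array instead of a list -> no TypeError
y = np.random.rand(40)
pruner.cross_val_score(model, x, y)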
features = np.array(features)
labels = np.array(self.labels).reshape((-1,))

# Empty array for feature importances
feature_importance_values = np.zeros(len(feature_names))

print('Training Gradient Boosting Model\n')

# Iterate through the training runs
for _ in range(n_iterations):
    if task == 'classification':
        model = lgb.LGBMClassifier(n_estimators=1000, learning_rate=0.05, verbose=-1)
    elif task == 'regression':
        model = lgb.LGBMRegressor(n_estimators=1000, learning_rate=0.05, verbose=-1)
    else:
        raise ValueError('Task must be either "classification" or "regression"')

    # Training with early stopping requires a validation set
    if early_stopping:
        train_features, valid_features, train_labels, valid_labels = train_test_split(
            features, labels, test_size=0.15)

        # Train the model with early stopping
        model.fit(train_features, train_labels, eval_metric=eval_metric,
                  eval_set=[(valid_features, valid_labels)],
                  early_stopping_rounds=100, verbose=-1)

        # Clean up memory
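        # NOTE: passing early_stopping_rounds / verbose to fit() requires
        # lightgbm < 4.0; on 4.x use callbacks=[lgb.early_stopping(100)].
        # Hypothetical continuation -- the snippet is cut off after the
        # cleanup comment; freeing the fold data and averaging importances
        # across runs is the usual next step (assumes `import gc` up top):
        gc.enable()
        del train_features, train_labels, valid_features, valid_labels
        gc.collect()

    # Average the importances over the n_iterations runs
    feature_importance_values += model.feature_importances_ / n_iterations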
def main():
    sf = ss.Select(Sequence=True, Random=False, Cross=False)  # initialize the selector and pick the search flows you need
    sf.ImportDF(df, label='nextbuy')  # load the dataset and name the target label
    sf.ImportLossFunction(score, direction='ascend')  # register the scoring function and the optimization direction
    sf.InitialNonTrainableFeatures(['buy', 'nextbuy', 'o_date', 'a_date', 'PredictDays', 'user_id'])  # declare features that must not be trained on
    sf.InitialFeatures(['age_x', 'sex_x', 'user_lv_cd_x', 'buycnt', 'daybeforelastbuy_o_ave'])  # set the starting feature combination
    sf.GenerateCol()  # build the candidate feature pool (see the readme in the repo root for this function's arguments)
    sf.SetSample(1, samplemode=1)  # set the sampling ratio and the sampling mode
    sf.SetTimeLimit(100)  # cap the total runtime, in minutes
    sf.clf = lgbm.LGBMRegressor(random_state=1, num_leaves=6, n_estimators=1000, max_depth=3, learning_rate=0.2, n_jobs=8)  # set the regression model
    sf.SetLogFile('record.log')  # set the log file
    sf.run(validate)  # pass in the validation function and start the run
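score and validate are helpers defined elsewhere in this project. A minimal placeholder consistent with direction='ascend' (higher scores are better) might look like the following; roc_auc_score is an assumption for illustration, not the project's actual metric:

from sklearn.metrics import roc_auc_score

def score(y_true, y_pred):
    # Higher is better, matching direction='ascend'
    return roc_auc_score(y_true, y_pred)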
free_raw_data=False)
maxevals = 100
early_stop_dict = {}
objective.i = 0
trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=maxevals,
            trials=trials)
best['num_boost_round'] = early_stop_dict[trials.best_trial['tid']]
best['num_leaves'] = int(best['num_leaves'])
best['verbose'] = -1
model = lgb.LGBMRegressor(**best)
model.fit(dtrain.data,
          dtrain.label,
          feature_name=all_columns,
          categorical_feature=categorical_columns)

X_te = df_te.iloc[:, :-1].values
y_te = df_te['yield'].values
preds = model.predict(X_te)
mse = mean_squared_error(y_te, preds)
r2 = r2_score(y_te, preds)
print(np.sqrt(mse), r2)

pickle.dump(model, open("data/models/airbnb_lgb_model.p", 'wb'))
pickle.dump(best, open("data/models/airbnb_lgb_best_params.p", 'wb'))
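The fmin call above assumes an objective function (not shown) that trains with early stopping, records the number of rounds each trial used in early_stop_dict, and increments its own call counter objective.i so the keys line up with hyperopt's trial ids. A plausible sketch under those assumptions — dvalid is a hypothetical held-out lgb.Dataset, and the search space is assumed to carry 'objective': 'regression' and 'metric': 'rmse':

import lightgbm as lgb

def objective(params):
    params['num_leaves'] = int(params['num_leaves'])
    booster = lgb.train(params, dtrain, num_boost_round=1000,
                        valid_sets=[dvalid],
                        callbacks=[lgb.early_stopping(20)])
    # Remember how many rounds this trial actually used
    early_stop_dict[objective.i] = booster.best_iteration
    objective.i += 1
    # fmin minimizes, so return the validation RMSE directly
    return booster.best_score['valid_0']['rmse']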
REGRESSION_HPARAM_SPACE = {
    'extratree': {
        'model': ExtraTreesRegressor,
        'param': EXTRA_TREES_REGRESSOR_PARAMS
    },
    'ridge': {
        'model': Ridge,
        'param': RIDGE_REGRESSOR_PARAMS
    },
    'random_forest': {
        'model': RandomForestRegressor,
        'param': RANDOM_FOREST_REGRESSOR_PARAMS
    },
    'lgbm': {
        'model': LGBMRegressor,
        'param': LGBM_REGRESSOR_PARAMS
    },
    'adaboost': {
        'model': AdaBoostRegressor,
        'param': ADABOOST_REGRESSOR_PARAMS
    },
    'catboost': {
        'model': CatBoostRegressor,
        'param': CATBOOST_REGRESSOR_PARAMS
    }
}
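# A registry like this is usually consumed by name: look up the estimator
# class and its search space, then instantiate it. A minimal sketch --
# get_model is a hypothetical helper, not part of the original module:
def get_model(name, space=REGRESSION_HPARAM_SPACE, **overrides):
    entry = space[name]
    return entry['model'](**overrides), entry['param']

# e.g.: model, search_space = get_model('lgbm', n_estimators=200)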
CLASSIFICATION_HPARAM_SPACE = {
    'knn': {
        'model': KNeighborsClassifier,
        'param': KNN_CLASSIFIER_PARAMS
#print(df.shape)
t3 = pd.read_csv('trainb_3month_3level_userid_2.csv')
t3p = pd.read_csv('testb_3month_3level_userid_2.csv')
t3 = pd.concat([t3, t3p])
notusable = ['buy', 'nextbuy', 'o_date', 'a_date', 'PredictDays', 'user_id', 'CreateGroup']
month = [i for i in t3.columns if i not in notusable]
tt = t3[['user_id', 'CreateGroup'] + month]
tt.columns = ['user_id', 'CreateGroup'] + ['{}_3month'.format(i) for i in month]
df2 = df.merge(tt, on=['user_id', 'CreateGroup'], how='left')
clf = lgbm.LGBMClassifier(objective='binary', num_leaves=35, max_depth=-1,
                          learning_rate=0.05, seed=1, colsample_bytree=0.8,
                          subsample=0.8, n_estimators=155)
clf2 = lgbm.LGBMRegressor(num_leaves=13, max_depth=4, learning_rate=0.05, seed=1,
                          colsample_bytree=0.8, subsample=0.8, n_estimators=98)
fa = readlog('record_seq_3month.log', 0.683594)
fa2 = ['nextbuy_1', 'nextbuy_2', 'nextbuy_3', 'nextbuy_4', 'nextbuy_5', 'nextbuy_6',
       'nextbuy_7', 'nextbuy_8', 'daybeforelastbuy_o_sum', 'o_day_series',
       'CommentEndDateDifference']
# Res = validate2(df2, df2, [fa, fa2], [clf, clf2], score, v=True)
predictsecond(df2[df2.CreateGroup < 337].reset_index(), df2[df2.CreateGroup > 337].reset_index(),
              [fa, fa2], [clf, clf2], 'submissionb_618_5200')
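readlog, validate2, and predictsecond are project-specific helpers not shown on this page. Pairing a binary LGBMClassifier with an LGBMRegressor suggests a two-stage scheme: first score whether a user will buy, then regress the day of the next purchase. A hypothetical sketch of that idea, not the project's actual implementation:

def predict_two_stage(train, test, feats_clf, feats_reg, clf, reg):
    # Stage 1: probability that the user buys at all
    clf.fit(train[feats_clf], train['buy'])
    proba = clf.predict_proba(test[feats_clf])[:, 1]
    # Stage 2: estimated days until the next purchase
    reg.fit(train[feats_reg], train['nextbuy'])
    days = reg.predict(test[feats_reg])
    return proba, days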