How to use the lightgbm.LGBMClassifier class in lightgbm

To help you get started, we’ve selected a few lightgbm.LGBMClassifier examples, based on popular ways the class is used in public projects.
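If you only need a starting point, here is a minimal sketch of the basic fit/predict cycle on synthetic data. It is not taken from any of the projects below, and the hyperparameter values are illustrative assumptions.

import lightgbm as lgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic data purely for illustration.
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Illustrative hyperparameters; tune them for real data.
clf = lgb.LGBMClassifier(n_estimators=100, num_leaves=31, learning_rate=0.1)
clf.fit(X_train, y_train)

print(clf.predict(X_test[:5]))        # predicted class labels
print(clf.predict_proba(X_test[:5]))  # class probabilities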


github microsoft / LightGBM / tests / python_package_test / test_plotting.py
def test_plot_split_value_histogram(self):
        gbm0 = lgb.train(self.params, self.train_data, num_boost_round=10)
        ax0 = lgb.plot_split_value_histogram(gbm0, 27)
        self.assertIsInstance(ax0, matplotlib.axes.Axes)
        self.assertEqual(ax0.get_title(), 'Split value histogram for feature with index 27')
        self.assertEqual(ax0.get_xlabel(), 'Feature split value')
        self.assertEqual(ax0.get_ylabel(), 'Count')
        self.assertLessEqual(len(ax0.patches), 2)

        gbm1 = lgb.LGBMClassifier(n_estimators=10, num_leaves=3, silent=True)
        gbm1.fit(self.X_train, self.y_train)

        ax1 = lgb.plot_split_value_histogram(gbm1, gbm1.booster_.feature_name()[27], figsize=(10, 5),
                                             title='Histogram for feature @index/name@ @feature@',
                                             xlabel='x', ylabel='y', color='r')
        self.assertIsInstance(ax1, matplotlib.axes.Axes)
        self.assertEqual(ax1.get_title(),
                         'Histogram for feature name {}'.format(gbm1.booster_.feature_name()[27]))
        self.assertEqual(ax1.get_xlabel(), 'x')
        self.assertEqual(ax1.get_ylabel(), 'y')
        self.assertLessEqual(len(ax1.patches), 2)
        for patch in ax1.patches:
            self.assertTupleEqual(patch.get_facecolor(), (1., 0, 0, 1.))  # red

        ax2 = lgb.plot_split_value_histogram(gbm0, 27, bins=10, color=['r', 'y', 'g', 'b'],
                                             title=None, xlabel=None, ylabel=None)
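Outside the test harness, the plotting call itself is short. Below is a minimal sketch, assuming matplotlib is installed; the data and parameter values are illustrative, not from the LightGBM test suite.

import lightgbm as lgb
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
clf = lgb.LGBMClassifier(n_estimators=10).fit(X, y)

# Pick a feature the model actually split on, then plot the histogram of
# the split thresholds LightGBM chose for it.
feature = int(clf.feature_importances_.argmax())
ax = lgb.plot_split_value_histogram(clf, feature, bins=10)
plt.show()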
github ankane / eps / test / support / python / lightgbm_classification.py
if binary:
  data["drv"] = data["drv"].replace("r", "4")

numeric_features = ["displ", "year", "cyl"]
categorical_features = ["class"]
text_features = []

mapper = DataFrameMapper(
  [(numeric_features, [ContinuousDomain()])] +
  [([f], [CategoricalDomain(), PMMLLabelEncoder()]) for f in categorical_features] +
  [(f, [CategoricalDomain(), CountVectorizer(tokenizer=Splitter())]) for f in text_features]
)

pipeline = PMMLPipeline([
  ("mapper", mapper),
  ("model", LGBMClassifier(n_estimators=1000))
])
pipeline.fit(data, data["drv"], model__categorical_feature=[3])

suffix = "binary" if binary else "multiclass"
sklearn2pmml(pipeline, "test/support/python/lightgbm_" + suffix + ".pmml")

print(list(pipeline.predict(data[:10])))
print(list(pipeline.predict_proba(data[0:1])[0]))
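Note the model__categorical_feature=[3] argument above: scikit-learn pipelines forward a fit keyword of the form <step name>__<param> to that step's fit method, so it reaches LGBMClassifier.fit as categorical_feature=[3]. A minimal sketch of the same mechanism on synthetic data (the step name and values here are illustrative):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.pipeline import Pipeline
from lightgbm import LGBMClassifier

X, y = make_classification(n_samples=200, n_features=4, random_state=0)
X[:, 3] = np.random.default_rng(0).integers(0, 5, size=len(X))  # integer-coded column

pipe = Pipeline([("model", LGBMClassifier(n_estimators=20))])

# "model__categorical_feature" is stripped of its prefix and passed to
# LGBMClassifier.fit, telling LightGBM to treat column 3 as categorical.
pipe.fit(X, y, model__categorical_feature=[3])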
github interpretml / interpret-community / test / test_misc_explainers.py
def _get_create_model(self, classification):
        if classification:
            model = LGBMClassifier()
        else:
            model = LGBMRegressor()

        def create_model(x, y):
            return model.fit(x, y)

        return create_model
github interpretml / interpret-community / test / common_utils.py
def create_lightgbm_classifier(X, y):
    lgbm = LGBMClassifier(boosting_type='gbdt', learning_rate=0.1,
                          max_depth=5, n_estimators=200, n_jobs=1, random_state=777)
    model = lgbm.fit(X, y)
    return model
github onnx / onnxmltools / tests / sklearn / test_TreeEnsembleConverters.py
def test_lightgbm_classifier(self):
        model = LGBMClassifier(n_estimators=3, min_child_samples=1)
        self._test_binary_classification_core(model)
        self._test_single_output_core(model)
github wangle1218 / Advertising-algorithm-competition / 2018tengxun / _2_lgb_model.py
def lgb_predict(training, label, predict):
    # feature_list = training.columns.tolist()
    # training = training.values
    # predict = predict.values
    print(".....")

    clf = lgb.LGBMClassifier(
        boosting_type='gbdt', num_leaves=41, reg_alpha=0.0, reg_lambda=1.8,
        max_depth=-1, n_estimators=2000, objective='binary',
        subsample=0.6, colsample_bytree=0.7, subsample_freq=1,
        learning_rate=0.08, min_child_samples=50, random_state=42, n_jobs=-1
    )
    # X_train, X_val, y_train, y_val = train_test_split(training, label, test_size=0.1, random_state=42)
    # Manual shuffle, then a 95/5 train/validation split.
    shuffle_indices = np.random.permutation(np.arange(len(label)))
    training = training[shuffle_indices]
    label = label[shuffle_indices]
    train_num = int(0.95 * len(label))
    X_train, X_val = training[:train_num], training[train_num:]
    y_train, y_val = label[:train_num], label[train_num:]
    print("split done")
    del training
    del label
    gc.collect()
github lemon234071 / 2018- / code / baseline.py
def LGB_test(train_x, train_y, test_x, test_y):
    from multiprocessing import cpu_count
    print("LGB test")
    clf = lgb.LGBMClassifier(
        boosting_type='gbdt', num_leaves=31, reg_alpha=0.0, reg_lambda=1,
        max_depth=-1, n_estimators=1000, objective='binary',
        subsample=0.7, colsample_bytree=0.7, subsample_freq=1,
        learning_rate=0.05, min_child_weight=50, random_state=2018, n_jobs=cpu_count() - 1
    )
    clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (test_x, test_y)],
            eval_metric='auc', early_stopping_rounds=100)
    print(clf.feature_importances_)
    return clf, clf.best_score_['valid_1']['auc']
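When early stopping fires, the fitted estimator records the iteration at which the validation metric peaked. The sketch below, on synthetic data and using the same pre-4.0 early_stopping_rounds keyword as the snippet above, shows one way to reuse it at prediction time:

import lightgbm as lgb
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=2000, random_state=0)
X_tr, X_val, y_tr, y_val = train_test_split(X, y, random_state=0)

clf = lgb.LGBMClassifier(n_estimators=1000, learning_rate=0.05)
clf.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], eval_metric='auc',
        early_stopping_rounds=100)

# best_iteration_ marks where validation AUC stopped improving; predict_proba
# can truncate the ensemble to that point explicitly.
proba = clf.predict_proba(X_val, num_iteration=clf.best_iteration_)[:, 1]
print(clf.best_score_['valid_0']['auc'], proba[:5])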
github XAI-ANITI / ethik / docs / create_gallery.py
"race": "category",
        "gender": "category",
        "native-country": "category",
    }

    X = pd.read_csv(url, names=names, header=None, dtype=dtypes)
    X["gender"] = (
        X["gender"].str.strip().astype("category")
    )  # Remove leading whitespace
    y = X.pop("salary").map({" <=50K": False, " >50K": True})

    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, shuffle=True, random_state=42
    )

    model = lgb.LGBMClassifier(random_state=42).fit(X_train, y_train)
    y_pred = model.predict_proba(X_test)[:, 1]
    y_pred = pd.Series(y_pred, name=">$50k")

    explainer = ethik.ClassificationExplainer()

    return explainer, X_test, y_pred, y_test
github YouChouNoBB / 2018-tencent-ad-competition-baseline / bryan_baseline_v2.py
def LGB_predict(train_x, train_y, test_x, res, index):
    print("LGB test")
    clf = lgb.LGBMClassifier(
        boosting_type='gbdt', num_leaves=31, reg_alpha=0.0, reg_lambda=1,
        max_depth=-1, n_estimators=1500, objective='binary',
        subsample=0.7, colsample_bytree=0.7, subsample_freq=1,
        learning_rate=0.05, min_child_weight=50, random_state=2018, n_jobs=-1
    )
    clf.fit(train_x, train_y, eval_set=[(train_x, train_y)], eval_metric='auc', early_stopping_rounds=100)
    res['score' + str(index)] = clf.predict_proba(test_x)[:, 1]
    res['score' + str(index)] = res['score' + str(index)].apply(lambda x: float('%.6f' % x))
    print(str(index) + ' predict finish!')
    res = res.reset_index(drop=True)
    print(res.head())
    gc.collect()
    return res['score'+str(index)]
github chenghuige / wenzheng / projects / ai2018 / sentiment / ensemble / lgb-cv.py
        'boosting_type': 'gbdt',
        'objective': 'multiclass',
        'num_class': num_classes,
        'feature_fraction': 0.8,
        'bagging_fraction': 0.8,
        'verbose': -1,
        'metric': ['multi_logloss'],
        'learning_rate': 0.2,
        'max_depth': 5,
        'num_leaves': 10,
        'reg_lambda': 0.1,
        'num_trees': 500,
        'min_data_in_leaf': 100,
    }

clf = lgb.LGBMClassifier(max_depth=-1, learning_rate=0.05, objective='multiclass',
                         random_state=314, silent=True, metric='None', 
                         n_jobs=4, n_estimators=5000, class_weight='balanced')

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

from sklearn.metrics import f1_score
def evaluate_macroF1_lgb(truth, predictions):  
    # this follows the discussion in https://github.com/Microsoft/LightGBM/issues/1483
    pred_labels = predictions.reshape(len(np.unique(truth)),-1).argmax(axis=0)
    f1 = f1_score(truth, pred_labels, average='macro')
    return ('macroF1', f1, True) 

import lightgbm as lgb

def learning_rate_power_0997(current_iter):
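The snippet is cut off mid-definition. Judging by its name, learning_rate_power_0997 is presumably a per-iteration learning-rate schedule, a common pattern in competition code. A hedged reconstruction follows; the base rate and the floor are assumptions, not the project's actual values, and the commented fit call shows how such a schedule is typically wired up through LightGBM's reset_parameter callback.

import numpy as np

def learning_rate_power_0997(current_iter):
    # Assumed schedule: decay a base rate by a factor of 0.997 per boosting
    # iteration, with an illustrative floor so it never reaches zero.
    base_learning_rate = 0.1
    lr = base_learning_rate * np.power(0.997, current_iter)
    return max(lr, 1e-3)

# Typical wiring: the callback resets learning_rate before each iteration.
# clf.fit(X_train, y_train,
#         callbacks=[lgb.reset_parameter(learning_rate=learning_rate_power_0997)])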