How to use the fklearn.training.classification.xgb_classification_learner function in fklearn

To help you get started, we’ve selected a few fklearn examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Example from github.com/nubank/fklearn — tests/training/test_classification.py (view on GitHub):
"x2": [0, 1, 1, 0, 1, 0],
        "w": [2, 1, 2, 0.5, 2, 0.5],
        'y': [0, 1, 2, 1, 2, 0]
    })

    df_test_multinomial = pd.DataFrame({
        'id': ["id4", "id4", "id5", "id6", "id5", "id6"],
        'x1': [12.0, 1000.0, -4.0, 0.0, -4.0, 0.0],
        "x2": [1, 1, 0, 1, 0, 1],
        "w": [1, 2, 0, 0.5, 0, 0.5],
        'y': [1, 2, 0, 1, 2, 0]
    })

    features = ["x1", "x2"]

    learner_binary = xgb_classification_learner(features=features,
                                                target="y",
                                                learning_rate=0.1,
                                                num_estimators=20,
                                                extra_params={"max_depth": 4, "seed": 42},
                                                prediction_column="prediction",
                                                weight_column="w")

    predict_fn_binary, pred_train_binary, log = learner_binary(df_train_binary)

    pred_test_binary = predict_fn_binary(df_test_binary)

    expected_col_train = df_train_binary.columns.tolist() + ["prediction"]
    expected_col_test = df_test_binary.columns.tolist() + ["prediction"]

    assert Counter(expected_col_train) == Counter(pred_train_binary.columns.tolist())
    assert Counter(expected_col_test) == Counter(pred_test_binary.columns.tolist())
Example from github.com/nubank/fklearn — tests/tuning/test_parameter_tuners.py (view on GitHub):
def param_train_fn(space, train_set):
    """Build an xgb classification learner from a hyperparameter space and fit it on train_set.

    `space` is a dict providing "learning_rate" and "num_estimators"; returns whatever
    the fitted learner returns (prediction fn, scored train set, and logs in fklearn).
    """
    learner = xgb_classification_learner(
        features=["x"],
        target="target",
        learning_rate=space["learning_rate"],
        num_estimators=space["num_estimators"],
    )
    return learner(train_set)
Example from github.com/nubank/fklearn — tests/tuning/test_parameter_tuners.py (view on GitHub):
def param_train_fn(space, train_set):
    """Train an XGBoost classifier on `train_set` using the hyperparameters in `space`.

    The space dict supplies "learning_rate" and "num_estimators"; the single feature
    column is "x" and the label column is "target".
    """
    make_model = xgb_classification_learner(
        features=["x"],
        target="target",
        learning_rate=space["learning_rate"],
        num_estimators=space["num_estimators"],
    )
    return make_model(train_set)
Example from github.com/nubank/fklearn — tests/training/test_classification.py (view on GitHub):
expected_col_test = df_test_binary.columns.tolist() + ["prediction"]

    assert Counter(expected_col_train) == Counter(pred_train_binary.columns.tolist())
    assert Counter(expected_col_test) == Counter(pred_test_binary.columns.tolist())
    assert pred_test_binary.prediction.max() < 1
    assert pred_test_binary.prediction.min() > 0
    assert (pred_test_binary.columns == pred_train_binary.columns).all()

    # SHAP test
    pred_shap = predict_fn_binary(df_test_binary, apply_shap=True)
    assert "shap_values" in pred_shap.columns
    assert "shap_expected_value" in pred_shap.columns
    assert np.vstack(pred_shap["shap_values"]).shape == (4, 2)

    # test multinomial case
    learner_multinomial = xgb_classification_learner(features=features,
                                                     target="y",
                                                     learning_rate=0.1,
                                                     num_estimators=20,
                                                     extra_params={"max_depth": 2,
                                                                   "seed": 42,
                                                                   "objective": 'multi:softprob',
                                                                   "num_class": 3},
                                                     prediction_column="prediction")

    predict_fn_multinomial, pred_train_multinomial, log = learner_multinomial(df_train_multinomial)

    pred_test_multinomial = predict_fn_multinomial(df_test_multinomial)

    expected_col_train = df_train_binary.columns.tolist() + ["prediction_0", "prediction_1", "prediction_2",
                                                             "prediction"]
    expected_col_test = df_test_binary.columns.tolist() + ["prediction_0", "prediction_1", "prediction_2",
Example from github.com/nubank/fklearn — src/fklearn/training/ensemble.py (view on GitHub):
train_split_bins: list
        A list with the actual values of the categories from the `train_split_col`. Ex: if you want to split your
        training by tenure and you have a tenure column with integer values [1,2,3,...,12] you can pass this list and
        you will split your training into 12 different models.

    nthread: int
        Number of threads for the XGBoost learners.

    target_column: str
        The name of the target column.

    prediction_column: str
        The name of the column with the predictions from the model.
    """

    train_fns = {b: xgb_classification_learner(features=features_by_bin[b],
                                               learning_rate=learning_rate_by_bin[b],
                                               num_estimators=num_estimators_by_bin[b],
                                               target=target_column,
                                               extra_params=assoc(extra_params_by_bin[b], 'nthread', nthread),
                                               prediction_column=prediction_column + "_bin_" + str(b))
                 for b in train_split_bins}

    train_sets = {b: train_set[train_set[train_split_col] == b]
                  for b in train_split_bins}

    train_results = {b: train_fns[b](train_sets[b])
                     for b in train_split_bins}

    # train_results is a 3-tuple (prediction functions, predicted train dataset, train logs)
    pred_fns = {b: train_results[b][0] for b in train_split_bins}
    train_logs = {b: train_results[b][2] for b in train_split_bins}