How to use fklearn - 10 common examples

To help you get started, we’ve selected a few fklearn examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
base_extractor=base_extractors)

    temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y-%W', base_extractor=base_extractors)

    temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y', base_extractor=base_extractors)

    assert extract(cv_results, base_extractors).shape == (5, 9)
    assert extract(cv_results, splitter_extractor).shape == (15, 10)

    assert extract(tlc_results, base_extractors).shape == (12, 9)
    assert extract(tlc_results, splitter_extractor).shape == (36, 10)

    assert extract(sc_results, base_extractors).shape == (5, 9)
    assert extract(sc_results, splitter_extractor).shape == (15, 10)

    assert extract(fw_sc_results, base_extractors).shape == (3, 9)
    assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)

    n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
    n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
    assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
    assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y-%W', base_extractor=base_extractors)

    temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y', base_extractor=base_extractors)

    assert extract(cv_results, base_extractors).shape == (5, 9)
    assert extract(cv_results, splitter_extractor).shape == (15, 10)

    assert extract(tlc_results, base_extractors).shape == (12, 9)
    assert extract(tlc_results, splitter_extractor).shape == (36, 10)

    assert extract(sc_results, base_extractors).shape == (5, 9)
    assert extract(sc_results, splitter_extractor).shape == (15, 10)

    assert extract(fw_sc_results, base_extractors).shape == (3, 9)
    assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)

    n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
    n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
    assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
    assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
time_col='time', time_format='%Y-%W', base_extractor=base_extractors)

    temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y', base_extractor=base_extractors)

    assert extract(cv_results, base_extractors).shape == (5, 9)
    assert extract(cv_results, splitter_extractor).shape == (15, 10)

    assert extract(tlc_results, base_extractors).shape == (12, 9)
    assert extract(tlc_results, splitter_extractor).shape == (36, 10)

    assert extract(sc_results, base_extractors).shape == (5, 9)
    assert extract(sc_results, splitter_extractor).shape == (15, 10)

    assert extract(fw_sc_results, base_extractors).shape == (3, 9)
    assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)

    n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
    n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
    assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
    assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
# Define extractors
    base_extractors = combined_evaluator_extractor(base_extractors=[
        evaluator_extractor(evaluator_name="r2_evaluator__target"),
        evaluator_extractor(evaluator_name="spearman_evaluator__target")
    ])

    splitter_extractor = split_evaluator_extractor(split_col='RAD', split_values=[4.0, 5.0, 24.0],
                                                   base_extractor=base_extractors)

    temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y-%W', base_extractor=base_extractors)

    temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y', base_extractor=base_extractors)

    assert extract(cv_results, base_extractors).shape == (5, 9)
    assert extract(cv_results, splitter_extractor).shape == (15, 10)

    assert extract(tlc_results, base_extractors).shape == (12, 9)
    assert extract(tlc_results, splitter_extractor).shape == (36, 10)

    assert extract(sc_results, base_extractors).shape == (5, 9)
    assert extract(sc_results, splitter_extractor).shape == (15, 10)

    assert extract(fw_sc_results, base_extractors).shape == (3, 9)
    assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)

    n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
    n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
    assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
    assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
splitter_extractor = split_evaluator_extractor(split_col='RAD', split_values=[4.0, 5.0, 24.0],
                                                   base_extractor=base_extractors)

    temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y-%W', base_extractor=base_extractors)

    temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y', base_extractor=base_extractors)

    assert extract(cv_results, base_extractors).shape == (5, 9)
    assert extract(cv_results, splitter_extractor).shape == (15, 10)

    assert extract(tlc_results, base_extractors).shape == (12, 9)
    assert extract(tlc_results, splitter_extractor).shape == (36, 10)

    assert extract(sc_results, base_extractors).shape == (5, 9)
    assert extract(sc_results, splitter_extractor).shape == (15, 10)

    assert extract(fw_sc_results, base_extractors).shape == (3, 9)
    assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)

    n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
    n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
    assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
    assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
evaluator_extractor(evaluator_name="spearman_evaluator__target")
    ])

    splitter_extractor = split_evaluator_extractor(split_col='RAD', split_values=[4.0, 5.0, 24.0],
                                                   base_extractor=base_extractors)

    temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y-%W', base_extractor=base_extractors)

    temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y', base_extractor=base_extractors)

    assert extract(cv_results, base_extractors).shape == (5, 9)
    assert extract(cv_results, splitter_extractor).shape == (15, 10)

    assert extract(tlc_results, base_extractors).shape == (12, 9)
    assert extract(tlc_results, splitter_extractor).shape == (36, 10)

    assert extract(sc_results, base_extractors).shape == (5, 9)
    assert extract(sc_results, splitter_extractor).shape == (15, 10)

    assert extract(fw_sc_results, base_extractors).shape == (3, 9)
    assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)

    n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
    n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
    assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
    assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
])

    splitter_extractor = split_evaluator_extractor(split_col='RAD', split_values=[4.0, 5.0, 24.0],
                                                   base_extractor=base_extractors)

    temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y-%W', base_extractor=base_extractors)

    temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y', base_extractor=base_extractors)

    assert extract(cv_results, base_extractors).shape == (5, 9)
    assert extract(cv_results, splitter_extractor).shape == (15, 10)

    assert extract(tlc_results, base_extractors).shape == (12, 9)
    assert extract(tlc_results, splitter_extractor).shape == (36, 10)

    assert extract(sc_results, base_extractors).shape == (5, 9)
    assert extract(sc_results, splitter_extractor).shape == (15, 10)

    assert extract(fw_sc_results, base_extractors).shape == (3, 9)
    assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)

    n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
    n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
    assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
    assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
base_extractors = combined_evaluator_extractor(base_extractors=[
        evaluator_extractor(evaluator_name="r2_evaluator__target"),
        evaluator_extractor(evaluator_name="spearman_evaluator__target")
    ])

    splitter_extractor = split_evaluator_extractor(split_col='RAD', split_values=[4.0, 5.0, 24.0],
                                                   base_extractor=base_extractors)

    temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y-%W', base_extractor=base_extractors)

    temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
        time_col='time', time_format='%Y', base_extractor=base_extractors)

    assert extract(cv_results, base_extractors).shape == (5, 9)
    assert extract(cv_results, splitter_extractor).shape == (15, 10)

    assert extract(tlc_results, base_extractors).shape == (12, 9)
    assert extract(tlc_results, splitter_extractor).shape == (36, 10)

    assert extract(sc_results, base_extractors).shape == (5, 9)
    assert extract(sc_results, splitter_extractor).shape == (15, 10)

    assert extract(fw_sc_results, base_extractors).shape == (3, 9)
    assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)

    n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
    n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
    assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
    assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
github nubank / fklearn / tests / training / test_pipeline.py View on Github external
"cat": ["c1", "c1", "c2", None, "c2", "c4"],
        'y': [2.3, 4.0, 100.0, -3.9, 100.0, -3.9]
    })

    df_test = pd.DataFrame({
        'id': ["id4", "id4", "id5", "id6", "id5", "id6"],
        'x1': [12.0, 1000.0, -4.0, 0.0, -4.0, 0.0],
        "x2": [1, 1, 0, None, 0, 1],
        "cat": ["c1", "c2", "c5", None, "c2", "c3"],
        'y': [1.3, -4.0, 0.0, 49, 0.0, 49]
    })

    features = ["x1", "x2", "cat"]
    target = "y"

    train_fn = build_pipeline(
        placeholder_imputer(columns_to_impute=["x1", "x2"], placeholder_value=-999),
        onehot_categorizer(columns_to_categorize=["cat"], hardcode_nans=True),
        xgb_regression_learner(features=features,
                               target=target,
                               num_estimators=20,
                               extra_params={"seed": 42}))

    predict_fn, pred_train, log = train_fn(df_train)

    pred_test = predict_fn(df_test)

    expected_feature_columns_after_encoding = ["x1", "x2", "fklearn_feat__cat==c1", "fklearn_feat__cat==c2",
                                               "fklearn_feat__cat==c4", "fklearn_feat__cat==nan"]

    assert set(pred_test.columns) == set(expected_feature_columns_after_encoding + ["id", target, "prediction"])
github nubank / fklearn / tests / training / test_pipeline.py View on Github external
def test_build_pipeline_predict_arguments_assertion():
    """Expect ``ValueError`` when a pipeline is built and run with ``invalid_learner``.

    The learner below returns a predict function declared with ``*args`` and
    ``**kwargs``; presumably that variadic signature is what ``build_pipeline``
    rejects (confirm against the pipeline implementation).
    """
    frame = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]})

    @fp.curry
    def invalid_learner(df):
        # Predict function that accepts arbitrary extra positional/keyword arguments.
        def predict(dataset, *args, **kwargs):
            extra_count = len(args) + len(kwargs)
            return dataset + extra_count

        # Learner triple: (predict function, transformed data, log).
        return predict, df, dict()

    with pytest.raises(ValueError):
        # Both building the pipeline and applying it stay inside the context,
        # so an error raised at either step satisfies the expectation.
        pipeline = build_pipeline(invalid_learner)
        pipeline(frame)