How to use the fklearn.validation.evaluators.split_evaluator function in fklearn

To help you get started, we’ve selected a few fklearn examples based on popular ways this function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nubank / fklearn / tests / validation / test_evaluators.py View on Github external
def test_split_evaluator():
    """Nested split evaluators should nest their result keys by column and value.

    An inner evaluator splitting on ``split_col_a == 1`` is wrapped by an
    outer split on ``split_col_b == 2``. The assertion checks that the
    result is addressable as outer-key -> inner-key -> metric-key.
    """
    frame = pd.DataFrame(
        {
            'split_col_a': [1, 1, 0],
            'split_col_b': [2, 0, 0],
            'target': [0, 1, 2],
            'prediction': [0.5, 0.9, 1.5]
        }
    )

    # Inner level: built with keyword arguments only (no data frame yet).
    inner_eval = split_evaluator(eval_fn=mean_prediction_evaluator, split_col='split_col_a', split_values=[1])

    # Outer level: called positionally with the data, wrapping the inner evaluator.
    outer_result = split_evaluator(frame, inner_eval, 'split_col_b', [2])

    # Only the first row has split_col_b == 2; it also has split_col_a == 1
    # and prediction 0.5 -- presumably why the expected mean is 0.5.
    by_col_b = outer_result['split_evaluator__split_col_b_2']
    by_col_a = by_col_b['split_evaluator__split_col_a_1']
    assert by_col_a['mean_evaluator__prediction'] == 0.5
github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
df = pd.DataFrame(boston['data'], columns=boston['feature_names'])
    df['target'] = boston['target']
    df['time'] = pd.date_range(start='2015-01-01', periods=len(df))
    np.random.seed(42)
    df['space'] = np.random.randint(0, 100, size=len(df))

    # Define train function
    train_fn = linear_regression_learner(features=boston['feature_names'].tolist(), target="target")

    # Define evaluator function
    base_evaluator = combined_evaluators(evaluators=[
        r2_evaluator(target_column='target', prediction_column='prediction'),
        spearman_evaluator(target_column='target', prediction_column='prediction')
    ])

    splitter = split_evaluator(eval_fn=base_evaluator, split_col='RAD', split_values=[4.0, 5.0, 24.0])
    temporal_week_splitter = temporal_split_evaluator(eval_fn=base_evaluator, time_col='time', time_format='%Y-%W')
    temporal_year_splitter = temporal_split_evaluator(eval_fn=base_evaluator, time_col='time', time_format='%Y')

    eval_fn = combined_evaluators(evaluators=[base_evaluator, splitter])
    temporal_week_eval_fn = combined_evaluators(evaluators=[base_evaluator, temporal_week_splitter])
    temporal_year_eval_fn = combined_evaluators(evaluators=[base_evaluator, temporal_year_splitter])

    # Define splitters
    cv_split_fn = out_of_time_and_space_splitter(
        n_splits=5, in_time_limit='2016-01-01', time_column='time', space_column='space'
    )

    tlc_split_fn = time_learning_curve_splitter(training_time_limit='2016-01-01', time_column='time', min_samples=0)

    sc_split_fn = stability_curve_time_splitter(training_time_limit='2016-01-01', time_column='time', min_samples=0)
github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
{'mse_evaluator__target': 0.0,
                 'split_evaluator__date': pd.Timestamp('2015-03-09 00:00:00'),
                 'split_evaluator__feature3': 'a'},
                {'mse_evaluator__target': np.nan,
                 'split_evaluator__date': pd.Timestamp('2015-03-09 00:00:00'),
                 'split_evaluator__feature3': 'b'},
                {'mse_evaluator__target': np.nan,
                 'split_evaluator__date': pd.Timestamp('2015-04-04 00:00:00'),
                 'split_evaluator__feature3': 'a'},
                {'mse_evaluator__target': 0.0,
                 'split_evaluator__date': pd.Timestamp('2015-04-04 00:00:00'),
                 'split_evaluator__feature3': 'b'}]
    expected_df = pd.DataFrame.from_dict(expected)
    data = make_tutorial_data(50).dropna(subset=["feature3"]).assign(prediction=lambda d: d.target)

    feature3_evaluator = split_evaluator(eval_fn=mse_evaluator, split_col="feature3")
    feature3_date_evaluator = split_evaluator(eval_fn=feature3_evaluator, split_col="date")

    results = feature3_date_evaluator(data)

    date_values = [
        np.datetime64("2015-01-06T00:00:00.000000000"),
        np.datetime64("2015-01-14T00:00:00.000000000"),
        np.datetime64("2015-01-22T00:00:00.000000000"),
        np.datetime64("2015-01-30T00:00:00.000000000"),
        np.datetime64("2015-03-08T00:00:00.000000000"),
        np.datetime64("2015-03-09T00:00:00.000000000"),
        np.datetime64("2015-04-04T00:00:00.000000000"),
    ]

    base_evaluator = evaluator_extractor(evaluator_name="mse_evaluator__target")
    feature3_extractor = split_evaluator_extractor(
github nubank / fklearn / tests / validation / test_evaluators.py View on Github external
def test_split_evaluator():
    """Verify the key layout produced when one split evaluator wraps another.

    The outer split is on ``split_col_b`` (value 2) and the inner split is on
    ``split_col_a`` (value 1); the mean-prediction metric is read from the
    innermost level of the returned mapping.
    """
    columns = {
        'split_col_a': [1, 1, 0],
        'split_col_b': [2, 0, 0],
        'target': [0, 1, 2],
        'prediction': [0.5, 0.9, 1.5]
    }
    predictions = pd.DataFrame(columns)

    # Evaluator restricted to split_col_a == 1, wrapping the plain mean evaluator.
    eval_on_a = split_evaluator(eval_fn=mean_prediction_evaluator, split_col='split_col_a', split_values=[1])

    # Apply a second split on split_col_b == 2 directly to the data.
    nested = split_evaluator(predictions, eval_on_a, 'split_col_b', [2])

    outer_key = 'split_evaluator__split_col_b_2'
    inner_key = 'split_evaluator__split_col_a_1'
    metric_key = 'mean_evaluator__prediction'
    assert nested[outer_key][inner_key][metric_key] == 0.5
github nubank / fklearn / tests / metrics / test_pd_extractors.py View on Github external
'split_evaluator__date': pd.Timestamp('2015-03-09 00:00:00'),
                 'split_evaluator__feature3': 'a'},
                {'mse_evaluator__target': np.nan,
                 'split_evaluator__date': pd.Timestamp('2015-03-09 00:00:00'),
                 'split_evaluator__feature3': 'b'},
                {'mse_evaluator__target': np.nan,
                 'split_evaluator__date': pd.Timestamp('2015-04-04 00:00:00'),
                 'split_evaluator__feature3': 'a'},
                {'mse_evaluator__target': 0.0,
                 'split_evaluator__date': pd.Timestamp('2015-04-04 00:00:00'),
                 'split_evaluator__feature3': 'b'}]
    expected_df = pd.DataFrame.from_dict(expected)
    data = make_tutorial_data(50).dropna(subset=["feature3"]).assign(prediction=lambda d: d.target)

    feature3_evaluator = split_evaluator(eval_fn=mse_evaluator, split_col="feature3")
    feature3_date_evaluator = split_evaluator(eval_fn=feature3_evaluator, split_col="date")

    results = feature3_date_evaluator(data)

    date_values = [
        np.datetime64("2015-01-06T00:00:00.000000000"),
        np.datetime64("2015-01-14T00:00:00.000000000"),
        np.datetime64("2015-01-22T00:00:00.000000000"),
        np.datetime64("2015-01-30T00:00:00.000000000"),
        np.datetime64("2015-03-08T00:00:00.000000000"),
        np.datetime64("2015-03-09T00:00:00.000000000"),
        np.datetime64("2015-04-04T00:00:00.000000000"),
    ]

    base_evaluator = evaluator_extractor(evaluator_name="mse_evaluator__target")
    feature3_extractor = split_evaluator_extractor(
        base_extractor=base_evaluator, split_col="feature3", split_values=["a", "b"]