How to use the fklearn.tuning.utils.get_avg_metric_from_extractor function in fklearn

To help you get started, we’ve selected a few fklearn examples, based on popular ways it is used in public projects.


github nubank / fklearn / tests / tuning / test_utils.py (View on GitHub)
def test_get_avg_metric_from_extractor(logs, base_extractor, metric_name):
    result = get_avg_metric_from_extractor(logs[0], base_extractor, metric_name)
    assert result == 0.8
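
In the test above, logs, base_extractor and metric_name are pytest fixtures defined elsewhere in the test suite. In typical use the same call is made on the log dictionary returned by fklearn's cross-validation validator: the extractor pulls each fold's evaluation result out of the validator log, and get_avg_metric_from_extractor averages the chosen metric over the folds. The sketch below illustrates that pattern end to end; the toy data, the column names and the roc_auc_evaluator__target evaluator name are assumptions made for illustration, not taken from the snippets on this page.

import numpy as np
import pandas as pd

from fklearn.training.classification import logistic_classification_learner
from fklearn.validation.splitters import k_fold_splitter
from fklearn.validation.evaluators import roc_auc_evaluator
from fklearn.validation.validator import validator
from fklearn.metrics.pd_extractors import evaluator_extractor
from fklearn.tuning.utils import get_avg_metric_from_extractor

# Toy binary-classification data (made up for this sketch)
rng = np.random.RandomState(0)
data = pd.DataFrame({"x1": rng.normal(size=500), "x2": rng.normal(size=500)})
data["target"] = (data["x1"] + rng.normal(size=500) > 0).astype(int)

# Curried fklearn building blocks: learner, splitter and evaluator
train_fn = logistic_classification_learner(features=["x1", "x2"], target="target")
split_fn = k_fold_splitter(n_splits=3)
eval_fn = roc_auc_evaluator(target_column="target")  # default eval name assumed to be "roc_auc_evaluator__target"

# Cross-validate; the returned dict carries one validator_log entry per fold
logs = validator(data, split_fn, train_fn, eval_fn)

# The extractor turns each fold's evaluation into a DataFrame row keyed by the eval name,
# and get_avg_metric_from_extractor averages that column over the folds
base_extractor = evaluator_extractor(evaluator_name="roc_auc_evaluator__target")
avg_auc = get_avg_metric_from_extractor(logs, base_extractor, "roc_auc_evaluator__target")
print(avg_auc)
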
github nubank / fklearn / src / fklearn / tuning / samplers.py (View on GitHub)
        parallel: bool (default False)
            Whether to shuffle and score the candidate features in parallel

        nthread: int (default 1)
            Number of threads used when parallel is True

        seed: int (default 7)
            Random seed

        Returns
        ----------
        features: list of str
            The remaining features after removing based on feature importance

    """
    random.seed(seed)

    curr_metric = get_avg_metric_from_extractor(log, extractor, metric_name)
    eval_size = eval_data.shape[0]

    features_to_shuffle = order_feature_importance_avg_from_logs(log)[-max_removed_by_step:] \
        if speed_up_by_importance else get_used_features(log)

    def shuffle(feature: str) -> pd.DataFrame:
        # .values keeps the permuted order; assigning the raw Series would realign it on the index
        return eval_data.assign(**{feature: eval_data[feature].sample(frac=1.0).values})

    feature_to_delta_metric = compose(lambda m: curr_metric - m,
                                      get_avg_metric_from_extractor(extractor=extractor, metric_name=metric_name),
                                      gen_validator_log(fold_num=0, test_size=eval_size), eval_fn, predict_fn, shuffle)

    if parallel:
        metrics = Parallel(n_jobs=nthread, backend="threading")(
            delayed(feature_to_delta_metric)(feature) for feature in features_to_shuffle)
        feature_to_delta_metric = dict(zip(features_to_shuffle, metrics))
        gc.collect()

    else:
        feature_to_delta_metric = {feature: feature_to_delta_metric(feature) for feature in features_to_shuffle}

    return pipe(feature_to_delta_metric,
                valfilter(lambda delta_metric: delta_metric < threshold),
                sorted(key=lambda f: feature_to_delta_metric.get(f)),
                take(max_removed_by_step),
                list)
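
Read right to left, the compose(...) call above builds a per-feature permutation check: shuffle one column of eval_data, score it with the trained predict_fn, evaluate the predictions, wrap the evaluation as a single-fold validator log, average the metric with get_avg_metric_from_extractor, and subtract the result from the unshuffled baseline curr_metric. Features whose delta falls below threshold barely matter to the model and become removal candidates. The hedged rewrite below spells those steps out one at a time; it reuses the same local names as the snippet and introduces nothing new from fklearn.

def feature_to_delta_metric(feature: str) -> float:
    # Same pipeline as the compose(...) above, one step per line
    shuffled_data = shuffle(feature)              # permute a single feature column
    predictions = predict_fn(shuffled_data)       # score with the already-trained model
    eval_log = eval_fn(predictions)               # evaluate the shuffled predictions
    as_validator_log = gen_validator_log(fold_num=0, test_size=eval_size)(eval_log)
    shuffled_metric = get_avg_metric_from_extractor(as_validator_log,
                                                    extractor=extractor,
                                                    metric_name=metric_name)
    # A small delta means shuffling barely hurt the metric, i.e. the feature adds
    # little information and is a candidate for removal (delta < threshold above)
    return curr_metric - shuffled_metric
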
github nubank / fklearn / src / fklearn / tuning / stoppers.py (View on GitHub)
threshold: float (default 0.001)
        Threshold for model performance comparison

    Returns
    ----------
    stop: bool
        A boolean indicating whether to stop the recursion or not
    """

    if len(logs) < early_stop:
        return False

    log_list = [get_best_performing_log(log, extractor, metric_name) for log in logs]

    limited_logs = list(take(early_stop, log_list))
    curr_auc = get_avg_metric_from_extractor(limited_logs[-1], extractor, metric_name)

    return all(
        [(curr_auc - get_avg_metric_from_extractor(log, extractor, metric_name)) <= threshold
         for log in limited_logs[:-1]])
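
Stripped of the log plumbing, the early-stopping check above is plain arithmetic on the averaged metrics: take the first early_stop entries of the log list (in whatever order the surrounding selection loop accumulates them), reduce each one to its best-performing log with get_best_performing_log, and stop only when the last entry of that window improves on none of the others by more than threshold. A self-contained toy version of the comparison, with made-up metric values:

# Toy illustration of the no-improvement rule above (metric values are made up)
def should_stop(avg_metrics, early_stop=3, threshold=0.001):
    if len(avg_metrics) < early_stop:
        return False
    window = list(avg_metrics)[:early_stop]   # same slice as take(early_stop, ...)
    current = window[-1]
    # stop when the current average beats no earlier one by more than `threshold`
    return all((current - previous) <= threshold for previous in window[:-1])

print(should_stop([0.79, 0.80, 0.81]))        # False: the latest average is a clear improvement
print(should_stop([0.800, 0.8004, 0.8001]))   # True: no gain above the 0.001 threshold
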
github nubank / fklearn / src / fklearn / tuning / stoppers.py (View on GitHub)
Number of iterations without improvement before stopping

    threshold: float (default 0.001)
        Threshold for model performance comparison

    Returns
    ----------
    stop: bool
        A boolean indicating whether to stop the recursion or not
    """

    if len(logs) < early_stop:
        return False

    limited_logs = list(take(early_stop, logs))
    curr_auc = get_avg_metric_from_extractor(limited_logs[-1], extractor, metric_name)

    return all(
        [(curr_auc - get_avg_metric_from_extractor(log, extractor, metric_name)) <= threshold
         for log in limited_logs[:-1]]
    )
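
This second stoppers.py excerpt differs from the previous one only in how each log entry is reduced: the earlier variant first maps get_best_performing_log over the logs, while this one feeds each log straight to get_avg_metric_from_extractor. Assuming the excerpt is fklearn's stop_by_no_improvement (the function name is cropped out of the snippet), a hedged call looks like the sketch below; the evaluator name is the same assumption used in the first sketch on this page, and logs stands for the list of validation logs collected by the selection loop.

from fklearn.metrics.pd_extractors import evaluator_extractor
from fklearn.tuning.stoppers import stop_by_no_improvement

extractor = evaluator_extractor(evaluator_name="roc_auc_evaluator__target")  # assumed metric name

# `logs` is the list of validation log dicts accumulated so far by the selection loop
stop = stop_by_no_improvement(logs,
                              extractor=extractor,
                              metric_name="roc_auc_evaluator__target",
                              early_stop=3,        # window size checked for improvement
                              threshold=0.001)     # minimum gain that counts as an improvement
if stop:
    print("no improvement above the threshold; stop the selection loop")
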