How to use the fklearn.training.utils.log_learner_time function in fklearn

To help you get started, we’ve selected a few fklearn examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nubank / fklearn / src / fklearn / training / ensemble.py View on Github external
@log_learner_time(learner_name='xgb_octopus_classification_learner')
def xgb_octopus_classification_learner(train_set: pd.DataFrame,
                                       learning_rate_by_bin: Dict[T, float],
                                       num_estimators_by_bin: Dict[T, int],
                                       extra_params_by_bin: Dict[T, Dict[str, Any]],
                                       features_by_bin: Dict[T, List[str]],
                                       train_split_col: str,
                                       train_split_bins: List,
                                       nthread: int,
                                       target_column: str,
                                       prediction_column: str = "prediction") -> LearnerReturnType:

    """
    Octopus ensemble allows you to inject domain-specific knowledge to force a split on an initial feature, instead of
    assuming the tree model will make that intelligent split on its own. It works by first defining a split on your
    dataset and then training one individual model on each separate dataset.
github nubank / fklearn / src / fklearn / training / regression.py View on Github external
@log_learner_time(learner_name='xgb_regression_learner')
def xgb_regression_learner(df: pd.DataFrame,
                           features: List[str],
                           target: str,
                           learning_rate: float = 0.1,
                           num_estimators: int = 100,
                           extra_params: Dict[str, Any] = None,
                           prediction_column: str = "prediction",
                           weight_column: str = None,
                           encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits an XGBoost regressor to the dataset. It first generates a DMatrix
    with the specified features and labels from `df`. Then it fits an XGBoost
    model to this DMatrix. Returns the predict function for the model and the
    predictions for the input dataset.

    Parameters
github nubank / fklearn / src / fklearn / training / classification.py View on Github external
@log_learner_time(learner_name='catboost_classification_learner')
def catboost_classification_learner(df: pd.DataFrame,
                                    features: List[str],
                                    target: str,
                                    learning_rate: float = 0.1,
                                    num_estimators: int = 100,
                                    extra_params: LogType = None,
                                    prediction_column: str = "prediction",
                                    weight_column: str = None,
                                    encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits a CatBoost classifier to the dataset. It first generates a Pool
    with the specified features and labels from `df`. Then, it fits a CatBoost
    model to this Pool. Returns the predict function for the model and the
    predictions for the input dataset.

    Parameters
github nubank / fklearn / src / fklearn / training / unsupervised.py View on Github external
@log_learner_time(learner_name='isolation_forest_learner')
def isolation_forest_learner(df: pd.DataFrame,
                             features: List[str],
                             params: Dict[str, Any] = None,
                             prediction_column: str = "prediction",
                             encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits an anomaly detection algorithm (Isolation Forest) to the dataset

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame with features and target columns.
        The model will be trained to predict the target column
        from the features.

    features : list of str
github nubank / fklearn / src / fklearn / training / transformation.py View on Github external
@log_learner_time(learner_name='target_categorizer')
def target_categorizer(df: pd.DataFrame,
                       columns_to_categorize: List[str],
                       target_column: str,
                       smoothing: float = 1.0,
                       ignore_unseen: bool = True,
                       store_mapping: bool = False) -> LearnerReturnType:
    """
    Replaces categorical variables with the smoothed mean of the target variable by category.
    Uses a weighted average with the overall mean of the target variable for smoothing.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain `columns_to_categorize` and `target_column` columns.

    columns_to_categorize : list of str
github nubank / fklearn / src / fklearn / training / transformation.py View on Github external
@log_learner_time(learner_name='null_injector')
def null_injector(df: pd.DataFrame,
                  proportion: float,
                  columns_to_inject: Optional[List[str]] = None,
                  groups: Optional[List[List[str]]] = None,
                  seed: int = 1) -> LearnerReturnType:
    """
    Injects nulls into a proportion of the values in the desired columns.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain `columns_to_inject` as columns

    columns_to_inject : list of str
        A list of features to inject nulls. If groups is not None it will be ignored.
github nubank / fklearn / src / fklearn / training / transformation.py View on Github external
@log_learner_time(learner_name="truncate_categorical")
def truncate_categorical(df: pd.DataFrame,
                         columns_to_truncate: List[str],
                         percentile: float,
                         replacement: Union[str, float] = -9999,
                         replace_unseen: Union[str, float] = -9999,
                         store_mapping: bool = False) -> LearnerReturnType:
    """
    Truncate infrequent categories and replace them by a single one.
    You can think of it like "others" category.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain the `columns_to_truncate` columns.

    columns_to_truncate : list of str
github nubank / fklearn / src / fklearn / training / regression.py View on Github external
@log_learner_time(learner_name='catboost_regressor_learner')
def catboost_regressor_learner(df: pd.DataFrame,
                               features: List[str],
                               target: str,
                               learning_rate: float = 0.1,
                               num_estimators: int = 100,
                               extra_params: Dict[str, Any] = None,
                               prediction_column: str = "prediction",
                               weight_column: str = None) -> LearnerReturnType:
    """
    Fits a CatBoost regressor to the dataset. It first generates a Pool
    with the specified features and labels from `df`. Then it fits a CatBoost
    model to this Pool. Returns the predict function for the model and the
    predictions for the input dataset.

    Parameters
    ----------
github nubank / fklearn / src / fklearn / training / regression.py View on Github external
@log_learner_time(learner_name='custom_supervised_model_learner')
def custom_supervised_model_learner(df: pd.DataFrame,
                                    features: List[str],
                                    target: str,
                                    model: Any,
                                    supervised_type: str,
                                    log: Dict[str, Dict],
                                    prediction_column: str = "prediction") -> LearnerReturnType:
    """
    Fits a custom model to the dataset.
    Return the predict function, the predictions for the input dataset and a log describing the model.

    Parameters
    ----------

    df : pandas.DataFrame
        A Pandas' DataFrame with features and target columns.
github nubank / fklearn / src / fklearn / training / classification.py View on Github external
@log_learner_time(learner_name='logistic_classification_learner')
def logistic_classification_learner(df: pd.DataFrame,
                                    features: List[str],
                                    target: str,
                                    params: LogType = None,
                                    prediction_column: str = "prediction",
                                    weight_column: str = None,
                                    encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits a logistic regression classifier to the dataset. Returns the predict function
    for the model and the predictions for the input dataset.

    Parameters
    ----------

    df : pandas.DataFrame
        A Pandas' DataFrame with features and target columns.