How to use the pdpipe.col_generation.OneHotEncode function in pdpipe

To help you get started, we’ve selected a few pdpipe examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pdpipe / pdpipe / tests / col_generation / test_onehotencode.py View on Github external
def test_onehotencode_with_dummy_na():
    """One-hot encode "Born" with dummy_na=True and default column dropping.

    Asserts that neither the source column nor a "Born_nan" dummy remains,
    and that the UK / USA dummies hold the expected values at labels 1-3.
    """
    frame = _one_categ_df_with_nan()
    stage = OneHotEncode("Born", dummy_na=True)
    encoded = stage(frame)

    for absent in ("Born", "Born_nan"):
        assert absent not in encoded.columns

    expected_by_dummy = (
        ("Born_UK", (0, 1, 0)),
        ("Born_USA", (1, 0, 0)),
    )
    for dummy, expected in expected_by_dummy:
        assert dummy in encoded.columns
        for label, value in zip((1, 2, 3), expected):
            assert encoded[dummy][label] == value

    # check when fitted: the stage must report itself fitted and accept a
    # new frame; the transformed result is not inspected here.
    single_row = _one_categ_single_row_df()
    assert stage.is_fitted
    stage(single_row, verbose=True)
github pdpipe / pdpipe / tests / col_generation / test_onehotencode.py View on Github external
def test_onehotencode_one_no_drop():
    """One-hot encode "Born" with drop=False, keeping the source column.

    Asserts that "Born" is still present next to the UK / USA dummies and
    that no bare "Greece" column appears.
    """
    frame = _one_categ_df()
    stage = OneHotEncode("Born", drop=False)
    encoded = stage(frame, verbose=True)
    columns = encoded.columns

    assert "Greece" not in columns
    assert "Born" in columns

    expected_by_dummy = {
        "Born_UK": [0, 1, 0],
        "Born_USA": [1, 0, 0],
    }
    for dummy, expected in expected_by_dummy.items():
        assert dummy in columns
        # enumerate from 1 because the checked row labels are 1, 2, 3
        for label, value in enumerate(expected, start=1):
            assert encoded[dummy][label] == value

    # check when fitted: only verifies the fitted stage runs on a new frame
    assert stage.is_fitted
    stage(_one_categ_single_row_df(), verbose=True)
github pdpipe / pdpipe / tests / col_generation / test_onehotencode.py View on Github external
def test_onehotencode_large():
    """Fit a one-hot encoder on the small frame, then apply it to a larger one.

    With default arguments the source column and the "Born_Greece" dummy
    are asserted absent, leaving the UK / USA dummies.
    """
    small_frame = _one_categ_df()
    stage = OneHotEncode("Born")
    encoded = stage(small_frame, verbose=True)

    assert "Born" not in encoded.columns
    assert "Born_Greece" not in encoded.columns

    row_labels = (1, 2, 3)

    assert "Born_UK" in encoded.columns
    uk_values = [encoded["Born_UK"][label] for label in row_labels]
    assert uk_values == [0, 1, 0]

    assert "Born_USA" in encoded.columns
    usa_values = [encoded["Born_USA"][label] for label in row_labels]
    assert usa_values == [1, 0, 0]

    # check when fitted: apply the already-fitted stage to the large frame;
    # the output is not inspected here.
    large_frame = _one_categ_df_large()
    assert stage.is_fitted
    stage(large_frame, verbose=True)
github pdpipe / pdpipe / tests / col_generation / test_onehotencode.py View on Github external
def test_onehotencode_with_dummy_na_no_drop_first():
    """One-hot encode "Born" with dummy_na=True and drop_first=False.

    Asserts that the UK, USA and NaN dummies are all present, with the
    expected values at labels 1-3.
    """
    frame = _one_categ_df_with_nan()
    stage = OneHotEncode("Born", dummy_na=True, drop_first=False)
    encoded = stage(frame)

    assert "Born" not in encoded.columns

    # One row per dummy column: (name, values expected at labels 1, 2, 3).
    expectations = [
        ("Born_UK", (0, 1, 0)),
        ("Born_USA", (1, 0, 0)),
        ("Born_nan", (0, 0, 1)),
    ]
    for dummy, (first, second, third) in expectations:
        assert dummy in encoded.columns
        series = encoded[dummy]
        assert series[1] == first
        assert series[2] == second
        assert series[3] == third

    # check when fitted
github pdpipe / pdpipe / tests / col_generation / test_onehotencode.py View on Github external
def test_onehotencode_one_with_exclude():
    """One-hot encode with no explicit columns while excluding "Name".

    Asserts that "Name" is left intact with no Name_* dummies, while
    "Born" is replaced by its UK / USA dummies.
    """
    frame = _two_categ_df()
    stage = OneHotEncode(exclude_columns=["Name"])
    encoded = stage(frame)
    columns = encoded.columns

    assert "Born" not in columns
    assert "Name" in columns
    for unexpected in ("Name_Bob", "Name_Jack", "Name_Yan", "Greece"):
        assert unexpected not in columns

    for dummy, expected in (
        ("Born_UK", (0, 1, 0)),
        ("Born_USA", (1, 0, 0)),
    ):
        assert dummy in columns
        for offset, value in enumerate(expected):
            # row labels under test are 1, 2, 3
            assert encoded[dummy][offset + 1] == value
github pdpipe / pdpipe / tests / col_generation / test_onehotencode.py View on Github external
def test_onehotencode_with_nan():
    """One-hot encode "Born" with default arguments on a frame holding a NaN.

    Asserts that "Born_USA" is the only column left in the result.
    """
    frame = _one_categ_df_with_nan()
    stage = OneHotEncode("Born")
    encoded = stage(frame)
    print(encoded)

    for absent in ("Born", "Born_UK", "Born_nan"):
        assert absent not in encoded.columns
    assert "Born_USA" in encoded.columns
    assert len(encoded.columns) == 1

    usa = encoded["Born_USA"]
    for label, value in {1: 1, 2: 0, 3: 0}.items():
        assert usa[label] == value

    # check when fitted: the fitted stage is applied to a single-row frame
    # and the result is only printed, not asserted on.
    single_row = _one_categ_single_row_df()
    assert stage.is_fitted
    print(stage(single_row, verbose=True))
github pdpipe / pdpipe / tests / col_generation / test_onehotencode.py View on Github external
def test_onehotencode_one_with_drop_first_colname(verbose):
    """One-hot encode "Born" with drop_first naming the category to drop.

    With drop_first="UK", asserts that "Born_UK" is absent while the
    Greece and USA dummies are kept. `verbose` is forwarded to the first
    application of the stage.
    """
    frame = _one_categ_df()
    stage = OneHotEncode("Born", drop_first="UK")
    encoded = stage(frame, verbose=verbose)
    columns = encoded.columns

    assert "Born" not in columns
    assert "Born_UK" not in columns

    kept_dummies = {
        "Born_Greece": (0, 0, 1),
        "Born_USA": (1, 0, 0),
    }
    for dummy, expected in kept_dummies.items():
        assert dummy in columns
        observed = tuple(encoded[dummy][label] for label in (1, 2, 3))
        assert observed == expected

    # check when fitted: only verifies the fitted stage runs on a new frame
    single_row = _one_categ_single_row_df()
    assert stage.is_fitted
    stage(single_row, verbose=True)
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
self._columns = None
        else:
            self._columns = _interpret_columns_param(columns)
        self._dummy_na = dummy_na
        if exclude_columns is None:
            self._exclude_columns = []
        else:
            self._exclude_columns = _interpret_columns_param(exclude_columns)
        self._col_subset = col_subset
        self._drop_first = drop_first
        self._drop = drop
        self._dummy_col_map = {}
        self._encoder_map = {}
        col_str = _list_str(self._columns)
        super_kwargs = {
            "exmsg": OneHotEncode._DEF_1HENCODE_EXC_MSG.format(col_str),
            "appmsg": OneHotEncode._DEF_1HENCODE_APP_MSG.format(
                col_str or "all columns"
            ),
            "desc": "One-hot encode {}".format(
                col_str or "all categorical columns"
            ),
        }
        super_kwargs.update(**kwargs)
        super().__init__(**super_kwargs)
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
dfirst_col = colname + "_" + str(self._drop_first)
                if dfirst_col in dummies:
                    if verbose:
                        print(
                            (
                                "Dropping {} dummy column instead of first "
                                "column when one-hot encoding {}."
                            ).format(dfirst_col, colname)
                        )
                    dummies.drop(dfirst_col, axis=1, inplace=True)
                elif nan_col in dummies:
                    dummies.drop(nan_col, axis=1, inplace=True)
                else:
                    dummies.drop(dummies.columns[0], axis=1, inplace=True)
            self._dummy_col_map[colname] = list(dummies.columns)
            self._encoder_map[colname] = OneHotEncode._FitterEncoder(
                colname, list(dummies.columns)
            )
            for column in dummies:
                assign_map[column] = dummies[column]

        inter_df = df.assign(**assign_map)
        self.is_fitted = True
        if self._drop:
            return inter_df.drop(columns_to_encode, axis=1)
        return inter_df
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
else:
            self._columns = _interpret_columns_param(columns)
        self._dummy_na = dummy_na
        if exclude_columns is None:
            self._exclude_columns = []
        else:
            self._exclude_columns = _interpret_columns_param(exclude_columns)
        self._col_subset = col_subset
        self._drop_first = drop_first
        self._drop = drop
        self._dummy_col_map = {}
        self._encoder_map = {}
        col_str = _list_str(self._columns)
        super_kwargs = {
            "exmsg": OneHotEncode._DEF_1HENCODE_EXC_MSG.format(col_str),
            "appmsg": OneHotEncode._DEF_1HENCODE_APP_MSG.format(
                col_str or "all columns"
            ),
            "desc": "One-hot encode {}".format(
                col_str or "all categorical columns"
            ),
        }
        super_kwargs.update(**kwargs)
        super().__init__(**super_kwargs)