How to use the pdpipe.shared._interpret_columns_param function in pdpipe

To help you get started, we've selected a few pdpipe examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pdpipe / pdpipe / pdpipe / basic_stages.py View on Github external
def __init__(self, conditions, reduce=None, columns=None, **kwargs):
        # Constructor of RowDrop (the class named in the error messages and
        # in the ``RowDrop._REDUCERS`` lookup below). This excerpt is cut off
        # inside the non-dict branch, so only the visible part is described.
        #
        # conditions: either a dict whose values are callables, or an
        #     iterable of callables; anything else raises ValueError.
        # reduce: a key of ``RowDrop._REDUCERS``; None is replaced by 'any'.
        # columns: optional; when truthy it is normalised through
        #     ``_interpret_columns_param``. Ignored in favour of the dict keys
        #     when ``conditions`` is a dict.
        self._conditions = conditions
        if reduce is None:
            reduce = 'any'
        self._reduce = reduce
        self._columns = None
        # Note: a falsy ``columns`` (None, empty list, ...) leaves
        # ``self._columns`` as None.
        if columns:
            self._columns = _interpret_columns_param(columns)
        # ``self._reduce`` has already been stored at this point; an invalid
        # value still aborts construction via the ValueError below.
        if reduce not in RowDrop._REDUCERS.keys():
            raise ValueError((
                "{} is an unsupported argument for the 'reduce' parameter of "
                "the RowDrop constructor!").format(reduce))
        self._cond_is_dict = isinstance(conditions, dict)
        self._columns_str = ""
        if self._cond_is_dict:
            valid = all([callable(cond) for cond in conditions.values()])
            if not valid:
                raise ValueError(
                    "Condition dicts given to RowDrop must map to callables!")
            # The dict keys replace whatever ``columns`` supplied above
            # (presumably they are column labels -- confirm against callers).
            self._columns = list(conditions.keys())
            self._columns_str = _list_str(self._columns)
        else:
            # Non-dict input is iterated directly and every item must be
            # callable.
            valid = all([callable(cond) for cond in conditions])
            if not valid:
github pdpipe / pdpipe / pdpipe / basic_stages.py View on Github external
def __init__(self, values, columns=None, **kwargs):
        """Initialise a ValDrop stage.

        Parameters
        ----------
        values
            Stored unchanged on the instance; its ``_list_str`` rendering is
            used in the stage messages.
        columns : optional
            When None no column restriction is stored. Any other value is
            normalised with ``_interpret_columns_param`` and its
            ``_list_str`` rendering is appended to the application message.
        **kwargs
            Merged over the default ``exmsg``/``appmsg``/``desc`` entries and
            forwarded to the parent constructor.
        """
        self._values = values
        self._values_str = _list_str(values)
        self._columns_str = _list_str(columns)
        self._columns = None
        # What the application message reports as being dropped: the values
        # alone, or "values in columns" when a column restriction was given.
        target = self._values_str
        if columns is not None:
            self._columns = _interpret_columns_param(columns)
            target = "{} in {}".format(self._values_str, self._columns_str)
        apply_msg = ValDrop._DEF_VALDROP_APPLY_MSG.format(target)
        defaults = {
            'exmsg': ValDrop._DEF_VALDROP_EXC_MSG.format(self._columns_str),
            'appmsg': apply_msg,
            'desc': self._default_desc(),
        }
        # Caller-supplied keyword arguments win over the defaults.
        super().__init__(**{**defaults, **kwargs})
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
self,
        columns=None,
        exclude=None,
        drop=False,
        non_neg=False,
        const_shift=None,
        **kwargs
    ):
        if columns is None:
            self._columns = None
        else:
            self._columns = _interpret_columns_param(columns)
        if exclude is None:
            self._exclude = []
        else:
            self._exclude = _interpret_columns_param(exclude)
        self._drop = drop
        self._non_neg = non_neg
        self._const_shift = const_shift
        self._col_to_minval = {}
        col_str = "all numeric columns"
        if self._columns:
            col_str = _list_str(self._columns)
        super_kwargs = {
            "exmsg": Log._DEF_LOG_EXC_MSG.format(col_str),
            "appmsg": Log._DEF_LOG_APP_MSG.format(col_str),
            "desc": "Log-transform {}".format(col_str),
        }
        super_kwargs.update(**kwargs)
        super().__init__(**super_kwargs)
github pdpipe / pdpipe / pdpipe / nltk_stages.py View on Github external
def __init__(self, columns, threshold, drop=True, **kwargs):
        """Initialise a DropRareTokens stage.

        Parameters
        ----------
        columns
            Normalised with ``_interpret_columns_param`` before being stored.
        threshold
            Stored unchanged as ``self._threshold``.
        drop : optional
            Stored unchanged as ``self._drop``; defaults to True.
        **kwargs
            Merged over the default ``exmsg``/``appmsg``/``desc`` entries and
            forwarded to the parent constructor.
        """
        self._columns = _interpret_columns_param(columns)
        self._threshold, self._drop = threshold, drop
        # Starts empty; nothing in this constructor populates it.
        self._rare_removers = {}
        label = _list_str(self._columns)
        stage_args = dict(
            exmsg=DropRareTokens._DEF_RARE_EXC_MSG.format(label),
            appmsg="Dropping rare tokens from {}...".format(label),
            desc="Drop rare tokens from {}".format(label),
        )
        # Caller-supplied keyword arguments win over the defaults.
        stage_args.update(kwargs)
        super().__init__(**stage_args)
github pdpipe / pdpipe / pdpipe / basic_stages.py View on Github external
def __init__(self, columns, errors=None, **kwargs):
        """Initialise a ColDrop stage.

        Parameters
        ----------
        columns
            Either a callable, which is stored untouched, or any other value,
            which is normalised with ``_interpret_columns_param``.
        errors : optional
            Stored unchanged as ``self._errors``.
        **kwargs
            Merged over the default ``exmsg``/``appmsg``/``desc`` entries and
            forwarded to the parent constructor.
        """
        self._errors = errors
        # The message text is built from the raw argument, before any
        # normalisation of the column specification.
        self._columns_str = _list_str(columns)
        if callable(columns):
            self._columns = columns
        else:
            self._columns = _interpret_columns_param(columns)
        label = self._columns_str
        stage_args = dict(
            exmsg=ColDrop._DEF_COLDROP_EXC_MSG.format(label),
            appmsg=ColDrop._DEF_COLDROP_APPLY_MSG.format(label),
            desc=self._default_desc(),
        )
        # Caller-supplied keyword arguments win over the defaults.
        stage_args.update(kwargs)
        super().__init__(**stage_args)
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
def __init__(
        self,
        columns=None,
        dummy_na=False,
        exclude_columns=None,
        col_subset=False,
        drop_first=True,
        drop=True,
        **kwargs
    ):
        # Constructor of OneHotEncode (the class whose message templates are
        # referenced below). This excerpt is cut off inside the
        # ``super_kwargs`` literal, so only the visible part is described.
        #
        # columns / exclude_columns: None, or a column specification that is
        #     normalised through ``_interpret_columns_param``.
        # dummy_na, col_subset, drop_first, drop: stored unchanged on the
        #     instance; their effect is not visible in this excerpt.
        if columns is None:
            self._columns = None
        else:
            self._columns = _interpret_columns_param(columns)
        self._dummy_na = dummy_na
        # Unlike ``columns``, a missing exclusion list becomes an empty list
        # rather than None.
        if exclude_columns is None:
            self._exclude_columns = []
        else:
            self._exclude_columns = _interpret_columns_param(exclude_columns)
        self._col_subset = col_subset
        self._drop_first = drop_first
        self._drop = drop
        # Both maps start empty; nothing in the visible code fills them.
        self._dummy_col_map = {}
        self._encoder_map = {}
        col_str = _list_str(self._columns)
        super_kwargs = {
            "exmsg": OneHotEncode._DEF_1HENCODE_EXC_MSG.format(col_str),
            # Falls back to a generic phrase when ``col_str`` is falsy.
            "appmsg": OneHotEncode._DEF_1HENCODE_APP_MSG.format(
                col_str or "all columns"
            ),
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
def __init__(
        self,
        columns,
        func,
        result_columns=None,
        drop=True,
        func_desc=None,
        **kwargs
    ):
        """Initialise a stage that pairs source columns with a function.

        Only the beginning of this constructor is visible in the excerpt;
        the description below covers that part alone.

        Parameters
        ----------
        columns
            Normalised with ``_interpret_columns_param``.
        func
            Stored unchanged as ``self._func``.
        result_columns : optional
            When None, the result columns are the source columns themselves
            if ``drop`` is truthy, otherwise each source column suffixed with
            ``"_app"``. When given, it is normalised with
            ``_interpret_columns_param`` and must have the same length as
            ``columns``.
        drop : optional
            Stored unchanged as ``self._drop``; also selects the default
            result-column naming described above.
        func_desc : optional
            None is replaced by an empty string.

        Raises
        ------
        ValueError
            If ``result_columns`` is given and its length differs from that
            of ``columns``.
        """
        self._columns = _interpret_columns_param(columns)
        self._func = func
        if result_columns is None:
            if drop:
                # Same list object as ``self._columns``, not a copy.
                self._result_columns = self._columns
            else:
                # Relies on ``col + "_app"``; presumably the labels are
                # strings -- TODO confirm.
                self._result_columns = [col + "_app" for col in self._columns]
        else:
            self._result_columns = _interpret_columns_param(result_columns)
            if len(self._result_columns) != len(self._columns):
                raise ValueError(
                    "columns and result_columns parameters must"
                    " be string lists of the same length!"
                )
        self._drop = drop
        if func_desc is None:
            func_desc = ""
github pdpipe / pdpipe / pdpipe / sklearn_stages.py View on Github external
def __init__(
        self,
        scaler,
        exclude_columns=None,
        exclude_object_columns=True,
        **kwargs
    ):
        """Initialise a Scale stage.

        Parameters
        ----------
        scaler
            Stored unchanged as ``self.scaler``.
        exclude_columns : optional
            When None an empty exclusion list is stored. Otherwise the value
            is normalised with ``_interpret_columns_param`` and named in the
            stage description.
        exclude_object_columns : optional
            Stored unchanged as ``self._exclude_obj_cols``; defaults to True.
        **kwargs
            Kept in full on ``self._kwargs``. Only the keys reported by the
            parent's ``_init_kwargs()`` are forwarded to the parent
            constructor, where they override the default messages.
        """
        self.scaler = scaler
        self._exclude_obj_cols = exclude_object_columns
        self._kwargs = kwargs
        if exclude_columns is not None:
            self._exclude_columns = _interpret_columns_param(exclude_columns)
            desc_suffix = " except columns {}.".format(
                _list_str(self._exclude_columns))
        else:
            self._exclude_columns = []
            desc_suffix = "."
        stage_args = {
            "exmsg": Scale._DEF_SCALE_EXC_MSG,
            "appmsg": Scale._DEF_SCALE_APP_MSG,
            "desc": Scale._DESC_PREFIX + desc_suffix,
        }
        # Forward only the keyword arguments the parent constructor accepts;
        # the rest stay available on ``self._kwargs``.
        accepted = super()._init_kwargs()
        stage_args.update(
            (name, value) for name, value in kwargs.items()
            if name in accepted
        )
        super().__init__(**stage_args)
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
def __init__(
        self,
        columns=None,
        exclude=None,
        drop=False,
        non_neg=False,
        const_shift=None,
        **kwargs
    ):
        """Initialise a Log stage.

        The excerpt ends right after the default message dictionary is
        built; how ``kwargs`` and that dictionary are used afterwards is not
        visible here.

        Parameters
        ----------
        columns : optional
            None is stored as None; any other value is normalised with
            ``_interpret_columns_param``.
        exclude : optional
            None is stored as an empty list; any other value is normalised
            with ``_interpret_columns_param``.
        drop, non_neg, const_shift : optional
            Stored unchanged on the instance; their effect is not visible in
            this excerpt.
        """
        if columns is None:
            self._columns = None
        else:
            self._columns = _interpret_columns_param(columns)
        if exclude is None:
            self._exclude = []
        else:
            self._exclude = _interpret_columns_param(exclude)
        self._drop = drop
        self._non_neg = non_neg
        self._const_shift = const_shift
        # Starts empty; nothing in the visible code fills it.
        self._col_to_minval = {}
        # Generic wording is used in the messages unless specific (truthy)
        # columns were given.
        col_str = "all numeric columns"
        if self._columns:
            col_str = _list_str(self._columns)
        super_kwargs = {
            "exmsg": Log._DEF_LOG_EXC_MSG.format(col_str),
            "appmsg": Log._DEF_LOG_APP_MSG.format(col_str),
            "desc": "Log-transform {}".format(col_str),
        }
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
dummy_na=False,
        exclude_columns=None,
        col_subset=False,
        drop_first=True,
        drop=True,
        **kwargs
    ):
        if columns is None:
            self._columns = None
        else:
            self._columns = _interpret_columns_param(columns)
        self._dummy_na = dummy_na
        if exclude_columns is None:
            self._exclude_columns = []
        else:
            self._exclude_columns = _interpret_columns_param(exclude_columns)
        self._col_subset = col_subset
        self._drop_first = drop_first
        self._drop = drop
        self._dummy_col_map = {}
        self._encoder_map = {}
        col_str = _list_str(self._columns)
        super_kwargs = {
            "exmsg": OneHotEncode._DEF_1HENCODE_EXC_MSG.format(col_str),
            "appmsg": OneHotEncode._DEF_1HENCODE_APP_MSG.format(
                col_str or "all columns"
            ),
            "desc": "One-hot encode {}".format(
                col_str or "all categorical columns"
            ),
        }
        super_kwargs.update(**kwargs)