How to use the EFstrType enum (catboost.core.EFstrType, from the catboost Python package) in catboost

To help you get started, we’ve selected a few catboost examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
    def get_feature_importance(self, data=None, fstr_type=EFstrType.PredictionValuesChange, prettified=False, thread_count=-1, verbose=False):
        """
        Calculate feature importances for the trained model.

        Parameters
        ----------
        data : catboost.Pool or None
            Data to get feature importance.
            If fstr_type == ShapValues, data is a dataset; SHAP values are calculated for every object in it.
            If fstr_type == PredictionValuesChange, data may be None, or the train dataset
            (required when the model was explicitly trained without storing leaf weights).
            If fstr_type == Interaction, data may be None.

        fstr_type : EFstrType or string (deprecated, converted to EFstrType), optional
                    (default=EFstrType.PredictionValuesChange)
            Possible values:
                - PredictionValuesChange
                    Calculate score for every feature.
                - ShapValues
                    Calculate SHAP Values for every object.
                - Interaction
                    Calculate pairwise score between every feature.

        prettified : bool, optional (default=False)
            For PredictionValuesChange/LossFunctionChange: return (feature_name, importance)
            pairs sorted by importance in descending order instead of a bare list of scores.

        thread_count : int, optional (default=-1)
            Number of threads; -1 means use all available.

        verbose : bool or int, optional (default=False)
            Logging verbosity; must be non-negative.

        Returns
        -------
        depends on fstr_type:
            - PredictionValuesChange, LossFunctionChange
                list of feature importances (float), or sorted (name, importance) pairs if prettified.
            - ShapValues
                np.array of shape (n_objects, n_features + 1) with Shap values (float) for (object, feature).
                In case of multiclass the returned value is np.array of shape
                (n_objects, classes_count, n_features + 1). For each object it contains Shap values (float).
                Values are calculated for RawFormulaVal predictions.
            - Interaction
                list of length [n_features] of 3-element lists of
                (first_feature_index, second_feature_index, interaction_score (float))

        Raises
        ------
        CatboostError
            If verbose is not a bool/int or is negative, or if `data` is required
            for the requested fstr_type but is missing, not a Pool, or empty.
        """

        if not isinstance(verbose, bool) and not isinstance(verbose, int):
            raise CatboostError('verbose should be bool or int.')
        verbose = int(verbose)
        if verbose < 0:
            raise CatboostError('verbose should be non-negative.')

        # Accept both EFstrType members and (deprecated) string names.
        fstr_type = enum_from_enum_or_str(EFstrType, fstr_type)
        # data may be omitted only when the model carries leaf weights
        # (PredictionValuesChange) or for Interaction, which needs no dataset.
        empty_data_is_ok = (((fstr_type == EFstrType.PredictionValuesChange) and self._object._has_leaf_weights_in_model())
                            or (fstr_type == EFstrType.Interaction))
        if not empty_data_is_ok:
            if not isinstance(data, Pool):
                raise CatboostError("Invalid metric type={}, must be catboost.Pool.".format(type(data)))
            if data.is_empty_:
                raise CatboostError("data is empty.")

        with log_fixup():
            fstr, feature_names = self._calc_fstr(fstr_type, data, thread_count, verbose)
        if fstr_type == EFstrType.PredictionValuesChange or fstr_type == EFstrType.LossFunctionChange:
            feature_importances = [value[0] for value in fstr]
            if prettified:
                # Pair each importance with its feature name, highest first.
                return sorted(zip(feature_names, feature_importances), key=itemgetter(1), reverse=True)
            else:
                return feature_importances
        if fstr_type == EFstrType.ShapValues:
            # Multiclass models yield one extra nesting level (per-class values).
            if isinstance(fstr[0][0], ARRAY_TYPES):
                return np.array([np.array([np.array([
                    value for value in dimension]) for dimension in doc]) for doc in fstr])
            else:
                return np.array([np.array([value for value in doc]) for doc in fstr])
        elif fstr_type == EFstrType.Interaction:
            return [[int(row[0]), int(row[1]), row[2]] for row in fstr]
github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
or (fstr_type == EFstrType.Interaction))
        if not empty_data_is_ok:
            if not isinstance(data, Pool):
                raise CatboostError("Invalid metric type={}, must be catboost.Pool.".format(type(data)))
            if data.is_empty_:
                raise CatboostError("data is empty.")

        with log_fixup():
            fstr, feature_names = self._calc_fstr(fstr_type, data, thread_count, verbose)
        if fstr_type == EFstrType.PredictionValuesChange or fstr_type == EFstrType.LossFunctionChange:
            feature_importances = [value[0] for value in fstr]
            if prettified:
                return sorted(zip(feature_names, feature_importances), key=itemgetter(1), reverse=True)
            else:
                return feature_importances
        if fstr_type == EFstrType.ShapValues:
            if isinstance(fstr[0][0], ARRAY_TYPES):
                return np.array([np.array([np.array([
                    value for value in dimension]) for dimension in doc]) for doc in fstr])
            else:
                return np.array([np.array([value for value in doc]) for doc in fstr])
        elif fstr_type == EFstrType.Interaction:
            return [[int(row[0]), int(row[1]), row[2]] for row in fstr]
github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
np.array of shape (n_objects, n_features + 1) with Shap values (float) for (object, feature).
                In case of multiclass the returned value is np.array of shape
                (n_objects, classes_count, n_features + 1). For each object it contains Shap values (float).
                Values are calculated for RawFormulaVal predictions.
            - Interaction
                list of length [n_features] of 3-element lists of (first_feature_index, second_feature_index, interaction_score (float))
        """

        if not isinstance(verbose, bool) and not isinstance(verbose, int):
            raise CatboostError('verbose should be bool or int.')
        verbose = int(verbose)
        if verbose < 0:
            raise CatboostError('verbose should be non-negative.')

        fstr_type = enum_from_enum_or_str(EFstrType, fstr_type)
        empty_data_is_ok = (((fstr_type == EFstrType.PredictionValuesChange) and self._object._has_leaf_weights_in_model())
                            or (fstr_type == EFstrType.Interaction))
        if not empty_data_is_ok:
            if not isinstance(data, Pool):
                raise CatboostError("Invalid metric type={}, must be catboost.Pool.".format(type(data)))
            if data.is_empty_:
                raise CatboostError("data is empty.")

        with log_fixup():
            fstr, feature_names = self._calc_fstr(fstr_type, data, thread_count, verbose)
        if fstr_type == EFstrType.PredictionValuesChange or fstr_type == EFstrType.LossFunctionChange:
            feature_importances = [value[0] for value in fstr]
            if prettified:
                return sorted(zip(feature_names, feature_importances), key=itemgetter(1), reverse=True)
            else:
                return feature_importances
        if fstr_type == EFstrType.ShapValues:
github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
with log_fixup():
            fstr, feature_names = self._calc_fstr(fstr_type, data, thread_count, verbose)
        if fstr_type == EFstrType.PredictionValuesChange or fstr_type == EFstrType.LossFunctionChange:
            feature_importances = [value[0] for value in fstr]
            if prettified:
                return sorted(zip(feature_names, feature_importances), key=itemgetter(1), reverse=True)
            else:
                return feature_importances
        if fstr_type == EFstrType.ShapValues:
            if isinstance(fstr[0][0], ARRAY_TYPES):
                return np.array([np.array([np.array([
                    value for value in dimension]) for dimension in doc]) for doc in fstr])
            else:
                return np.array([np.array([value for value in doc]) for doc in fstr])
        elif fstr_type == EFstrType.Interaction:
            return [[int(row[0]), int(row[1]), row[2]] for row in fstr]
github catboost / catboost / catboost / python-package / catboost / core.py View on Github external
- ShapValues
                np.array of shape (n_objects, n_features + 1) with Shap values (float) for (object, feature).
                In case of multiclass the returned value is np.array of shape
                (n_objects, classes_count, n_features + 1). For each object it contains Shap values (float).
                Values are calculated for RawFormulaVal predictions.
            - Interaction
                list of length [n_features] of 3-element lists of (first_feature_index, second_feature_index, interaction_score (float))
        """

        if not isinstance(verbose, bool) and not isinstance(verbose, int):
            raise CatboostError('verbose should be bool or int.')
        verbose = int(verbose)
        if verbose < 0:
            raise CatboostError('verbose should be non-negative.')

        fstr_type = enum_from_enum_or_str(EFstrType, fstr_type)
        empty_data_is_ok = (((fstr_type == EFstrType.PredictionValuesChange) and self._object._has_leaf_weights_in_model())
                            or (fstr_type == EFstrType.Interaction))
        if not empty_data_is_ok:
            if not isinstance(data, Pool):
                raise CatboostError("Invalid metric type={}, must be catboost.Pool.".format(type(data)))
            if data.is_empty_:
                raise CatboostError("data is empty.")

        with log_fixup():
            fstr, feature_names = self._calc_fstr(fstr_type, data, thread_count, verbose)
        if fstr_type == EFstrType.PredictionValuesChange or fstr_type == EFstrType.LossFunctionChange:
            feature_importances = [value[0] for value in fstr]
            if prettified:
                return sorted(zip(feature_names, feature_importances), key=itemgetter(1), reverse=True)
            else:
                return feature_importances