# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_feature_importance(self, data=None, fstr_type=EFstrType.PredictionValuesChange, prettified=False, thread_count=-1, verbose=False):
    """
    Calculate feature importances for the trained model.

    Parameters
    ----------
    data : catboost.Pool or None
        Data to get feature importance.
        If type == ShapValues, data is a dataset. For every object in this dataset feature importances will be calculated.
        If type == 'PredictionValuesChange', data is None or train dataset (in case if model was explicitly trained with flag store no leaf weights).
    fstr_type : EFstrType or string (deprecated, converted to EFstrType), optional
        (default=EFstrType.PredictionValuesChange)
        Possible values:
            - PredictionValuesChange
                Calculate score for every feature.
            - ShapValues
                Calculate SHAP Values for every object.
            - Interaction
                Calculate pairwise score between every feature.
    prettified : bool, optional (default=False)
        Used only for PredictionValuesChange/LossFunctionChange: if True,
        return (feature_name, importance) pairs sorted by importance descending.
    thread_count : int, optional (default=-1)
        Number of threads to use; passed through to the fstr computation.
    verbose : bool or int, optional (default=False)
        Verbosity level; must be non-negative.

    Returns
    -------
    Depends on fstr_type:
        - PredictionValuesChange, LossFunctionChange
            list of importances (float), one per feature; when prettified=True,
            a list of (feature_name, importance) pairs sorted descending by importance.
        - ShapValues
            np.array of shape (n_objects, n_features + 1) with Shap values (float) for (object, feature).
            In case of multiclass the returned value is np.array of shape
            (n_objects, classes_count, n_features + 1). For each object it contains Shap values (float).
            Values are calculated for RawFormulaVal predictions.
        - Interaction
            list of length [n_features] of 3-element lists of (first_feature_index, second_feature_index, interaction_score (float))

    Raises
    ------
    CatboostError
        If verbose is invalid, or data is required for the requested
        fstr_type but is missing, empty, or not a catboost.Pool.
    """
    if not isinstance(verbose, bool) and not isinstance(verbose, int):
        raise CatboostError('verbose should be bool or int.')
    verbose = int(verbose)
    if verbose < 0:
        raise CatboostError('verbose should be non-negative.')

    fstr_type = enum_from_enum_or_str(EFstrType, fstr_type)
    # Some importance types can be computed from the model alone:
    # Interaction always, PredictionValuesChange when leaf weights are stored.
    empty_data_is_ok = (((fstr_type == EFstrType.PredictionValuesChange) and self._object._has_leaf_weights_in_model())
                        or (fstr_type == EFstrType.Interaction))
    if not empty_data_is_ok:
        if not isinstance(data, Pool):
            raise CatboostError("Invalid metric type={}, must be catboost.Pool.".format(type(data)))
        if data.is_empty_:
            raise CatboostError("data is empty.")

    with log_fixup():
        fstr, feature_names = self._calc_fstr(fstr_type, data, thread_count, verbose)
    if fstr_type == EFstrType.PredictionValuesChange or fstr_type == EFstrType.LossFunctionChange:
        feature_importances = [value[0] for value in fstr]
        if prettified:
            return sorted(zip(feature_names, feature_importances), key=itemgetter(1), reverse=True)
        else:
            return feature_importances
    if fstr_type == EFstrType.ShapValues:
        if isinstance(fstr[0][0], ARRAY_TYPES):
            # Multiclass case: one (classes_count, n_features + 1) matrix per object.
            return np.array([np.array([np.array([
                value for value in dimension]) for dimension in doc]) for doc in fstr])
        else:
            return np.array([np.array([value for value in doc]) for doc in fstr])
    elif fstr_type == EFstrType.Interaction:
        return [[int(row[0]), int(row[1]), row[2]] for row in fstr]