How to use the shap.common.DenseData class in shap

To help you get started, we've selected a few shap examples based on popular ways shap.common.DenseData is used in public projects.
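DenseData wraps a background data matrix together with per-column names and optional group and weight information. A minimal construction sketch, assuming an older shap release where the shap.common module path is still available:

import numpy as np
from shap.common import DenseData  # module path applies to older shap releases

# Background matrix: 4 samples x 3 features
X = np.random.randn(4, 3)

# Wrap it with one name per column; groups and weights take their defaults
background = DenseData(X, [str(i) for i in range(X.shape[1])])
print(background.group_names)  # ['0', '1', '2']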

github interpretml / interpret-community / test / test_serialize_explanation.py
def _assert_explanation_equivalence(actual, expected):
    # get the non-null properties in the expected explanation
    paramkeys = filter(lambda x, expected=expected: hasattr(expected, getattr(ExplainParams, x)),
                       list(ExplainParams.get_serializable()))
    for paramkey in paramkeys:
        param = getattr(ExplainParams, paramkey)
        actual_value = getattr(actual, param, None)
        expected_value = getattr(expected, param, None)
        if isinstance(actual_value, DatasetWrapper) or isinstance(actual_value, DenseData):
            if isinstance(actual_value.original_dataset, np.ndarray):
                actual_dataset = actual_value.original_dataset.tolist()
            else:
                actual_dataset = actual_value.original_dataset
            if isinstance(expected_value.original_dataset, np.ndarray):
                expected_dataset = expected_value.original_dataset.tolist()
            else:
                expected_dataset = expected_value.original_dataset
            np.testing.assert_array_equal(actual_dataset, expected_dataset)
        elif isinstance(actual_value, (np.ndarray, collections.abc.Sequence)):
            np.testing.assert_array_equal(actual_value, expected_value)
        elif isinstance(actual_value, pd.DataFrame) and isinstance(expected_value, pd.DataFrame):
            np.testing.assert_array_equal(actual_value.values, expected_value.values)
        else:
            assert actual_value == expected_value
github slundberg / shap / shap / explainers / kernel.py
def kmeans(X, k, round_values=True):
    """Summarize a dataset with k mean samples weighted by the number of
    data points they each represent.

    Returns
    -------
    DenseData object.
    """

    group_names = [str(i) for i in range(X.shape[1])]
    if str(type(X)).endswith("'pandas.core.frame.DataFrame'>"):
        group_names = X.columns
        X = X.values
    kmeans = KMeans(n_clusters=k, random_state=0).fit(X)

    if round_values:
        for i in range(k):
            for j in range(X.shape[1]):
                ind = np.argmin(np.abs(X[:,j] - kmeans.cluster_centers_[i,j]))
                kmeans.cluster_centers_[i,j] = X[ind,j]
    return DenseData(kmeans.cluster_centers_, group_names, None, 1.0*np.bincount(kmeans.labels_))
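You rarely build this return value by hand; shap.kmeans is the usual way to produce a weighted DenseData background set for KernelExplainer. A usage sketch (the model and data here are placeholders):

import shap
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=500, n_features=8, random_state=0)
model = RandomForestRegressor(random_state=0).fit(X, y)

# Summarize the training data into 10 weighted centroids (a DenseData object)
background = shap.kmeans(X, 10)
explainer = shap.KernelExplainer(model.predict, background)
shap_values = explainer.shap_values(X[:5])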
github interpretml / interpret-community / python / interpret_community / explanation / explanation.py
def _explanation_to_json(explanation):  # function name reconstructed; the excerpt began mid-docstring
    """Serialize the given explanation to a JSON string.

    :param explanation: The explanation to serialize.
    :return: The JSON string of the serialized explanation.
    :rtype: str
    """
    paramkeys = list(ExplainParams.get_serializable())
    expldict = dict()
    _metadata = dict()
    for paramkey in paramkeys:
        param = getattr(ExplainParams, paramkey)
        if hasattr(explanation, param):
            value = getattr(explanation, param)
            if isinstance(value, pd.DataFrame):
                expldict[param] = value.values.tolist()
                _metadata[param] = 'DataFrame'
            elif isinstance(value, DatasetWrapper):
                expldict[param] = value.original_dataset.tolist()
                _metadata[param] = 'DatasetWrapper'
            elif isinstance(value, DenseData):
                expldict[param] = value.original_dataset.tolist()
                _metadata[param] = 'DenseData'
            elif isinstance(value, np.ndarray):
                expldict[param] = value.tolist()
                _metadata[param] = 'ndarray'
            else:
                expldict[param] = value
    return json.dumps({
        '_metadata': _metadata,
        'explanation': expldict
    })
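Because _metadata records how each field was flattened, a loader can reverse the process. The sketch below is illustrative, not interpret-community's actual deserializer; the helper name and the handled types are assumptions:

import json

import numpy as np
import pandas as pd

def rebuild_explanation_fields(serialized):  # hypothetical helper
    payload = json.loads(serialized)
    meta, expl = payload['_metadata'], payload['explanation']
    rebuilt = {}
    for key, value in expl.items():
        kind = meta.get(key)
        if kind == 'ndarray':
            rebuilt[key] = np.array(value)
        elif kind == 'DataFrame':
            rebuilt[key] = pd.DataFrame(value)
        elif kind in ('DatasetWrapper', 'DenseData'):
            # only the raw dataset was serialized, so only that is recoverable
            rebuilt[key] = np.array(value)
        else:
            rebuilt[key] = value
    return rebuilt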
github slundberg / shap / shap / common.py
class DenseData(Data):
    def __init__(self, data, group_names, *args):
        # optional first extra argument: explicit column groupings
        self.groups = args[0] if len(args) > 0 and args[0] is not None else [np.array([i]) for i in range(len(group_names))]

        l = sum(len(g) for g in self.groups)
        num_samples = data.shape[0]
        t = False
        if l != data.shape[1]:
            t = True  # the matrix is treated as transposed
            num_samples = data.shape[1]

        valid = (not t and l == data.shape[1]) or (t and l == data.shape[0])
        assert valid, "# of names must match data matrix!"

        self.weights = args[1] if len(args) > 1 else np.ones(num_samples)
        self.weights /= np.sum(self.weights)
        wl = len(self.weights)
        valid = (not t and wl == data.shape[0]) or (t and wl == data.shape[1])
        assert valid, "# weights must match data matrix!"

        self.transposed = t
        self.group_names = group_names
        self.data = data
        self.groups_size = len(self.groups)


class DenseDataWithIndex(DenseData):
    def __init__(self, data, group_names, index, index_name, *args):
        DenseData.__init__(self, data, group_names, *args)
        self.index_value = index
        self.index_name = index_name

    def convert_to_df(self):
        data = pd.DataFrame(self.data, columns=self.group_names)
        index = pd.DataFrame(self.index_value, columns=[self.index_name])
        df = pd.concat([index, data], axis=1)
        df = df.set_index(self.index_name)
        return df


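The four-argument call in shap.kmeans above shows the full constructor contract: the first extra positional argument is the column groups and the second is the per-sample weights. A sketch of a weighted two-sample background, again assuming the older shap.common module path:

import numpy as np
from shap.common import DenseData  # older shap releases

centers = np.array([[0.0, 1.0], [2.0, 3.0]])
weights = np.array([30.0, 70.0])  # e.g. cluster sizes; normalized in __init__

# groups=None -> one group per column; weights are rescaled to sum to 1
background = DenseData(centers, ['f0', 'f1'], None, weights)
print(background.weights)  # [0.3 0.7]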
github interpretml / interpret-community / python / interpret_community / common / model_wrapper.py
def _eval_function(function, examples, model_task, wrapped=False):
    """Return the function chosen from the given model and the given domain.
    (Signature and docstring opening reconstructed; the excerpt began mid-docstring.)

    :param examples: The model evaluation examples.
    :type examples: DatasetWrapper
    :param model_task: Optional parameter to specify whether the model is a classification or regression model.
        In most cases the type of the model can be inferred from the shape of the output: a classifier
        has a predict_proba method and outputs a 2-dimensional array, while a regressor has a predict
        method and outputs a 1-dimensional array.
    :type model_task: str
    :param wrapped: Indicates if function has already been wrapped.
    :type wrapped: bool
    :return: The function chosen from given model and chosen domain.
    :rtype: (function, str)
    """
    # Try to run the function on a single example - if it doesn't work wrap
    # it in a function that converts a 1D array to 2D for those functions
    # that only support 2D arrays as input
    examples_dataset = examples.dataset
    if isinstance(examples_dataset, DenseData):
        examples_dataset = examples_dataset.data
    try:
        result = function(examples.typed_wrapper_func(examples_dataset[0]))
    except Exception as ex:
        # If function has already been wrapped, re-throw error to prevent stack overflow
        if wrapped:
            raise ex

        def function_input_1D_wrapper(dataset):
            if len(dataset.shape) == 1:
                dataset = dataset.reshape(1, -1)
            return function(dataset)

        return _eval_function(function_input_1D_wrapper, examples, model_task, wrapped=True)
    if len(result.shape) == 2:
        # If the result of evaluating the function is a 2D array of 1 column,
github slundberg / shap / shap / common.py
def convert_to_data(val, keep_index=False):
    if isinstance(val, Data):
        return val
    elif type(val) == np.ndarray:
        return DenseData(val, [str(i) for i in range(val.shape[1])])
    elif str(type(val)).endswith("'pandas.core.series.Series'>"):
        return DenseData(val.values.reshape((1,len(val))), list(val.index))
    elif str(type(val)).endswith("'pandas.core.frame.DataFrame'>"):
        if keep_index:
            return DenseDataWithIndex(val.values, list(val.columns), val.index.values, val.index.name)
        else:
            return DenseData(val.values, list(val.columns))
    elif sp.sparse.issparse(val):
        if not sp.sparse.isspmatrix_csr(val):
            val = val.tocsr()
        return SparseData(val)
    else:
        assert False, "Unknown type passed as data object: "+str(type(val))
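convert_to_data is how shap normalizes explainer input: whatever the caller passes is coerced into a Data subclass. A quick sketch of the DataFrame paths, assuming the older shap.common module:

import pandas as pd
from shap.common import convert_to_data  # older shap releases

df = pd.DataFrame({'age': [25, 40], 'income': [30000, 82000]})

data = convert_to_data(df)                               # DenseData, names from columns
data_with_index = convert_to_data(df, keep_index=True)   # DenseDataWithIndex
print(data.group_names)  # ['age', 'income']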
github slundberg / shap / shap / plots / force.py
        exps = []
        for k in range(shap_values.shape[0]):
            if feature_names is None:
                feature_names = [labels['FEATURE'] % str(i) for i in range(shap_values.shape[1])]
            if features is None:
                display_features = ["" for i in range(len(feature_names))]
            else:
                display_features = features[k, :]

            instance = Instance(np.ones((1, len(feature_names))), display_features)
            e = AdditiveExplanation(
                base_value,
                np.sum(shap_values[k, :]) + base_value,
                shap_values[k, :],
                None,
                instance,
                link,
                Model(None, out_names),
                DenseData(np.ones((1, len(feature_names))), list(feature_names))
            )
            exps.append(e)
        
        return visualize(
            exps,
            plot_cmap=plot_cmap,
            ordering_keys=ordering_keys,
            ordering_keys_time_format=ordering_keys_time_format,
            text_rotation=text_rotation
        )
github interpretml / interpret-community / python / interpret_community / mimic / mimic_explainer.py
        if supports_categoricals:  # hypothetical condition; the excerpt began inside this branch
            # models such as LightGBM can consume categorical features natively
            self._column_indexer = initialization_examples.string_index(columns=categorical_features)
            self._one_hot_encoder = None
            explainable_model_args[LightGBMParams.CATEGORICAL_FEATURE] = categorical_features
        else:
            # One-hot-encode categoricals for models that don't support categoricals natively
            self._column_indexer = initialization_examples.string_index(columns=categorical_features)
            self._one_hot_encoder = initialization_examples.one_hot_encode(columns=categorical_features)

        self.classes = classes
        self.explain_subset = explain_subset
        self.transformations = transformations
        self._shap_values_output = shap_values_output
        # Train the mimic model on the given model
        training_data = initialization_examples.dataset
        self.initialization_examples = initialization_examples
        if isinstance(training_data, DenseData):
            training_data = training_data.data

        explainable_model_args[ExplainParams.CLASSIFICATION] = self.predict_proba_flag
        if self._supports_shap_values_output(explainable_model):
            explainable_model_args[ExplainParams.SHAP_VALUES_OUTPUT] = shap_values_output
        self.surrogate_model = _model_distill(self.function, explainable_model, training_data,
                                              original_training_data, explainable_model_args)
        self._method = self.surrogate_model._method
        self._original_eval_examples = None
        self._allow_all_transformations = allow_all_transformations
github interpretml / interpret-community / python / interpret_community / common / explanation_utils.py
def _summarize_data(X, k=10, to_round_values=True):  # signature reconstructed from the excerpt
    """Summarize a dataset.

    For dense datasets, use k mean samples weighted by the number of data
    points they each represent.
    For sparse datasets, use a sparse row for the background with the
    calculated median for dense columns.

    :param X: Matrix of data samples to summarize (# samples x # features).
    :type X: numpy.array or pandas.DataFrame or scipy.sparse.csr_matrix
    :param k: Number of cluster centroids to use for approximation.
    :type k: int
    :param to_round_values: When using kmeans, round each element of every cluster centroid to the nearest value
        from X in the corresponding dimension. This ensures discrete features
        always get a valid value. Ignored for sparse data samples.
    :type to_round_values: bool
    :return: DenseData or SparseData object.
    :rtype: iml.datatypes.DenseData or iml.datatypes.SparseData
    """
    is_sparse = sp.sparse.issparse(X)
    if not isinstance(X, DenseData):
        if is_sparse:
            module_logger.debug('Creating sparse data summary as csr matrix')
            # calculate median of sparse background data
            median_dense = csc_median_axis_0(X.tocsc())
            return sp.sparse.csr_matrix(median_dense)
        elif len(X) > 10 * k:
            module_logger.debug('Create dense data summary with k-means')
            # use kmeans to summarize the examples for initialization
            # if there are more than 10 x k of them
            return shap.kmeans(X, k, to_round_values)
    return X
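Taken together, the helper routes sparse input to a median background row, summarizes large dense input with shap.kmeans, and passes an existing DenseData through unchanged. A usage sketch; note this is a private interpret-community helper, so the import path and keyword usage are assumptions:

import numpy as np
from interpret_community.common.explanation_utils import _summarize_data  # private API

X_train = np.random.randn(1000, 20)

# 1000 samples > 10 * k, so this returns a k-means DenseData summary with 10 centroids
background = _summarize_data(X_train, k=10)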