How to use the shap.KernelExplainer function in shap

To help you get started, we’ve selected a few shap examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github slundberg / shap / tests / explainers / test_kernel.py View on Github external
def test_front_page_model_agnostic():
    import sklearn
    import shap
    from sklearn.model_selection import train_test_split

    # print the JS visualization code to the notebook
    shap.initjs()

    # train a SVM classifier
    X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
    svm = sklearn.svm.SVC(kernel='rbf', probability=True)
    svm.fit(X_train, Y_train)

    # use Kernel SHAP to explain test set predictions
    explainer = shap.KernelExplainer(svm.predict_proba, X_train, nsamples=100, link="logit")
    shap_values = explainer.shap_values(X_test)

    # plot the SHAP values for the Setosa output of the first instance
    shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], X_test.iloc[0, :], link="logit")
github slundberg / shap / tests / explainers / test_kernel.py View on Github external
def test_kernel_sparse_vs_dense_multirow_background():
    import sklearn
    import shap
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression

    # train a logistic regression classifier
    X_train, X_test, Y_train, _ = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
    lr = LogisticRegression(solver='lbfgs')
    lr.fit(X_train, Y_train)

    # use Kernel SHAP to explain test set predictions with dense data
    explainer = shap.KernelExplainer(lr.predict_proba, X_train, nsamples=100, link="logit", l1_reg="rank(3)")
    shap_values = explainer.shap_values(X_test)

    X_sparse_train = sp.sparse.csr_matrix(X_train)
    X_sparse_test = sp.sparse.csr_matrix(X_test)

    lr_sparse = LogisticRegression(solver='lbfgs')
    lr_sparse.fit(X_sparse_train, Y_train)

    # use Kernel SHAP again but with sparse data
    sparse_explainer = shap.KernelExplainer(lr.predict_proba, X_sparse_train, nsamples=100, link="logit", l1_reg="rank(3)")
    sparse_shap_values = sparse_explainer.shap_values(X_sparse_test)

    assert(np.allclose(shap_values, sparse_shap_values, rtol=1e-05, atol=1e-05))
github slundberg / shap / tests / explainers / test_kernel.py View on Github external
def test_kernel_shap_with_a1a_sparse_nonzero_background():
    np.set_printoptions(threshold=100000)
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LinearRegression
    from sklearn.utils.sparsefuncs import csc_median_axis_0
    import shap
    np.random.seed(0)

    X, y = shap.datasets.a1a() # pylint: disable=unbalanced-tuple-unpacking
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.01, random_state=0)
    linear_model = LinearRegression()
    linear_model.fit(x_train, y_train)
    # Calculate median of background data
    median_dense = csc_median_axis_0(x_train.tocsc())
    median = sp.sparse.csr_matrix(median_dense)
    explainer = shap.KernelExplainer(linear_model.predict, median)
    shap_values = explainer.shap_values(x_test)

    def dense_to_sparse_predict(data):
        sparse_data = sp.sparse.csr_matrix(data)
        return linear_model.predict(sparse_data)

    explainer_dense = shap.KernelExplainer(dense_to_sparse_predict, median_dense.reshape((1, len(median_dense))))
    x_test_dense = x_test.toarray()
    shap_values_dense = explainer_dense.shap_values(x_test_dense)
    # Validate sparse and dense result is the same
    assert(np.allclose(shap_values, shap_values_dense, rtol=1e-02, atol=1e-01))
github slundberg / shap / tests / explainers / test_tree.py View on Github external
# train a model with single tree
    Xd = xgboost.DMatrix(X, label=y)
    model = xgboost.train({'eta':1, 
                       'max_depth':max_depth, 
                       'base_score': 0, 
                       "lambda": 0}, 
                      Xd, 1)
    ypred = model.predict(Xd)

    # Compare for five random samples
    for i in range(5):
        x_ind = np.random.choice(X.shape[1]); x = X[x_ind:x_ind+1,:]

        expl = shap.TreeExplainer(model, X, feature_perturbation="interventional")
        f = lambda inp : model.predict(xgboost.DMatrix(inp))
        expl_kern = shap.KernelExplainer(f, X)

        itshap = expl.shap_values(x)
        kshap = expl_kern.shap_values(x, nsamples=150)
        assert np.allclose(itshap,kshap), \
        "Kernel SHAP doesn't match Independent Tree SHAP!"
        assert np.allclose(itshap.sum() + expl.expected_value, ypred[x_ind]), \
        "SHAP values don't sum to model output!"
github slundberg / shap / tests / test_basic.py View on Github external
def test_null_model_small():
    explainer = shap.KernelExplainer(lambda x: np.zeros(x.shape[0]), np.ones((2, 4)), nsamples=100)
    e = explainer.explain(np.ones((1, 4)))
    assert np.sum(np.abs(e.effects)) < 1e-8
github slundberg / shap / tests / explainers / test_tree.py View on Github external
# train a model with single tree
    Xd = xgboost.DMatrix(X, label=y)
    model = xgboost.train({'eta':1, 
                       'max_depth':max_depth, 
                       'base_score': 0, 
                       "lambda": 0}, 
                      Xd, 1)
    ypred = model.predict(Xd)

    # Compare for five random samples
    for i in range(5):
        x_ind = np.random.choice(X.shape[1]); x = X[x_ind:x_ind+1,:]

        expl = shap.TreeExplainer(model, X, feature_perturbation="interventional")
        f = lambda inp : model.predict(xgboost.DMatrix(inp))
        expl_kern = shap.KernelExplainer(f, X)

        itshap = expl.shap_values(x)
        kshap = expl_kern.shap_values(x, nsamples=150)
        assert np.allclose(itshap,kshap), \
        "Kernel SHAP doesn't match Independent Tree SHAP!"
        assert np.allclose(itshap.sum() + expl.expected_value, ypred[x_ind]), \
        "SHAP values don't sum to model output!"
github slundberg / shap / tests / explainers / test_kernel.py View on Github external
def test_null_model():
    explainer = shap.KernelExplainer(lambda x: np.zeros(x.shape[0]), np.ones((2, 10)), nsamples=100)
    e = explainer.explain(np.ones((1, 10)))
    assert np.sum(np.abs(e)) < 1e-8
github Ashton-Sidhu / aethos / aethos / model_analysis / model_explanation.py View on Github external
self.x_test = x_test
        self.y_test = y_test

        if learner == "linear":
            self.explainer = shap.LinearExplainer(
                self.model, self.x_train, feature_dependence="independent"
            )
        elif learner == "tree":
            self.explainer = shap.TreeExplainer(self.model)
        elif learner == "kernel":
            if hasattr(self.model, "predict_proba"):
                func = self.model.predict_proba
            else:
                func = self.model.predict

            self.explainer = shap.KernelExplainer(func, self.x_train)
        else:
            raise ValueError(f"Learner: {learner} is not supported yet.")

        self.expected_value = self.explainer.expected_value
        self.shap_values = np.array(self.explainer.shap_values(self.x_test)).astype(
            float
        )

        if isinstance(self.model, lgb.sklearn.LGBMClassifier) and isinstance(
            self.expected_value, np.float
        ):
            self.shap_values = self.shap_values[1]

        # Calculate misclassified values
        self.misclassified_values = self._calculate_misclassified()
github biolab / orange3-prototypes / orangecontrib / prototypes / explanation / explainer.py View on Github external
progress_callback: Callable,
) -> Tuple[List[np.ndarray], np.ndarray, np.ndarray]:
    """
    Computes SHAP values for any learner with KernelExplainer.
    """
    # 1000 is a number that for normal data and model do not take so long
    data_sample, sample_mask = _subsample_data(transformed_data, 1000)

    try:
        ref = kmeans(transformed_reference_data.X, k=10)
    except ValueError:
        # k-means fails with value error when it cannot produce enough clusters
        # in this case we will use sample instead of clusters
        ref = sample(transformed_reference_data.X, nsamples=100)

    explainer = KernelExplainer(
        lambda x: (
            model(x)
            if model.domain.class_var.is_continuous
            else model(x, model.Probs)
        ),
        ref,
    )

    shap_values = []
    for i, row in enumerate(data_sample.X):
        progress_callback(i / len(data_sample))
        shap_values.append(
            explainer.shap_values(row, nsamples=100, silent=True, l1_reg=False)
        )
    return (
        _join_shap_values(shap_values),
github interpretml / interpret-community / python / interpret_community / shap / kernel_explainer.py View on Github external
def _reset_evaluation_background(self, function, **kwargs):
        """Modify the explainer to use the new evalaution example for background data.

        Note when constructing explainer an evaluation example is not available hence the initialization data is used.

        :param function: Function.
        :type function: Function that accepts a 2d ndarray
        """
        function, summary = self._prepare_function_and_summary(function, self.original_data_ref,
                                                               self.current_index_list,
                                                               explain_subset=self.explain_subset, **kwargs)
        self.explainer = shap.KernelExplainer(function, summary)