How to use the shap.summary_plot function in shap

To help you get started, we’ve selected a few shap.summary_plot examples based on popular ways the function is used in public projects.

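Before looking at the project snippets below, here is a minimal, self-contained sketch of a typical shap.summary_plot call. It assumes xgboost and scikit-learn are installed and uses the diabetes toy dataset purely for illustration; any model supported by shap.TreeExplainer works the same way.

import shap
import xgboost
from sklearn.datasets import load_diabetes

# train a small tree model on a toy dataset (illustrative only)
X, y = load_diabetes(return_X_y=True, as_frame=True)
model = xgboost.XGBRegressor(n_estimators=100).fit(X, y)

# compute SHAP values and summarize every feature's impact on the model output
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)
shap.summary_plot(shap_values, X)                    # beeswarm overview
shap.summary_plot(shap_values, X, plot_type="bar")   # mean |SHAP| bar chart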

From slundberg/shap, tests/explainers/test_tree.py (view on GitHub):
    # explain the model's predictions using SHAP values
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # visualize the first prediction's explanation
    shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

    # visualize the training set predictions
    shap.force_plot(explainer.expected_value, shap_values, X)

    # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
    shap.dependence_plot(5, shap_values, X, show=False)
    shap.dependence_plot("RM", shap_values, X, show=False)

    # summarize the effects of all the features
    shap.summary_plot(shap_values, X, show=False)
From slundberg/shap, tests/test_basic.py (view on GitHub):
    # explain the model's predictions using SHAP values (use pred_contrib in LightGBM)
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # visualize the first prediction's explanation
    shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

    # visualize the training set predictions
    shap.force_plot(explainer.expected_value, shap_values, X)

    # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
    shap.dependence_plot(5, shap_values, X, show=False)
    shap.dependence_plot("RM", shap_values, X, show=False)

    # summarize the effects of all the features
    shap.summary_plot(shap_values, X, show=False)
From slundberg/shap, tests/explainers/test_tree.py (view on GitHub):
        # explain the model's predictions using SHAP values
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)

        # visualize the first prediction's explanation
        shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])

        # visualize the training set predictions
        shap.force_plot(explainer.expected_value, shap_values, X)

        # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
        shap.dependence_plot(5, shap_values, X, show=False)
        shap.dependence_plot("RM", shap_values, X, show=False)

        # summarize the effects of all the features
        shap.summary_plot(shap_values, X, show=False)
From albertsl/toolkit, templates/python for data science.py (view on GitHub):
#ALE plots: a faster and unbiased alternative to partial dependence plots (PDPs), which have a serious problem when features are correlated.
#The computation of a partial dependence plot for a feature that is strongly correlated with other features involves averaging predictions of artificial data instances that are unlikely in reality. This can greatly bias the estimated feature effect.
#https://github.com/blent-ai/ALEPython
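#Sketch (not part of the original template): the ALEPython package linked above exposes ale_plot; the call below follows its README and should be checked against the repo
from alepython import ale_plot
ale_plot(model, X_val, 'feature_name', monte_carlo=True)  #'feature_name' is a placeholder column of X_val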

#SHAP Values: Understand how each feature affects every individual prediction
import shap
data_for_prediction = X_val.iloc[row_num]
explainer = shap.TreeExplainer(model)  #Use DeepExplainer for Deep Learning models, KernelExplainer for all other models
shap_vals = explainer.shap_values(data_for_prediction)
shap.initjs()
shap.force_plot(explainer.expected_value[1], shap_vals[1], data_for_prediction)

#We can also do a SHAP plot of the whole dataset
shap_vals = explainer.shap_values(X_val)
shap.summary_plot(shap_vals[1], X_val)
#SHAP Dependence plot
shap.dependence_plot('feature_for_x', shap_vals[1], X_val, interaction_index="feature_for_color")

#Local interpretable model-agnostic explanations (LIME)
#Surrogate models are trained to approximate the predictions of the underlying black box model. Instead of training a global surrogate model, LIME focuses on training local surrogate models to explain individual predictions.
#https://github.com/marcotcr/lime 
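#Sketch (not part of the original template): a minimal LIME example with the package linked above; model, X_val and row_num come from this template, and fitting the explainer on proper training data is preferable
from lime.lime_tabular import LimeTabularExplainer
lime_explainer = LimeTabularExplainer(X_val.values, feature_names=X_val.columns.tolist(), mode='classification')
lime_exp = lime_explainer.explain_instance(X_val.iloc[row_num].values, model.predict_proba, num_features=10)
lime_exp.show_in_notebook()  #or lime_exp.as_list() outside a notebook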

#Dimensionality reduction
#SVD: Find the percentage of variance explained by each principal component
#First scale the data, then decompose with SVD
from sklearn.preprocessing import StandardScaler
df_scaled = StandardScaler().fit_transform(df)
U, S, V = np.linalg.svd(df_scaled, full_matrices=False)
importance = S**2 / (S**2).sum()  #squared singular values are proportional to explained variance
variance_explained = importance.cumsum() * 100
#PCA: Decompose the data into a defined number of components while keeping as much variance as possible.
from sklearn.decomposition import PCA
pca = PCA(n_components=2, svd_solver='full')
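#Not in the original snippet: fit the PCA on the scaled data and read how much variance the components keep
pca.fit(df_scaled)  #df_scaled from the scaling step above
print(pca.explained_variance_ratio_)  #fraction of variance captured by each component
df_reduced = pca.transform(df_scaled)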
From mozilla/bugbug, bugbug/model.py (view on GitHub):
        feature_names = self.get_human_readable_feature_names()
        if self.calculate_importance and len(feature_names):
            explainer = shap.TreeExplainer(self.clf)
            shap_values = explainer.shap_values(X_train)

            # In the binary case, sometimes shap returns a single shap values matrix.
            if is_binary and not isinstance(shap_values, list):
                shap_values = [-shap_values, shap_values]
                summary_plot_value = shap_values[1]
                summary_plot_type = "layered_violin"
            else:
                summary_plot_value = shap_values
                summary_plot_type = None

            shap.summary_plot(
                summary_plot_value,
                to_array(X_train),
                feature_names=feature_names,
                class_names=self.class_names,
                plot_type=summary_plot_type,
                show=False,
            )

            matplotlib.pyplot.xlabel("Impact on model output")
            matplotlib.pyplot.savefig("feature_importance.png", bbox_inches="tight")
            matplotlib.pyplot.clf()

            important_features = self.get_important_features(
                importance_cutoff, shap_values
            )
From uber/causalml, causalml/inference/meta/explainer.py (view on GitHub):
    def plot_shap_values(self, shap_dict=None):
        """
        Calculates and plots the distribution of Shapley values of each feature, for each treatment group.
        Skips the calculation part if shap_dict is given.
        """
        if shap_dict is None:
            shap_dict = self.get_shap_values()

        for group, values in shap_dict.items():
            plt.title(group)
            shap.summary_plot(values, feature_names=self.features)
From KienVu2368/tabint, tabint/interpretation.py (view on GitHub):
    def summary_plot(self, plot_type='violin', alpha=0.3):
        """Supported plot_type values: 'violin', 'layered_violin', 'dot'."""
        return shap.summary_plot(self.shap_values, self.df, alpha=alpha, plot_type=plot_type)
From produvia/kryptos, ml/ml/utils/feature_exploration.py (view on GitHub):
def visualize_model(model, X, idx, configuration, namespace, name):

    if configuration['enabled'] and idx % configuration['n_iterations'] == 0:

        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)
        shap.summary_plot(shap_values, X, plot_type="bar", show=False)
        save_fig(namespace, name, idx, importance_type='shap')

        if name == 'XGBOOST':
            for i in ['weight', 'cover', 'gain']:
                if i == 'gain':
                    xgb.plot_importance(model.get_score(fmap='', importance_type=i), importance_type=i, max_num_features=20)
                else:
                    xgb.plot_importance(model, importance_type=i, max_num_features=20)
                save_fig(namespace, name, idx, importance_type=i)

        elif name == 'LIGHTGBM':
            for i in ['split', 'gain']:
                lgb.plot_importance(model, importance_type=i, max_num_features=20)
                save_fig(namespace, name, idx, importance_type=i)

        else:
From Ashton-Sidhu/aethos, aethos/model_analysis/model_explanation.py (view on GitHub):
    def summary_plot(self, output_file="", **summaryplot_kwargs):
        """
        Plots a SHAP summary plot.

        Parameters
        ----------
        output_file: str
            Output file name including extension (.png, .jpg, etc.) to save image as.
        """

        import shap

        shap.summary_plot(
            self.shap_values,
            self.x_test_array,
            feature_names=self.x_train.columns,
            show=False,
            **summaryplot_kwargs,
        )

        if output_file:  # pragma: no cover
            pl.savefig(os.path.join(IMAGE_DIR, self.model_name, output_file))
From KienVu2368/tabint, tabint/interpretation.py (view on GitHub):
    def importance_plot(self):
        return shap.summary_plot(self.shap_values, self.df, plot_type="bar")