def test_isolation_forest():
    import shap
    import numpy as np
    from sklearn.ensemble import IsolationForest
    # private helper; moved to sklearn.ensemble._iforest in sklearn >= 0.22
    from sklearn.ensemble.iforest import _average_path_length

    X, y = shap.datasets.boston()
    # behaviour='new' matches older sklearn; the parameter was removed later
    iso = IsolationForest(behaviour='new', contamination='auto')
    iso.fit(X)

    explainer = shap.TreeExplainer(iso)
    shap_values = explainer.shap_values(X)

    # SHAP values decompose the anomaly score in (negated) expected path length
    # space, so inverting that transform should recover score_samples exactly
    score_from_shap = -2 ** (
        -(np.sum(shap_values, axis=1) + explainer.expected_value) /
        _average_path_length(np.array([iso.max_samples_]))[0]
    )
    assert np.allclose(iso.score_samples(X), score_from_shap, atol=1e-7)

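
# --- Added sketch (not from the scraped suite): a minimal check of TreeExplainer's
# local accuracy property, assuming a sklearn RandomForestRegressor on the same
# boston data; each prediction should equal expected_value plus the row's SHAP sum.
def sketch_tree_explainer_local_accuracy():
    import numpy as np
    import shap
    import sklearn.ensemble

    X, y = shap.datasets.boston()
    model = sklearn.ensemble.RandomForestRegressor(n_estimators=50)
    model.fit(X, y)

    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # local accuracy: prediction = expected_value + sum of per-feature SHAP values
    reconstructed = explainer.expected_value + shap_values.sum(axis=1)
    assert np.allclose(model.predict(X), reconstructed, atol=1e-4)
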
def test_front_page_model_agnostic():
    import sklearn.svm
    import shap
    from sklearn.model_selection import train_test_split

    # print the JS visualization code to the notebook
    shap.initjs()

    # train a SVM classifier
    X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
    svm = sklearn.svm.SVC(kernel='rbf', probability=True)
    svm.fit(X_train, Y_train)

    # use Kernel SHAP to explain test set predictions
    explainer = shap.KernelExplainer(svm.predict_proba, X_train, nsamples=100, link="logit")
    shap_values = explainer.shap_values(X_test)

    # plot the SHAP values for the Setosa output of the first instance
    shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], X_test.iloc[0, :], link="logit")

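
# --- Added sketch (assumed usage, not from the scraped suite): the Kernel SHAP
# sampling budget can also be set per call via the nsamples argument of
# shap_values; fewer samples run faster but give noisier attributions.
def sketch_kernel_nsamples_per_call():
    import sklearn.svm
    import shap
    from sklearn.model_selection import train_test_split

    X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
    svm = sklearn.svm.SVC(kernel='rbf', probability=True)
    svm.fit(X_train, Y_train)

    explainer = shap.KernelExplainer(svm.predict_proba, X_train, link="logit")
    shap_values = explainer.shap_values(X_test, nsamples=100)  # per-call budget
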
def test_kernel_sparse_vs_dense_multirow_background():
    import numpy as np
    import scipy as sp
    import scipy.sparse
    import shap
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression

    # train a logistic regression classifier
    X_train, X_test, Y_train, _ = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
    lr = LogisticRegression(solver='lbfgs')
    lr.fit(X_train, Y_train)

    # use Kernel SHAP to explain test set predictions with dense data
    explainer = shap.KernelExplainer(lr.predict_proba, X_train, nsamples=100, link="logit", l1_reg="rank(3)")
    shap_values = explainer.shap_values(X_test)

    # retrain on the same data in sparse form
    X_sparse_train = sp.sparse.csr_matrix(X_train)
    X_sparse_test = sp.sparse.csr_matrix(X_test)
    lr_sparse = LogisticRegression(solver='lbfgs')
    lr_sparse.fit(X_sparse_train, Y_train)

    # use Kernel SHAP again but with sparse data
    sparse_explainer = shap.KernelExplainer(lr_sparse.predict_proba, X_sparse_train, nsamples=100, link="logit", l1_reg="rank(3)")
    sparse_shap_values = sparse_explainer.shap_values(X_sparse_test)

    # the sparse and dense runs should agree
    assert np.allclose(shap_values, sparse_shap_values, rtol=1e-05, atol=1e-05)

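
# --- Added sketch (assumed usage, not from the scraped suite): a large background
# set can be summarized with shap.kmeans, which usually speeds KernelExplainer up
# substantially while changing the attributions only slightly.
def sketch_kernel_kmeans_background():
    import shap
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    X_train, X_test, Y_train, _ = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
    lr = LogisticRegression(solver='lbfgs')
    lr.fit(X_train, Y_train)

    # 10 weighted cluster centers stand in for the full background set
    background = shap.kmeans(X_train, 10)
    explainer = shap.KernelExplainer(lr.predict_proba, background, link="logit")
    shap_values = explainer.shap_values(X_test, nsamples=100)
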
def test_kernel_shap_with_a1a_sparse_nonzero_background():
    import numpy as np
    import scipy as sp
    import scipy.sparse
    import shap
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LinearRegression
    from sklearn.utils.sparsefuncs import csc_median_axis_0

    np.set_printoptions(threshold=100000)  # widen numpy output for easier debugging
    np.random.seed(0)
    X, y = shap.datasets.a1a() # pylint: disable=unbalanced-tuple-unpacking
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.01, random_state=0)
    linear_model = LinearRegression()
    linear_model.fit(x_train, y_train)

    # use the (sparse) median of the background data as the background dataset
    median_dense = csc_median_axis_0(x_train.tocsc())
    median = sp.sparse.csr_matrix(median_dense)
    explainer = shap.KernelExplainer(linear_model.predict, median)
    shap_values = explainer.shap_values(x_test)

    # rerun with a dense background and a model that predicts through sparse data
    def dense_to_sparse_predict(data):
        sparse_data = sp.sparse.csr_matrix(data)
        return linear_model.predict(sparse_data)

    explainer_dense = shap.KernelExplainer(dense_to_sparse_predict, median_dense.reshape((1, len(median_dense))))
    x_test_dense = x_test.toarray()
    shap_values_dense = explainer_dense.shap_values(x_test_dense)

    # validate that the sparse and dense results are the same
    assert np.allclose(shap_values, shap_values_dense, rtol=1e-02, atol=1e-01)

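
# --- Added sketch (assumed, not from the scraped suite): with the default identity
# link, Kernel SHAP attributions plus the expected value should reconstruct the raw
# model output, here checked on a small synthetic linear-regression problem.
def sketch_kernel_local_accuracy():
    import numpy as np
    import shap
    from sklearn.linear_model import LinearRegression

    np.random.seed(0)
    X = np.random.randn(50, 4)
    y = X @ np.array([1.0, -2.0, 0.5, 0.0])
    model = LinearRegression().fit(X, y)

    explainer = shap.KernelExplainer(model.predict, X)
    shap_values = explainer.shap_values(X[:3])

    # local accuracy: prediction = expected_value + sum of per-feature SHAP values
    reconstructed = explainer.expected_value + shap_values.sum(axis=1)
    assert np.allclose(model.predict(X[:3]), reconstructed, atol=1e-4)
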
# The snippet below was truncated upstream; the function header and first two
# imports are restored, with the name inferred from the l1_reg="rank(3)" variant.
def test_front_page_model_agnostic_rank():
    import sklearn.svm
    import shap
    from sklearn.model_selection import train_test_split

    # print the JS visualization code to the notebook
    shap.initjs()

    # train a SVM classifier
    X_train, X_test, Y_train, Y_test = train_test_split(*shap.datasets.iris(), test_size=0.1, random_state=0)
    svm = sklearn.svm.SVC(kernel='rbf', probability=True)
    svm.fit(X_train, Y_train)

    # use Kernel SHAP to explain test set predictions, keeping only the top 3 features
    explainer = shap.KernelExplainer(svm.predict_proba, X_train, nsamples=100, link="logit", l1_reg="rank(3)")
    shap_values = explainer.shap_values(X_test)

    # plot the SHAP values for the Setosa output of the first instance
    shap.force_plot(explainer.expected_value[0], shap_values[0][0, :], X_test.iloc[0, :], link="logit")

# Truncated upstream; the header and model setup below are an assumed
# reconstruction (boston data and a models list are implied by the loop body).
def test_front_page_sklearn():  # name assumed
    import sklearn.ensemble
    import shap

    # train models (assumed setup: two sklearn tree ensembles on the boston data)
    X, y = shap.datasets.boston()
    models = [
        sklearn.ensemble.RandomForestRegressor(n_estimators=100),
        sklearn.ensemble.ExtraTreesRegressor(n_estimators=100),
    ]
    for model in models:
        model.fit(X, y)

        # explain the model's predictions using SHAP values
        explainer = shap.TreeExplainer(model)
        shap_values = explainer.shap_values(X)

        # visualize the first prediction's explanation
        shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])
        # visualize the training set predictions
        shap.force_plot(explainer.expected_value, shap_values, X)

        # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
        shap.dependence_plot(5, shap_values, X, show=False)
        shap.dependence_plot("RM", shap_values, X, show=False)

        # summarize the effects of all the features
        shap.summary_plot(shap_values, X, show=False)

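
# --- Added sketch (assumed usage, not from the scraped suite): dependence_plot can
# color the scatter by an explicit second feature via interaction_index instead of
# the automatically chosen one, which helps when probing a suspected interaction.
def sketch_dependence_plot_interaction_index():
    import shap
    import sklearn.ensemble

    X, y = shap.datasets.boston()
    model = sklearn.ensemble.RandomForestRegressor(n_estimators=10)
    model.fit(X, y)

    shap_values = shap.TreeExplainer(model).shap_values(X)
    # color the RM dependence plot by LSTAT rather than the auto-selected feature
    shap.dependence_plot("RM", shap_values, X, interaction_index="LSTAT", show=False)
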
def test_lightgbm_multiclass():
    try:
        import lightgbm
    except ImportError:
        print("Skipping test_lightgbm_multiclass!")
        return
    import shap

    # train lightgbm model
    X, Y = shap.datasets.iris()
    model = lightgbm.sklearn.LGBMClassifier()
    model.fit(X, Y)

    # explain the model's predictions using SHAP values
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X, show=False)

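
# --- Added sketch (assumed usage, not from the scraped suite): for multiclass
# models shap_values is a list with one array per class, and summary_plot accepts
# the whole list, drawing a per-class feature-importance bar chart.
def sketch_multiclass_summary_plot():
    try:
        import lightgbm
    except ImportError:
        return
    import shap

    X, Y = shap.datasets.iris()
    model = lightgbm.sklearn.LGBMClassifier()
    model.fit(X, Y)

    shap_values = shap.TreeExplainer(model).shap_values(X)
    shap.summary_plot(shap_values, X, show=False)  # list input -> per-class bars
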
def test_xgboost_multiclass():
    try:
        import xgboost
    except ImportError:
        print("Skipping test_xgboost_multiclass!")
        return
    import shap

    # train XGBoost model
    X, Y = shap.datasets.iris()
    model = xgboost.XGBClassifier(objective="binary:logistic", max_depth=4)
    model.fit(X, Y)

    # explain the model's predictions using SHAP values
    shap_values = shap.TreeExplainer(model).shap_values(X)

    # ensure plot works for first class
    shap.dependence_plot(0, shap_values[0], X, show=False)

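
# --- Added sketch (assumed, not from the scraped suite): XGBoost can compute the
# same attributions natively via predict(..., pred_contribs=True), which makes a
# handy cross-check against TreeExplainer; the last column is the bias term.
def sketch_xgboost_pred_contribs():
    try:
        import xgboost
    except ImportError:
        return
    import numpy as np
    import shap

    X, y = shap.datasets.boston()
    dtrain = xgboost.DMatrix(X, label=y)
    model = xgboost.train({"learning_rate": 0.01}, dtrain, num_boost_round=10)

    shap_values = shap.TreeExplainer(model).shap_values(X)
    contribs = model.predict(xgboost.DMatrix(X), pred_contribs=True)

    # all but the last column of pred_contribs should match the SHAP values
    assert np.allclose(shap_values, contribs[:, :-1], atol=1e-4)
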
# Truncated upstream; the header and imports below are an assumed reconstruction,
# and force_plot calls are updated to pass the base value first, matching the API
# used elsewhere in this file.
def test_front_page_sklearn_random_forest():  # name assumed
    import sklearn.ensemble
    import shap

    # train model
    X, y = shap.datasets.boston()
    model = sklearn.ensemble.RandomForestRegressor(n_estimators=100)
    model.fit(X, y)

    # explain the model's predictions using SHAP values
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X)

    # visualize the first prediction's explanation
    shap.force_plot(explainer.expected_value, shap_values[0, :], X.iloc[0, :])
    # visualize the training set predictions
    shap.force_plot(explainer.expected_value, shap_values, X)

    # create a SHAP dependence plot to show the effect of a single feature across the whole dataset
    shap.dependence_plot(5, shap_values, X, show=False)
    shap.dependence_plot("RM", shap_values, X, show=False)

    # summarize the effects of all the features
    shap.summary_plot(shap_values, X, show=False)

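
# --- Added sketch (assumed usage, not from the scraped suite): plot_type="bar"
# turns the summary plot into a simple mean(|SHAP value|) feature-importance chart.
def sketch_summary_bar_plot():
    import shap
    import sklearn.ensemble

    X, y = shap.datasets.boston()
    model = sklearn.ensemble.RandomForestRegressor(n_estimators=10)
    model.fit(X, y)

    shap_values = shap.TreeExplainer(model).shap_values(X)
    shap.summary_plot(shap_values, X, plot_type="bar", show=False)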