Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if catValues:
catFeatValues = dict()
catFeatIndices = list()
items = catValues.split(",")
for item in items:
parts = item.split(":")
col = int(parts[0])
catFeatValues[col] = parts[1:]
catFeatIndices.append(col)
encoder = CatLabelGenerator(catFeatValues, ",")
for c in catFeatIndices:
values = encoder.getOrigLabels(c)
catFeatValues[c] = values
self.explainer = lime.lime_tabular.LimeTabularExplainer(trainFeatData, feature_names=featNames,\
categorical_features=catFeatIndices, categorical_names=catFeatValues, kernel_width=kernelWidth,\
verbose=verbose,class_names=classNames,feature_selection=featSelection,sample_around_instance=sampLocal)
# Fit a random-forest regressor (default hyper-parameters) on the training split.
regressor = ensemble.RandomForestRegressor()
regressor.fit(X_train, y_train)
##########################################################
# Inspect predictions for a few houses
#
# For this, separate out the categorical features: a column is flagged as
# categorical when it holds fewer than 10 distinct values.
import numpy as np
# Enumerating boston.data.T walks the data one column at a time
# (assumes boston.data is a 2-D samples-by-features array -- TODO confirm).
categorical_features = [i for i, col in enumerate(boston.data.T)
if np.unique(col).size < 10]
##########################################################
# Now use a lime explainer for tabular data, configured in regression mode
# with the single output labelled 'price'.
from lime.lime_tabular import LimeTabularExplainer
explainer = LimeTabularExplainer(X_train,
feature_names=boston.feature_names,
class_names=['price'],
categorical_features=categorical_features,
mode='regression')
# Now explain a prediction: test row 25, using the fitted regressor's
# predict function and num_features=10.
exp = explainer.explain_instance(X_test[25], regressor.predict,
num_features=10)
# Build the figure first; pyplot is only imported afterwards because the
# name `plt` is first needed for tight_layout() below.
exp.as_pyplot_figure()
from matplotlib import pyplot as plt
plt.tight_layout()
##########################################################
# Print the same explanation in list form.
print(exp.as_list())
##########################################################
pytorch_model = load_orig_imagenet_model(arch_name='resnet50')
# load the class label
label_map = load_imagenet_label_map()
elif args.dataset == 'places365':
pytorch_model = load_orig_places365_model(arch_name='resnet50')
# load the class label
label_map = load_class_label()
else:
print('Invalid datasest!!')
exit(0)
pytorch_explainer = lime_image.LimeImageExplainer(random_state=args.lime_explainer_seed)
slic_parameters = {'n_segments': args.lime_superpixel_num, 'compactness': 30, 'sigma': 3}
segmenter = SegmentationAlgorithm('slic', **slic_parameters)
pill_transf = get_pil_transform()
#########################################################
# Function to compute probabilities
# Pytorch
pytorch_preprocess_transform = get_pytorch_preprocess_transform()
def pytorch_batch_predict(images):
batch = torch.stack(tuple(pytorch_preprocess_transform(i) for i in images), dim=0)
batch = batch.to('cuda')
if args.if_pre == 1:
logits = pytorch_model(batch)
probs = F.softmax(logits, dim=1)
# Split (X_vec, y_vec) with train_size=0.55 and fit the pipeline on the
# training part.
X_train, X_test, y_train, y_test = train_test_split(X_vec, y_vec,
                                                    train_size=0.55)
simple_rf_pipeline.fit(X_train, y_train)
#%%
import os
import sys
try:
    import lime
except ImportError:
    # lime is not importable from the current environment: add the directory
    # two levels up to the module search path and retry. Only ImportError is
    # handled so that any other failure while importing lime stays visible.
    sys.path.append(os.path.join('..', '..'))
    import lime
#%%
from lime import lime_image
from lime.wrappers.scikit_image import SegmentationAlgorithm
explainer = lime_image.LimeImageExplainer(verbose=False)
segmenter = SegmentationAlgorithm('quickshift', kernel_size=1, max_dist=200, ratio=0.2)
#%%
# Explain the first test image using the pipeline's class probabilities and
# the quickshift segmenter configured above.
explanation = explainer.explain_instance(
    X_test[0],
    classifier_fn=simple_rf_pipeline.predict_proba,
    top_labels=10, hide_color=0, num_samples=10000, segmentation_fn=segmenter)
#%%
# Left panel: regions requested with positive_only=True for the true label of
# the first test image.
temp, mask = explanation.get_image_and_mask(
    y_test[0], positive_only=True, num_features=10, hide_rest=False,
    min_weight=0.01)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
ax1.imshow(label2rgb(mask, temp, bg_label=0), interpolation='nearest')
ax1.set_title('Positive Regions for {}'.format(y_test[0]))
# Right panel: same label with positive_only=False; the mask is passed as
# 3 - mask before colouring.
temp, mask = explanation.get_image_and_mask(
    y_test[0], positive_only=False, num_features=10, hide_rest=False,
    min_weight=0.01)
ax2.imshow(label2rgb(3 - mask, temp, bg_label=0), interpolation='nearest')
ax2.set_title('Positive/Negative Regions for {}'.format(y_test[0]))
def explainer(method: str,
path_to_file: str,
text: str,
num_samples: int) -> LimeTextExplainer:
"""Run LIME explainer on provided classifier"""
model = explainer_class(method, path_to_file)
predictor = model.predict
# Create a LimeTextExplainer
explainer = LimeTextExplainer(
# Specify split option
split_expression=lambda x: x.split(),
# Our classifier uses bigrams or contextual ordering to classify text
# Hence, order matters, and we cannot use bag of words.
bow=False,
# Specify class names for this case
class_names=[1, 2, 3, 4, 5]
)
# Make a prediction and explain it:
exp = explainer.explain_instance(
text,
classifier_fn=predictor,
top_labels=1,
num_features=20,
num_samples=num_samples,
if(self.training_data_stats is None):
discretized_training_data = self.discretizer.discretize(
training_data)
if kernel_width is None:
kernel_width = np.sqrt(training_data.shape[1]) * .75
kernel_width = float(kernel_width)
if kernel is None:
def kernel(d, kernel_width):
    """Default weighting kernel: ``sqrt(exp(-d**2 / kernel_width**2))``.

    Args:
        d: Distance value(s). Scalars and NumPy arrays both work because
            only elementwise operations are applied.
        kernel_width: Width that scales the squared distance.

    Returns:
        The kernel weight(s); equal to 1 where ``d`` is 0.
    """
    scaled_neg_sq = -(d ** 2) / kernel_width ** 2
    return np.sqrt(np.exp(scaled_neg_sq))
kernel_fn = partial(kernel, kernel_width=kernel_width)
self.feature_selection = feature_selection
self.base = lime_base.LimeBase(kernel_fn, verbose, random_state=self.random_state)
self.class_names = class_names
# Though set has no role to play if training data stats are provided
self.scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
self.scaler.fit(training_data)
self.feature_values = {}
self.feature_frequencies = {}
for feature in self.categorical_features:
if training_data_stats is None:
if self.discretizer is not None:
column = discretized_training_data[:, feature]
else:
column = training_data[:, feature]
feature_count = collections.Counter(column)
train_size=0.55)
simple_rf_pipeline.fit(X_train, y_train)
#%%
import os
import sys
try:
    import lime
except ImportError:
    # lime is not importable from the current environment: add the directory
    # two levels up to the module search path and retry. Only ImportError is
    # handled so that any other failure while importing lime stays visible.
    sys.path.append(os.path.join('..', '..'))
    import lime
#%%
from lime import lime_image
from lime.wrappers.scikit_image import SegmentationAlgorithm
explainer = lime_image.LimeImageExplainer(verbose=False)
segmenter = SegmentationAlgorithm('quickshift', kernel_size=1, max_dist=200, ratio=0.2)
#%%
# Explain the first test image using the pipeline's class probabilities and
# the quickshift segmenter configured above.
explanation = explainer.explain_instance(
    X_test[0],
    classifier_fn=simple_rf_pipeline.predict_proba,
    top_labels=10, hide_color=0, num_samples=10000, segmentation_fn=segmenter)
#%%
# Left panel: regions requested with positive_only=True for the true label of
# the first test image.
temp, mask = explanation.get_image_and_mask(
    y_test[0], positive_only=True, num_features=10, hide_rest=False,
    min_weight=0.01)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
ax1.imshow(label2rgb(mask, temp, bg_label=0), interpolation='nearest')
ax1.set_title('Positive Regions for {}'.format(y_test[0]))
# Right panel: same label with positive_only=False; the mask is passed as
# 3 - mask before colouring.
temp, mask = explanation.get_image_and_mask(
    y_test[0], positive_only=False, num_features=10, hide_rest=False,
    min_weight=0.01)
ax2.imshow(label2rgb(3 - mask, temp, bg_label=0), interpolation='nearest')
ax2.set_title('Positive/Negative Regions for {}'.format(y_test[0]))
#%%
# load the class label
label_map = load_imagenet_label_map()
elif args.dataset == 'places365':
pytorch_model = load_orig_places365_model(arch_name='resnet50')
# load the class label
label_map = load_class_label()
else:
print('Invalid datasest!!')
exit(0)
pytorch_explainer = lime_image.LimeImageExplainer(random_state=args.lime_explainer_seed)
slic_parameters = {'n_segments': args.lime_superpixel_num, 'compactness': 30, 'sigma': 3}
segmenter = SegmentationAlgorithm('slic', **slic_parameters)
pill_transf = get_pil_transform()
#########################################################
# Function to compute probabilities
# Pytorch
pytorch_preprocess_transform = get_pytorch_preprocess_transform()
def pytorch_batch_predict(images):
batch = torch.stack(tuple(pytorch_preprocess_transform(i) for i in images), dim=0)
batch = batch.to('cuda')
if args.if_pre == 1:
logits = pytorch_model(batch)
probs = F.softmax(logits, dim=1)
else:
probs = pytorch_model(batch)
def get_lime(request, pk):  # TODO: changed self to request, check if correct or not
    """Explain one test-set prediction of a job's model with LIME.

    Loads the predictive model stored for the job, builds a
    ``LimeTabularExplainer`` over the encoded training log (every feature
    column is declared categorical) and explains the test row at position 1.

    Args:
        request: Not used by the body.
        pk: Primary key used to look up the ``Job``.

    Returns:
        None. The explanation is rendered with ``show_in_notebook``.

    Raises:
        IndexError: presumably, when no ``Job`` matches ``pk`` (the first
            element of the filtered queryset is taken) -- TODO confirm.
    """
    # get model
    job = Job.objects.filter(pk=pk)[0]
    # NOTE(review): joblib.load unpickles the file, so model_path must point
    # at a trusted artefact.
    model = joblib.load(job.predictive_model.model_path)

    # load data
    training_df, test_df = get_encoded_logs(job)

    # Strip the identifier and target columns once instead of recomputing the
    # same drop for every argument. `columns=` replaces the positional axis
    # argument, which recent pandas releases no longer accept.
    non_feature_columns = ['trace_id', 'label']
    train_features = training_df.drop(columns=non_feature_columns)
    test_features = test_df.drop(columns=non_feature_columns)
    feature_names = list(train_features.columns.values)

    # fixed (not random) position of the test row to explain
    explanation_target = 1

    # get the actual explanation
    explainer = lime.lime_tabular.LimeTabularExplainer(
        # `.values` replaces DataFrame.as_matrix(), which pandas has removed.
        train_features.values,
        feature_names=feature_names,
        # every feature index is treated as categorical
        categorical_features=list(range(len(feature_names))),
        verbose=True,
        mode='classification',
    )
    exp = explainer.explain_instance(
        # hand LIME a plain 1-D array rather than a pandas Series
        test_features.iloc[explanation_target].values,
        # TODO probably the opposite would be way less computationally intensive
        model[0].predict_proba,
        num_features=5
    )
    # NOTE(review): the list form is computed but its result is discarded,
    # exactly as before -- confirm whether it was meant to be returned.
    exp.as_list()

    # show plot
    exp.show_in_notebook(show_table=True)
raise ValueError(
'Indices given in the {} parameter '
'are not valid for the input data '
'array.'.format(categorical_indices_keyword))
init_params[categorical_indices_keyword] = np.array(
[data.dtype.names.index(y) for y in categorical_indices])
data = fuat.as_unstructured(data)
# Get a LIME tabular explainer
self.mode = init_params.get('mode', 'classification')
if self.mode not in ['classification', 'regression']:
raise ValueError("The mode must be either 'classification' or "
"'regression'. '{}' given.".format(self.mode))
self.tabular_explainer = lime.lime_tabular.LimeTabularExplainer(
data, **init_params)
# Check the model
self.model = model
self.model_is_probabilistic = False
if model is not None:
if fumv.check_model_functionality(
model, require_probabilities=True, suppress_warning=True):
self.model_is_probabilistic = True
elif fumv.check_model_functionality(
model, require_probabilities=False, suppress_warning=True):
self.model_is_probabilistic = False
logger.warning('The model can only be used for LIME in a '
'regressor mode.')
else:
raise IncompatibleModelError('LIME requires a model object to '