Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def explainer(method: str,
path_to_file: str,
text: str,
num_samples: int) -> LimeTextExplainer:
"""Run LIME explainer on provided classifier"""
model = explainer_class(method, path_to_file)
predictor = model.predict
# Create a LimeTextExplainer
explainer = LimeTextExplainer(
# Specify split option
split_expression=lambda x: x.split(),
# Our classifier uses bigrams or contextual ordering to classify text
# Hence, order matters, and we cannot use bag of words.
bow=False,
# Specify class names for this case
class_names=[1, 2, 3, 4, 5]
)
# Make a prediction and explain it:
exp = explainer.explain_instance(
text,
classifier_fn=predictor,
top_labels=1,
num_features=20,
num_samples=num_samples,
Takes as input a list of strings that make up the documents to which LIME
should be applied. Returns Explanation class instances.
Parameters
----------
docs : list of strings
List of input documents.
Returns
-------
exps : list of classes
For each input document, an Explanation class object on which, for
example, the .to_list, to_notebook etc. functions can be called.
"""
explainer = LimeTextExplainer()
experiments = []
for doc in docs: # NOTE: this might have messed up in a generator
experiment = explainer.explain_instance(
doc, self.pipeline.predict_proba, top_labels=self.n_classes)
experiments.append(experiment)
return experiments
import sklearn.preprocessing
from sklearn.pipeline import Pipeline
from scipy.spatial.distance import cdist
import pandas as pd
from lime.lime_text import LimeTextExplainer
import os
import joblib
from tqdm import tqdm
import logging
logger = logging.getLogger(__name__)
# Callable returned by `ds.score_unique()`; `_vectorizer` applies it to every
# text block it receives. (`ds` is defined outside this excerpt.)
M = ds.score_unique()
# Module-level LIME text explainer built with default settings; it is
# referenced as a global by `_explain_text`.
explainer = LimeTextExplainer()
def _vectorizer(text_blocks):
    """Apply the module-level callable ``M`` to each text block.

    Args:
        text_blocks: Iterable of inputs; every element is passed to ``M``
            individually, in order.

    Returns:
        The ``np.array`` built from the list of per-block ``M`` results.
    """
    per_block = list(map(M, text_blocks))
    return np.array(per_block)
# Wrap `_vectorizer` as a scikit-learn transformer so it can be used as a
# pipeline step. Input validation is switched off (validate=False);
# presumably because the inputs are raw text blocks rather than numeric
# arrays -- confirm against the callers.
vectorizer = sklearn.preprocessing.FunctionTransformer(
    func=_vectorizer,
    validate=False,
)
def _explain_text(text, P, num_features):
global explainer
exp = explainer.explain_instance(
text,
P.predict_proba,
def __init__(self, *argv, **kwargs):
    """Set up the wrapper and the LIME text explainer it delegates to.

    Every positional and keyword argument is forwarded unchanged, first to
    the parent class initializer and then to
    ``lime_text.LimeTextExplainer``; the resulting instance is stored on
    ``self.explainer``.
    """
    super(LimeTextExplainer, self).__init__(*argv, **kwargs)
    wrapped_explainer = lime_text.LimeTextExplainer(*argv, **kwargs)
    self.explainer = wrapped_explainer
from lime.lime_text import LimeTextExplainer
if len(self._text_columns) > 1 and not column_name:
raise ValueError('There are multiple text columns in the input of the model. ' +
'Please specify "column_name".')
elif column_name and column_name not in self._text_columns:
raise ValueError('Specified column_name "%s" not found in the model input.'
% column_name)
text_column_name = column_name if column_name else self._text_columns[0]
if isinstance(instance, six.string_types):
instance = next(csv.DictReader([instance], fieldnames=self._headers))
predict_fn = self._make_text_predict_fn(labels, instance, text_column_name)
explainer = LimeTextExplainer(class_names=labels)
exp = explainer.explain_instance(
instance[text_column_name], predict_fn, labels=range(len(labels)),
num_features=num_features, num_samples=num_samples)
return exp