How to use eli5 - 10 common examples

To help you get started, we’ve selected a few eli5 examples based on popular ways it is used in public projects. All ten snippets below are excerpts from the test suite of the TeamHG-Memex/eli5 repository on GitHub.


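Every example below follows the same basic pattern: fit a scikit-learn (or compatible) model, then ask eli5 to explain either the model's weights or a single prediction. Here is a minimal sketch of that pattern for a text classifier; the toy data and variable names are illustrative and are not taken from the tests below.

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

import eli5

docs = ['a leaning lemon tree', 'computer graphics in space']
labels = [0, 1]

vec = CountVectorizer()
clf = LogisticRegression()
clf.fit(vec.fit_transform(docs), labels)

# Global explanation: which features the classifier relies on overall.
print(eli5.format_as_text(eli5.explain_weights(clf, vec=vec)))

# Local explanation: why a single document gets the prediction it gets.
print(eli5.format_as_text(eli5.explain_prediction(clf, docs[0], vec=vec)))
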
Example 1: TeamHG-Memex/eli5, tests/test_sklearn_text.py
    # The excerpt starts mid-test: `doc` and `vec` are set up a few lines
    # earlier (from the assertions below, doc = 'I see: a leaning lemon tree'
    # and vec is a word-level CountVectorizer with bigrams enabled).
    vec.fit([doc])
    w_spans = get_weighted_spans(
        doc, vec,
        FeatureWeights(
            pos=[FW('see', 2), FW('leaning lemon', 5), FW('lemon tree', 8)],
            neg=[FW('tree', -6)]))
    assert w_spans == WeightedSpans(
        analyzer='word',
        document='i see: a leaning lemon tree',
        weighted_spans=[
            ('see', [(2, 5)], 2),
            ('tree', [(23, 27)], -6),
            ('leaning lemon', [(9, 16), (17, 22)], 5),
            ('lemon tree', [(17, 22), (23, 27)], 8)],
        other=FeatureWeights(
            pos=[FW(hl_in_text, 9)],
            neg=[],
        ))
Example 2: TeamHG-Memex/eli5, tests/test_sklearn_text.py
def test_weighted_spans_word():
    doc = 'I see: a leaning lemon tree'
    vec = CountVectorizer(analyzer='word')
    vec.fit([doc])
    w_spans = get_weighted_spans(
        doc, vec,
        FeatureWeights(
            pos=[FW('see', 2), FW('lemon', 4), FW('bias', 8)],
            neg=[FW('tree', -6)],
            neg_remaining=10
        ))
    assert w_spans == WeightedSpans(
        analyzer='word',
        document='i see: a leaning lemon tree',
        weighted_spans=[
            ('see', [(2, 5)], 2),
            ('lemon', [(17, 22)], 4),
            ('tree', [(23, 27)], -6)],
        other=FeatureWeights(
            pos=[FW('bias', 8), FW(hl_in_text, 0)],
            neg=[],
            neg_remaining=10,
        ))
Example 3: TeamHG-Memex/eli5, tests/test_sklearn_text.py
def test_unhashed_features_other():
    """ Check that when there are several candidates, they do not appear in "other"
    if at least one is found. If none are found, they should appear in "other"
    together.
    """
    doc = 'I see: a leaning lemon tree'
    vec = CountVectorizer(analyzer='char', ngram_range=(3, 3))
    vec.fit([doc])
    w_spans = get_weighted_spans(
        doc, vec,
        FeatureWeights(
            pos=[
                FW([{'name': 'foo', 'sign': 1}, {'name': 'see', 'sign': -1}], 2),
                FW([{'name': 'zoo', 'sign': 1}, {'name': 'bar', 'sign': 1}], 3),
            ],
            neg=[
                FW([{'name': 'ree', 'sign': 1}, {'name': 'tre', 'sign': 1}], -4),
            ],
        ))
    assert w_spans == WeightedSpans(
        analyzer='char',
        document='i see: a leaning lemon tree',
        weighted_spans=[
            ('see', [(2, 5)], 2),
            ('tre', [(23, 26)], -4),
            ('ree', [(24, 27)], -4),
            ],
        other=FeatureWeights(
            pos=[
                # ... the excerpt is truncated here in the original listing
Example 4: TeamHG-Memex/eli5, tests/test_sklearn_text.py
    # The excerpt starts just inside the test body; the enclosing `def` line
    # is not shown in the original listing.
    doc = 'I see: a leaning lemon tree'
    vec = CountVectorizer(analyzer='word', stop_words='english')
    vec.fit([doc])
    w_spans = get_weighted_spans(
        doc, vec,
        FeatureWeights(
            pos=[FW('see', 2), FW('lemon', 5), FW('bias', 8)],
            neg=[FW('tree', -6)]))
    assert w_spans == WeightedSpans(
        analyzer='word',
        document='i see: a leaning lemon tree',
        weighted_spans=[
            ('lemon', [(17, 22)], 5),
            ('tree', [(23, 27)], -6)],
        other=FeatureWeights(
            pos=[FW('bias', 8), FW('see', 2)],
            neg=[FW(hl_in_text, -1)],
        ))
Example 5: TeamHG-Memex/eli5, tests/test_sklearn_vectorizers.py
def test_explain_linear_dense():
    clf = LogisticRegression(random_state=42)
    data = [{'day': 'mon', 'moon': 'full'},
            {'day': 'tue', 'moon': 'rising'},
            {'day': 'tue', 'moon': 'rising'},
            {'day': 'mon', 'moon': 'rising'}]
    vec = DictVectorizer(sparse=False)
    X = vec.fit_transform(data)
    clf.fit(X, [0, 1, 1, 0])
    test_day = {'day': 'tue', 'moon': 'full'}
    target_names = ['sunny', 'shady']
    res1 = explain_prediction(clf, test_day, vec=vec, target_names=target_names)
    expl_text, expl_html = format_as_all(res1, clf)
    assert 'day=tue' in expl_text
    assert 'day=tue' in expl_html
    [test_day_vec] = vec.transform(test_day)
    res2 = explain_prediction(
        clf, test_day_vec, target_names=target_names,
        vectorized=True, feature_names=vec.get_feature_names())
    assert res1 == res2
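The tests format explanations with format_as_all, a helper from the eli5 test suite that renders an explanation as both plain text and HTML. Outside the test suite, the usual way to look at the same explanation in a Jupyter notebook is eli5.show_prediction and eli5.show_weights; a small sketch, reusing clf, vec, test_day and target_names from the test above:

import eli5

# Render the explanations as HTML tables in a notebook cell.
eli5.show_prediction(clf, test_day, vec=vec, target_names=target_names)
eli5.show_weights(clf, vec=vec, target_names=target_names)
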
Example 6: TeamHG-Memex/eli5, tests/test_lightgbm.py
def test_explain_prediction_clf_multitarget(newsgroups_train):
    docs, ys, target_names = newsgroups_train
    vec = CountVectorizer(stop_words='english', dtype=np.float64)
    xs = vec.fit_transform(docs)
    clf = LGBMClassifier(n_estimators=100, max_depth=2,
                         min_child_samples=1, min_child_weight=1)
    clf.fit(xs, ys)
    doc = 'computer graphics in space: a new religion'
    res = explain_prediction(clf, doc, vec=vec, target_names=target_names)
    format_as_all(res, clf)
    check_targets_scores(res)
    graphics_weights = res.targets[1].feature_weights
    assert 'computer' in get_all_features(graphics_weights.pos)
    religion_weights = res.targets[3].feature_weights
    assert 'religion' in get_all_features(religion_weights.pos)

    top_target_res = explain_prediction(clf, doc, vec=vec, top_targets=2)
    assert len(top_target_res.targets) == 2
    assert sorted(t.proba for t in top_target_res.targets) == sorted(
        t.proba for t in res.targets)[-2:]
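Here explain_prediction treats the fitted LGBMClassifier just like a scikit-learn estimator; global feature importances for the same model can be obtained with explain_weights. A hedged sketch, reusing clf and vec from the test above and assuming explain_weights accepts the vectorizer via vec= for LightGBM models the same way it does for scikit-learn ones:

import eli5

# Top-10 global importances for the fitted LGBMClassifier, with readable
# feature names taken from the CountVectorizer.
print(eli5.format_as_text(eli5.explain_weights(clf, vec=vec, top=10)))
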
Example 7: TeamHG-Memex/eli5, tests/test_sklearn_vectorizers.py
def test_explain_regression_hashing_vectorizer(newsgroups_train_binary):
    docs, y, target_names = newsgroups_train_binary
    vec = HashingVectorizer(norm=None)
    clf = LinearRegression()
    clf.fit(vec.fit_transform(docs), y)

    # Setting large "top" in order to compare it with CountVectorizer below
    # (due to small differences in the coefficients they might have cutoffs
    # at different points).
    res = explain_prediction(
        clf, docs[0], vec=vec, target_names=[target_names[1]], top=1000)
    expl, _ = format_as_all(res, clf)
    assert len(res.targets) == 1
    e = res.targets[0]
    assert e.target == 'comp.graphics'
    neg = get_all_features(e.feature_weights.neg)
    assert 'objective' in neg
    assert 'that' in neg
    assert 'comp.graphics' in expl
    assert 'objective' in expl
    assert 'that' in expl

    # HashingVectorizer with norm=None is "the same" as CountVectorizer,
    # so we can compare it and check that explanation is almost the same.
    count_vec = CountVectorizer()
    count_clf = LinearRegression()
    # ... the CountVectorizer comparison continues beyond this excerpt
Example 8: TeamHG-Memex/eli5, tests/test_sklearn_vectorizers.py
def test_explain_hashing_vectorizer(newsgroups_train_binary):
    # test that we can pass InvertableHashingVectorizer explicitly
    vec = HashingVectorizer(n_features=1000)
    ivec = InvertableHashingVectorizer(vec)
    clf = LogisticRegression(random_state=42)
    docs, y, target_names = newsgroups_train_binary
    ivec.fit([docs[0]])
    X = vec.fit_transform(docs)
    clf.fit(X, y)

    get_res = lambda **kwargs: explain_prediction(
        clf, docs[0], vec=ivec, target_names=target_names, top=20, **kwargs)
    res = get_res()
    check_explain_linear_binary(res, clf)
    assert res == get_res()
    res_vectorized = explain_prediction(
        clf, vec.transform([docs[0]])[0], vec=ivec, target_names=target_names,
        top=20, vectorized=True)
    pprint(res_vectorized)
    assert res_vectorized == _without_weighted_spans(res)

    assert res == get_res(
        feature_names=ivec.get_feature_names(always_signed=False))
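HashingVectorizer features are anonymous hash buckets, so eli5 provides InvertableHashingVectorizer to recover human-readable feature names from a sample of documents; the test above passes it as vec= in place of the raw vectorizer. A minimal standalone sketch of that setup (toy data and variable names are illustrative):

from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import LogisticRegression

from eli5.sklearn import InvertableHashingVectorizer
import eli5

docs = ['see a leaning lemon tree', 'computer graphics in space']
vec = HashingVectorizer(n_features=2 ** 10)
ivec = InvertableHashingVectorizer(vec)
ivec.fit(docs)  # learn the hash bucket -> term mapping from sample documents

clf = LogisticRegression().fit(vec.transform(docs), [0, 1])
print(eli5.format_as_text(eli5.explain_prediction(clf, docs[0], vec=ivec)))
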
Example 9: TeamHG-Memex/eli5, tests/test_sklearn_explain_prediction.py
def test_explain_tree_regressor_multitarget(reg):
    X, y = make_regression(n_samples=100, n_targets=3, n_features=10,
                           random_state=42)
    reg.fit(X, y)
    res = explain_prediction(reg, X[0])
    for expl in format_as_all(res, reg):
        for target in ['y0', 'y1', 'y2']:
            assert target in expl
        assert 'BIAS' in expl
        assert any('x%d' % i in expl for i in range(10))
    check_targets_scores(res)

    top_targets_res = explain_prediction(reg, X[0], top_targets=1)
    assert len(top_targets_res.targets) == 1
Example 10: TeamHG-Memex/eli5, tests/test_sklearn_explain_weights.py
def test_explain_linear_tuple_top(newsgroups_train):
    docs, y, target_names = newsgroups_train
    vec = TfidfVectorizer()
    clf = LogisticRegression(random_state=42)

    X = vec.fit_transform(docs)
    clf.fit(X, y)

    res_neg = explain_weights(clf, vec=vec, target_names=target_names, top=(0, 10))
    expl_neg, _ = format_as_all(res_neg, clf)

    for target in res_neg.targets:
        assert len(target.feature_weights.pos) == 0
        assert len(target.feature_weights.neg) == 10

    assert "+0." not in expl_neg

    res_pos = explain_weights(clf, vec=vec, target_names=target_names, top=(10, 2))
    format_as_all(res_pos, clf)

    for target in res_pos.targets:
        assert len(target.feature_weights.pos) == 10
        assert len(target.feature_weights.neg) == 2