How to use annif - 10 common examples

To help you get started, we’ve selected a few Annif examples that reflect popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github NatLibFi / Annif / tests / test_backend_omikuji.py View on Github external
def test_omikuji_create_train_file(tmpdir, project, datadir):
    """Check that _create_train_file writes the expected training file.

    A three-row TSV corpus is vectorized and handed to the omikuji backend;
    the resulting ``omikuji-train.txt`` must consist of a header line plus a
    single example, and the header must read ``1 2 125``
    (examples, features, labels).
    """
    tmpfile = tmpdir.join('document.tsv')
    tmpfile.write("nonexistent\thttp://example.com/nonexistent\n" +
                  "arkeologia\thttp://www.yso.fi/onto/yso/p1265\n" +
                  "...\thttp://example.com/none")
    corpus = annif.corpus.DocumentFile(str(tmpfile))
    omikuji_type = annif.backend.get_backend('omikuji')
    omikuji = omikuji_type(
        backend_id='omikuji',
        config_params={},
        project=project)
    # Named doc_texts (not ``input``) so the builtin is not shadowed.
    doc_texts = (doc.text for doc in corpus.documents)
    veccorpus = omikuji.create_vectorizer(doc_texts, {})
    omikuji._create_train_file(veccorpus, corpus)

    train_file = datadir.join('omikuji-train.txt')
    assert train_file.exists()
    traindata = train_file.read().splitlines()
    # NOTE(review): presumably the other two rows are dropped because their
    # subject URI or text is unusable - confirm against the backend.
    assert len(traindata) == 2  # header + 1 example
    # The header line carries three whitespace-separated integers.
    examples, features, labels = map(int, traindata[0].split())
    assert examples == 1
    assert features == 2
    assert labels == 125
github NatLibFi / Annif / tests / test_backend_http.py View on Github external
def test_http_suggest(app, project):
    """The http backend converts the endpoint's JSON reply into one result.

    ``requests.post`` is patched, so the reply is fully controlled by this
    test and no real HTTP request is made through that function.
    """
    canned_reply = [
        {'uri': 'http://example.org/http', 'label': 'http', 'score': 1.0}]
    backend_config = {
        'endpoint': 'http://api.example.org/analyze',
        'project': 'dummy'}

    with unittest.mock.patch('requests.post') as mock_request:
        # Fake response object whose .json() hands back the canned reply.
        fake_response = unittest.mock.Mock()
        fake_response.json.return_value = canned_reply
        mock_request.return_value = fake_response

        backend_cls = annif.backend.get_backend("http")
        http = backend_cls(
            backend_id='http',
            config_params=backend_config,
            datadir=app.config['DATADIR'])
        result = http.suggest('this is some text', project=project)

        assert len(result) == 1
        only_hit = result[0]
        assert only_hit.uri == 'http://example.org/http'
        assert only_hit.label == 'http'
        assert only_hit.score == 1.0
github NatLibFi / Annif / tests / test_backend_omikuji.py View on Github external
def test_omikuji_suggest(project):
    """With limit=8, omikuji returns 1 to 8 results including YSO p1265."""
    backend_cls = annif.backend.get_backend('omikuji')
    omikuji = backend_cls(
        backend_id='omikuji',
        config_params={'limit': 8},
        project=project)

    # Finnish sample text; the continuation-line indentation is part of the
    # string and is kept exactly as it was.
    text = """Arkeologiaa sanotaan joskus myös
        muinaistutkimukseksi tai muinaistieteeksi. Se on humanistinen tiede
        tai oikeammin joukko tieteitä, jotka tutkivat ihmisen menneisyyttä.
        Tutkimusta tehdään analysoimalla muinaisjäännöksiä eli niitä jälkiä,
        joita ihmisten toiminta on jättänyt maaperään tai vesistöjen
        pohjaan."""
    results = omikuji.suggest(text)

    # Non-empty, but never more than the configured limit.
    assert 0 < len(results) <= 8
    suggested_uris = [hit.uri for hit in results]
    assert 'http://www.yso.fi/onto/yso/p1265' in suggested_uris
github NatLibFi / Annif / tests / test_backend_fasttext.py View on Github external
def test_fasttext_train_nodocuments(tmpdir, datadir, project):
    """Training fasttext on an empty corpus raises NotSupportedException."""
    backend_config = {
        'limit': 50,
        'dim': 100,
        'lr': 0.25,
        'epoch': 20,
        'loss': 'hs'}
    backend_cls = annif.backend.get_backend("fasttext")
    fasttext = backend_cls(
        backend_id='fasttext',
        config_params=backend_config,
        datadir=str(datadir))

    # A zero-length TSV file stands in for a corpus without documents.
    empty_path = str(tmpdir.ensure('empty.tsv'))
    empty_document_corpus = annif.corpus.DocumentFile(empty_path)

    with pytest.raises(NotSupportedException) as excinfo:
        fasttext.train(empty_document_corpus, project)

    message = str(excinfo.value)
    assert 'training backend fasttext with no documents' in message
github NatLibFi / Annif / tests / test_backend_pav.py View on Github external
def test_pav_train(app, datadir, tmpdir, project):
    """Training pav writes a non-empty ``pav-model-dummy-fi`` file."""
    backend_cls = annif.backend.get_backend("pav")
    pav = backend_cls(
        backend_id='pav',
        config_params={'limit': 50, 'min-docs': 2, 'sources': 'dummy-fi'},
        datadir=str(datadir))

    # Three-row TSV training corpus (text <TAB> subject URI).
    tsv_path = tmpdir.join('document.tsv')
    tsv_path.write(
        "dummy\thttp://example.org/dummy\n"
        "another\thttp://example.org/dummy\n"
        "none\thttp://example.org/none")
    document_corpus = annif.corpus.DocumentFile(str(tsv_path))

    # Training is run inside the application context.
    with app.app_context():
        pav.train(document_corpus, project)

    model_file = datadir.join('pav-model-dummy-fi')
    assert model_file.exists()
    assert model_file.size() > 0
github NatLibFi / Annif / tests / test_backend_tfidf.py View on Github external
def test_tfidf_analyze(datadir, project_with_vectorizer):
    """tfidf analyze with limit=10 returns 10 hits, among them 'arkeologia'."""
    backend_cls = annif.backend.get_backend("tfidf")
    tfidf = backend_cls(
        backend_id='tfidf',
        params={'limit': 10},
        datadir=str(datadir))

    # Finnish sample text; the continuation-line indentation is part of the
    # string and is kept exactly as it was.
    text = """Arkeologiaa sanotaan joskus myös
        muinaistutkimukseksi tai muinaistieteeksi. Se on humanistinen tiede
        tai oikeammin joukko tieteitä, jotka tutkivat ihmisen menneisyyttä.
        Tutkimusta tehdään analysoimalla muinaisjäännöksiä eli niitä jälkiä,
        joita ihmisten toiminta on jättänyt maaperään tai vesistöjen
        pohjaan."""
    results = tfidf.analyze(text, project_with_vectorizer)

    assert len(results) == 10
    found_uris = [hit.uri for hit in results]
    found_labels = [hit.label for hit in results]
    assert 'http://www.yso.fi/onto/yso/p1265' in found_uris
    assert 'arkeologia' in found_labels
github NatLibFi / Annif / tests / test_backend_omikuji.py View on Github external
def test_omikuji_train(datadir, document_corpus, project):
    """Training omikuji fills the model directory even if it already exists."""
    backend_cls = annif.backend.get_backend('omikuji')
    omikuji = backend_cls(
        backend_id='omikuji',
        config_params={},
        project=project)

    # Simulate a preexisting model directory by creating an empty one first;
    # training must still succeed.
    model_dir = datadir.join('omikuji-model')
    model_dir.ensure(dir=True)
    assert not model_dir.listdir()  # starts out empty

    omikuji.train(document_corpus)

    assert omikuji._model is not None
    assert model_dir.exists()
    assert model_dir.listdir()  # now contains files
github NatLibFi / Annif / tests / test_backend_vw_ensemble.py View on Github external
def test_vw_ensemble_default_params(datadir, project):
    """A vw_ensemble backend built with empty config exposes its defaults."""
    backend_cls = annif.backend.get_backend("vw_ensemble")
    vw = backend_cls(
        backend_id='vw_ensemble',
        config_params={},
        datadir=str(datadir))

    expected_default_params = {
        'limit': 100,
        'discount_rate': 0.01,
        'loss_function': 'squared',
    }
    actual_params = vw.params
    # Every expected default must be present with exactly the expected value.
    for name in expected_default_params:
        assert name in actual_params
        assert actual_params[name] == expected_default_params[name]
github NatLibFi / Annif / tests / test_backend_nn_ensemble.py View on Github external
def test_nn_ensemble_suggest_no_model(project):
    """suggest() on an nn_ensemble backend raises NotInitializedException.

    The backend is only constructed here and never trained before
    ``suggest`` is called.
    """
    nn_ensemble_type = annif.backend.get_backend('nn_ensemble')
    nn_ensemble = nn_ensemble_type(
        backend_id='nn_ensemble',
        config_params={'sources': 'dummy-en'},
        project=project)

    with pytest.raises(NotInitializedException):
        # The call must raise, so its return value is deliberately not bound.
        nn_ensemble.suggest("example text")
github NatLibFi / Annif / tests / test_backend.py View on Github external
def test_get_backend_dummy(app, project):
    """The dummy backend returns a single fixed suggestion with score 1.0."""
    backend_cls = annif.backend.get_backend("dummy")
    dummy = backend_cls(
        backend_id='dummy',
        config_params={},
        datadir=app.config['DATADIR'])

    result = dummy.suggest(text='this is some text', project=project)

    assert len(result) == 1
    only_hit = result[0]
    assert only_hit.uri == 'http://example.org/dummy'
    assert only_hit.label == 'dummy'
    assert only_hit.score == 1.0