How to use the annif.suggestion.SubjectSuggestion class in annif

To help you get started, we've selected a few annif examples based on popular ways the library is used in public projects.

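A SubjectSuggestion bundles a subject URI, a human-readable label and a confidence score; suggestions are collected into a ListSuggestionResult together with a subject index. Here is a minimal sketch, not taken from the Annif sources, assuming a subject_index object is available (for example from project.subjects):

from annif.suggestion import SubjectSuggestion, ListSuggestionResult

# One suggestion: the URI identifies the subject, the label is its
# human-readable name, and the score is the backend's confidence.
suggestion = SubjectSuggestion(
    uri='http://example.org/subject1',
    label='example subject',
    score=0.75)

# Wrap the suggestions in a result object; subject_index is assumed
# to come from the surrounding project (e.g. project.subjects).
result = ListSuggestionResult([suggestion], subject_index)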

NatLibFi/Annif: tests/test_suggestion.py
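From the test suite: a helper that builds n synthetic suggestions with decreasing scores and wraps them in a ListSuggestionResult.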
def generate_suggestions(n, subject_index):
    suggestions = []
    for i in range(n):
        uri = 'http://example.org/{}'.format(i)
        suggestions.append(SubjectSuggestion(uri=uri,
                                             label='hit {}'.format(i),
                                             score=1.0 / (i + 1)))
    return ListSuggestionResult(suggestions, subject_index)

NatLibFi/Annif: tests/test_eval.py
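Also from the tests: an EvaluationBatch scores suggestion results against a gold-standard SubjectSet, accumulating metrics such as precision, recall, LRAP and true positives.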
def test_evaluation_batch(subject_index):
    batch = annif.eval.EvaluationBatch(subject_index)

    gold_set = annif.corpus.SubjectSet.from_string(
        '\tarkeologit')
    hits1 = annif.suggestion.ListSuggestionResult([
        annif.suggestion.SubjectSuggestion(
            uri='http://www.yso.fi/onto/yso/p10849',
            label='arkeologit',
            score=1.0)], subject_index)
    batch.evaluate(hits1, gold_set)
    hits2 = annif.suggestion.ListSuggestionResult([
        annif.suggestion.SubjectSuggestion(
            uri='http://www.yso.fi/onto/yso/p1747',
            label='egyptologit',
            score=1.0)], subject_index)
    batch.evaluate(hits2, gold_set)
    results = batch.results()
    assert results['Precision (doc avg)'] == 0.5
    assert results['Recall (doc avg)'] == 0.5
    assert results['LRAP'] >= 0.50
    assert results['LRAP'] <= 0.51
    assert results['True positives'] == 1

NatLibFi/Annif: tests/test_suggestion.py
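This test verifies that a SuggestionFilter drops suggestions whose score is zero, leaving an empty SuggestionResult.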
def test_hitfilter_zero_score(subject_index):
    origsuggestions = ListSuggestionResult(
        [SubjectSuggestion(uri='uri', label='label', score=0.0)],
        subject_index)
    suggestions = SuggestionFilter()(origsuggestions)
    assert isinstance(suggestions, SuggestionResult)
    assert len(suggestions) == 0

NatLibFi/Annif: tests/test_suggestion.py
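The vector property exposes a suggestion result as a dense numpy array of scores with one entry per subject in the subject index.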
def test_list_suggestions_vector(document_corpus, subject_index):
    suggestions = ListSuggestionResult(
        [
            SubjectSuggestion(
                uri='http://www.yso.fi/onto/yso/p7141',
                label='sinetit',
                score=1.0),
            SubjectSuggestion(
                uri='http://www.yso.fi/onto/yso/p6479',
                label='viikingit',
                score=0.5)],
        subject_index)
    assert isinstance(suggestions.vector, np.ndarray)
    assert len(suggestions.vector) == len(subject_index)
    assert suggestions.vector.sum() == 1.5
    for subject_id, score in enumerate(suggestions.vector):
        if subject_index[subject_id][1] == 'sinetit':
            assert score == 1.0
        elif subject_index[subject_id][1] == 'viikingit':
            assert score == 0.5

NatLibFi/Annif: annif/backend/maui.py
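In the Maui backend, topics from a JSON response are converted into SubjectSuggestion objects; zero-probability topics are skipped, and malformed data yields an empty result.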
def _response_to_result(self, response):
        try:
            return ListSuggestionResult(
                [SubjectSuggestion(uri=h['id'],
                                   label=h['label'],
                                   score=h['probability'])
                 for h in response['topics']
                 if h['probability'] > 0.0], self.project.subjects)
        except (TypeError, ValueError) as err:
            self.warning("Problem interpreting JSON data: {}".format(err))
            return ListSuggestionResult([], self.project.subjects)

NatLibFi/Annif: annif/backend/fasttext.py
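The fastText backend merges per-chunk predictions: label scores are summed across chunks, the top labels are kept, and each becomes a SubjectSuggestion with its score averaged over the number of chunks.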
def _suggest_chunks(self, chunktexts, project):
        limit = int(self.params['limit'])
        chunklabels, chunkscores = self._predict_chunks(
            chunktexts, project, limit)
        label_scores = collections.defaultdict(float)
        for labels, scores in zip(chunklabels, chunkscores):
            for label, score in zip(labels, scores):
                label_scores[label] += score
        best_labels = sorted([(score, label)
                              for label, score in label_scores.items()],
                             reverse=True)

        results = []
        for score, label in best_labels[:limit]:
            subject = self._label_to_subject(project, label)
            results.append(SubjectSuggestion(
                uri=subject[0],
                label=subject[1],
                score=score / len(chunktexts)))
        return ListSuggestionResult(results, project.subjects)

NatLibFi/Annif: annif/backend/pav.py
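The PAV ensemble backend re-scores hits from a source project with per-subject regression models, falling back to the raw score when no model exists for a hit's URI, and returns the hits sorted by descending score.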
def _normalize_hits(self, hits, source_project):
        reg_models = self._get_model(source_project.project_id)
        pav_result = []
        for hit in hits.hits:
            if hit.uri in reg_models:
                score = reg_models[hit.uri].predict([hit.score])[0]
            else:  # default to raw score
                score = hit.score
            pav_result.append(
                annif.suggestion.SubjectSuggestion(
                    uri=hit.uri,
                    label=hit.label,
                    score=score))
        pav_result.sort(key=lambda hit: hit.score, reverse=True)
        return annif.suggestion.ListSuggestionResult(
            pav_result, source_project.subjects)

NatLibFi/Annif: annif/backend/omikuji.py
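The Omikuji backend vectorizes the input text, asks the trained model for the top-k predictions, and maps each subject ID back to its URI and label via the project's subject index.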
def _suggest(self, text, params):
        self.debug('Suggesting subjects for text "{}..." (len={})'.format(
            text[:20], len(text)))
        vector = self.vectorizer.transform([text])
        feature_values = [(col, vector[row, col])
                          for row, col in zip(*vector.nonzero())]
        results = []
        limit = int(self.params['limit'])
        for subj_id, score in self._model.predict(feature_values, top_k=limit):
            subject = self.project.subjects[subj_id]
            results.append(SubjectSuggestion(
                uri=subject[0],
                label=subject[1],
                score=score))
        return ListSuggestionResult(results, self.project.subjects)

NatLibFi/Annif: annif/backend/http.py
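The HTTP backend turns a remote REST endpoint's JSON response into SubjectSuggestion objects, returning an empty ListSuggestionResult if the request, the JSON decoding, or the data interpretation fails. The snippet is an excerpt and begins inside the request's error handling.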
            self.warning("HTTP request failed: {}".format(err))
            return ListSuggestionResult([], self.project.subjects)

        try:
            response = req.json()
        except ValueError as err:
            self.warning("JSON decode failed: {}".format(err))
            return ListSuggestionResult([], self.project.subjects)

        if 'results' in response:
            results = response['results']
        else:
            results = response

        try:
            return ListSuggestionResult([SubjectSuggestion(uri=h['uri'],
                                                           label=h['label'],
                                                           score=h['score'])
                                         for h in results
                                         if h['score'] > 0.0],
                                        self.project.subjects)
        except (TypeError, ValueError) as err:
            self.warning("Problem interpreting JSON data: {}".format(err))
            return ListSuggestionResult([], self.project.subjects)