Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_evaluation_batch(subject_index):
    """Evaluate two one-item suggestion results against a single gold set.

    The first result suggests 'arkeologit' and the second 'egyptologit',
    each with score 1.0.  Both are evaluated against the same gold set,
    after which the batch must report a document-averaged precision and
    recall of exactly 0.5 and an LRAP within [0.50, 0.51].
    """
    batch = annif.eval.EvaluationBatch(subject_index)
    gold_set = annif.corpus.SubjectSet.from_string('\tarkeologit')

    # (uri, label) of the single suggestion in each evaluated result.
    single_hits = (
        ('http://www.yso.fi/onto/yso/p10849', 'arkeologit'),
        ('http://www.yso.fi/onto/yso/p1747', 'egyptologit'),
    )
    for uri, label in single_hits:
        suggestion = annif.suggestion.SubjectSuggestion(
            uri=uri, label=label, score=1.0)
        hits = annif.suggestion.ListSuggestionResult(
            [suggestion], subject_index)
        batch.evaluate(hits, gold_set)

    metrics = batch.results()
    assert metrics['Precision (doc avg)'] == 0.5
    assert metrics['Recall (doc avg)'] == 0.5
    assert 0.50 <= metrics['LRAP'] <= 0.51
def test_list_suggestions_vector(document_corpus, subject_index):
    """Check the score vector exposed by a two-item ListSuggestionResult.

    The vector must be a NumPy array with one entry per subject in
    subject_index, its entries must sum to 1.5 (1.0 + 0.5), and the entry
    belonging to the subject labelled 'sinetit' must be 1.0.

    The document_corpus fixture is requested but not used in the body.
    """
    # (uri, label, score) for each suggestion placed in the result.
    scored_subjects = (
        ('http://www.yso.fi/onto/yso/p7141', 'sinetit', 1.0),
        ('http://www.yso.fi/onto/yso/p6479', 'viikingit', 0.5),
    )
    suggestions = ListSuggestionResult(
        [SubjectSuggestion(uri=uri, label=label, score=score)
         for uri, label, score in scored_subjects],
        subject_index)

    assert isinstance(suggestions.vector, np.ndarray)
    assert len(suggestions.vector) == len(subject_index)
    assert suggestions.vector.sum() == 1.5

    for position, value in enumerate(suggestions.vector):
        # Index 1 of a subject index entry is compared to the label.
        if subject_index[position][1] != 'sinetit':
            continue
        assert value == 1.0
def generate_suggestions(n, subject_index):
    """Build a ListSuggestionResult containing n synthetic suggestions.

    Suggestion number i (counting from 0) gets the URI
    'http://example.org/<i>', the label 'hit <i>' and the score
    1.0 / (i + 1), so scores decrease as i grows.
    """
    return ListSuggestionResult(
        [SubjectSuggestion(uri='http://example.org/{}'.format(i),
                           label='hit {}'.format(i),
                           score=1.0 / (i + 1))
         for i in range(n)],
        subject_index)
def filter(self, limit=None, threshold=0.0):
    """Return a new ListSuggestionResult holding only the best hits.

    The hits are ordered by descending score and, when limit is not None,
    cut down to the first `limit` entries.  From what remains, only hits
    whose score is at least `threshold` and strictly above zero are kept.
    """
    # list.sort is stable, so ties keep their original relative order.
    ranked = list(self.hits)
    ranked.sort(key=lambda hit: hit.score, reverse=True)
    if limit is not None:
        ranked = ranked[:limit]

    kept = []
    for hit in ranked:
        if hit.score >= threshold and hit.score > 0.0:
            kept.append(hit)
    return ListSuggestionResult(kept, self._subject_index)
def _response_to_result(self, response):
    """Convert a decoded JSON response into a ListSuggestionResult.

    Every entry of response['topics'] whose 'probability' is above zero
    becomes a SubjectSuggestion built from the entry's 'id', 'label' and
    'probability' fields.

    If the response does not have the expected structure, the problem is
    reported through self.warning and an empty result is returned instead
    of raising.
    """
    try:
        hits = [SubjectSuggestion(uri=topic['id'],
                                  label=topic['label'],
                                  score=topic['probability'])
                for topic in response['topics']
                if topic['probability'] > 0.0]
        return ListSuggestionResult(hits, self.project.subjects)
    except (KeyError, TypeError, ValueError) as err:
        # KeyError covers a response without 'topics' or a topic entry
        # without 'id' / 'label' / 'probability'; such data is malformed
        # just like the wrong-type cases and gets the same fallback.
        self.warning("Problem interpreting JSON data: {}".format(err))
        return ListSuggestionResult([], self.project.subjects)
def _vector_to_hits(self):
    """Turn the score vector into a ListSuggestionResult.

    Walks self.subject_order and, for every subject id whose entry in
    self._vector is not <= 0.0, creates a SubjectSuggestion that takes
    its URI and label from items 0 and 1 of the matching subject index
    entry.
    """
    def positive_suggestions():
        for subject_id in self.subject_order:
            score = self._vector[subject_id]
            if score <= 0.0:
                # Only this id is skipped.  Stopping early here would be
                # valid only if subject_order is sorted by descending
                # score -- TODO confirm before changing to a break.
                continue
            entry = self._subject_index[subject_id]
            yield SubjectSuggestion(uri=entry[0],
                                    label=entry[1],
                                    score=score)

    return ListSuggestionResult(list(positive_suggestions()),
                                self._subject_index)
def _suggest_chunks(self, chunktexts, project):
    """Run the model on each text chunk and average the per-chunk results.

    Each chunk is turned into example text with
    self._inputs_to_exampletext; chunks that yield nothing are skipped.
    The example, prefixed with a single space, is passed to
    self._model.predict and the prediction is converted with
    self._convert_result.

    Returns an empty ListSuggestionResult when no chunk produced a
    prediction; otherwise a VectorSuggestionResult built from the mean of
    the converted results along axis 0.
    """
    converted = []
    for chunktext in chunktexts:
        exampletext = self._inputs_to_exampletext(project, chunktext)
        if exampletext:
            prediction = self._model.predict(' {}'.format(exampletext))
            converted.append(self._convert_result(prediction, project))

    if converted:
        return VectorSuggestionResult(
            np.array(converted).mean(axis=0), project.subjects)
    # No chunk gave any example text: empty result.
    return ListSuggestionResult(
        hits=[], subject_index=project.subjects)