Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def generate_suggestions(n, subject_index):
    """Build a ListSuggestionResult holding `n` synthetic suggestions.

    Suggestion number i (counting from zero) gets the URI
    'http://example.org/<i>', the label 'hit <i>' and the score
    1 / (i + 1), so the scores shrink as i grows.
    """
    hits = [
        SubjectSuggestion(
            uri='http://example.org/{}'.format(idx),
            label='hit {}'.format(idx),
            score=1.0 / (idx + 1))
        for idx in range(n)
    ]
    return ListSuggestionResult(hits, subject_index)
def test_evaluation_batch(subject_index):
    """Evaluate two single-suggestion results against one gold standard
    and check the metrics aggregated by the EvaluationBatch."""
    batch = annif.eval.EvaluationBatch(subject_index)
    gold = annif.corpus.SubjectSet.from_string('\tarkeologit')

    # The first candidate carries the label used in the gold standard
    # string; the second one carries a different label.
    candidates = (
        ('http://www.yso.fi/onto/yso/p10849', 'arkeologit'),
        ('http://www.yso.fi/onto/yso/p1747', 'egyptologit'),
    )
    for uri, label in candidates:
        hits = annif.suggestion.ListSuggestionResult(
            [annif.suggestion.SubjectSuggestion(
                uri=uri, label=label, score=1.0)],
            subject_index)
        batch.evaluate(hits, gold)

    metrics = batch.results()
    assert metrics['Precision (doc avg)'] == 0.5
    assert metrics['Recall (doc avg)'] == 0.5
    assert 0.50 <= metrics['LRAP'] <= 0.51
    assert metrics['True positives'] == 1
def test_hitfilter_zero_score(subject_index):
    """A lone suggestion with score 0.0 must be dropped by a
    SuggestionFilter created with default arguments."""
    zero_hit = SubjectSuggestion(uri='uri', label='label', score=0.0)
    unfiltered = ListSuggestionResult([zero_hit], subject_index)

    hit_filter = SuggestionFilter()
    filtered = hit_filter(unfiltered)

    assert isinstance(filtered, SuggestionResult)
    assert len(filtered) == 0
def test_list_suggestions_vector(document_corpus, subject_index):
    """Check the `vector` view of a two-item ListSuggestionResult: its
    type, its length, its total and the scores at the two subjects."""
    sinetit = SubjectSuggestion(
        uri='http://www.yso.fi/onto/yso/p7141',
        label='sinetit',
        score=1.0)
    viikingit = SubjectSuggestion(
        uri='http://www.yso.fi/onto/yso/p6479',
        label='viikingit',
        score=0.5)
    result = ListSuggestionResult([sinetit, viikingit], subject_index)

    assert isinstance(result.vector, np.ndarray)
    assert len(result.vector) == len(subject_index)
    assert result.vector.sum() == 1.5

    # Only the two suggested labels are checked individually; entries
    # for any other subject are covered by the sum assertion above.
    expected_by_label = {'sinetit': 1.0, 'viikingit': 0.5}
    for subject_id, score in enumerate(result.vector):
        label = subject_index[subject_id][1]
        if label in expected_by_label:
            assert score == expected_by_label[label]
def _response_to_result(self, response):
    """Convert a decoded JSON response into a ListSuggestionResult.

    Every entry of response['topics'] is read through its 'id', 'label'
    and 'probability' keys; entries whose probability is not greater
    than zero are left out.

    If the response does not have that shape (wrong container type, a
    missing key, or a value that cannot be compared), a warning is
    emitted through self.warning and an empty result is returned.
    """
    try:
        return ListSuggestionResult(
            [SubjectSuggestion(uri=topic['id'],
                               label=topic['label'],
                               score=topic['probability'])
             for topic in response['topics']
             if topic['probability'] > 0.0],
            self.project.subjects)
    except (KeyError, TypeError, ValueError) as err:
        # KeyError covers a response without 'topics' or a topic without
        # one of the expected keys; it is treated like the other
        # malformed-data cases rather than propagating to the caller.
        self.warning("Problem interpreting JSON data: {}".format(err))
        return ListSuggestionResult([], self.project.subjects)
def _suggest_chunks(self, chunktexts, project):
    """Predict labels per chunk and merge them into a single result.

    The scores a label receives across chunks are summed, the labels
    are ranked by that sum (highest first), at most `limit` of them are
    kept, and each kept sum is divided by the number of chunks.
    """
    limit = int(self.params['limit'])
    chunklabels, chunkscores = self._predict_chunks(
        chunktexts, project, limit)

    # Accumulate the per-chunk scores of every label.
    totals = collections.defaultdict(float)
    for labels, scores in zip(chunklabels, chunkscores):
        for label, score in zip(labels, scores):
            totals[label] += score

    # (total, label) pairs so that sorting ranks by total first.
    ranked = [(total, label) for label, total in totals.items()]
    ranked.sort(reverse=True)

    def as_suggestion(total, label):
        subject = self._label_to_subject(project, label)
        return SubjectSuggestion(
            uri=subject[0],
            label=subject[1],
            score=total / len(chunktexts))

    results = [as_suggestion(total, label)
               for total, label in ranked[:limit]]
    return ListSuggestionResult(results, project.subjects)
def _normalize_hits(self, hits, source_project):
    """Re-score the hits of one source project and rank them.

    The models for the source project are fetched with self._get_model
    and looked up by hit URI. A hit whose URI has a model gets the first
    value of that model's predict([score]); any other hit keeps its raw
    score. The result lists the re-scored hits by descending score.
    """
    reg_models = self._get_model(source_project.project_id)

    def rescored(hit):
        # No model for this URI: fall back to the raw score.
        if hit.uri not in reg_models:
            return hit.score
        return reg_models[hit.uri].predict([hit.score])[0]

    pav_result = sorted(
        (annif.suggestion.SubjectSuggestion(
            uri=hit.uri,
            label=hit.label,
            score=rescored(hit))
         for hit in hits.hits),
        key=lambda suggestion: suggestion.score,
        reverse=True)
    return annif.suggestion.ListSuggestionResult(
        pav_result, source_project.subjects)
def _suggest(self, text, params):
    """Suggest subjects for `text` using the vectorizer and the model.

    The text is vectorized, its nonzero entries are passed to the model
    as (column, value) pairs, and every (subject id, score) pair the
    model returns is turned into a SubjectSuggestion.

    NOTE(review): `params` is not read in this method; the limit comes
    from self.params['limit'] -- confirm that this is intended.
    """
    self.debug('Suggesting subjects for text "{}..." (len={})'.format(
        text[:20], len(text)))
    vector = self.vectorizer.transform([text])

    feature_values = []
    for row, col in zip(*vector.nonzero()):
        feature_values.append((col, vector[row, col]))

    limit = int(self.params['limit'])
    predicted = self._model.predict(feature_values, top_k=limit)

    # Resolve each predicted subject id to its entry in the project's
    # subject index before building the suggestion.
    resolved = ((self.project.subjects[subj_id], score)
                for subj_id, score in predicted)
    results = [
        SubjectSuggestion(uri=subject[0], label=subject[1], score=score)
        for subject, score in resolved
    ]
    return ListSuggestionResult(results, self.project.subjects)
self.warning("HTTP request failed: {}".format(err))
return ListSuggestionResult([], self.project.subjects)
try:
response = req.json()
except ValueError as err:
self.warning("JSON decode failed: {}".format(err))
return ListSuggestionResult([], self.project.subjects)
if 'results' in response:
results = response['results']
else:
results = response
try:
return ListSuggestionResult([SubjectSuggestion(uri=h['uri'],
label=h['label'],
score=h['score'])
for h in results
if h['score'] > 0.0],
self.project.subjects)
except (TypeError, ValueError) as err:
self.warning("Problem interpreting JSON data: {}".format(err))
return ListSuggestionResult([], self.project.subjects)