How to use the annif.util.parse_sources function in annif

To help you get started, we’ve selected a few examples of annif.util.parse_sources, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github NatLibFi / Annif / annif / backend / nn_ensemble.py View on Github external
def _corpus_to_vectors(self, corpus):
        """Build score and gold-standard arrays for the documents of a corpus.

        For every document in ``corpus.documents`` the text is passed to each
        source project named in ``self.params['sources']`` and the resulting
        hit vector is multiplied by that source's weight. In parallel, the
        document's subjects (URIs and labels) are converted into a vector
        against ``self.project.subjects``.

        NOTE(review): this excerpt ends once the two arrays have been built;
        how ``scores`` and ``true`` are consumed (returned, used for
        training, ...) is not visible here.
        """
        # pass corpus through all source projects
        # (each parsed (project_id, weight) pair is resolved to
        # (project object, weight) once, before the document loop)
        sources = [(annif.project.get_project(project_id), weight)
                   for project_id, weight
                   in annif.util.parse_sources(self.params['sources'])]

        score_vectors = []
        true_vectors = []
        for doc in corpus.documents:
            # one weighted hit vector per source project for this document
            doc_scores = []
            for source_project, weight in sources:
                hits = source_project.suggest(doc.text)
                doc_scores.append(hits.vector * weight)
            # stacking gives one row per source; the transpose makes the
            # source index the last axis
            # (presumably (n_subjects, n_sources) -- TODO confirm that
            # hits.vector is one-dimensional)
            score_vectors.append(np.array(doc_scores,
                                          dtype=np.float32).transpose())
            # gold standard: the document's own subjects as a vector
            subjects = annif.corpus.SubjectSet((doc.uris, doc.labels))
            true_vectors.append(subjects.as_vector(self.project.subjects))
        # collect the results into a single vector, considering weights
        # (one entry per document, stored as float32)
        scores = np.array(score_vectors, dtype=np.float32)
        # collect the gold standard values into another vector
        true = np.array(true_vectors, dtype=np.float32)
github NatLibFi / Annif / annif / backend / pav.py View on Github external
def initialize(self):
        """Load the PAV model of every configured source project, once.

        The source project ids come from ``self.params['sources']``. For each
        one, the model file ``MODEL_FILE_PREFIX + project_id`` inside
        ``self.datadir`` is loaded with joblib and stored in ``self._models``
        keyed by project id. If ``self._models`` is already set, the call
        returns immediately.

        Raises:
            NotInitializedException: if the model file of any source project
                does not exist. In that case ``self._models`` is left
                untouched (``None``), so a later call retries the load
                instead of treating a partial result as initialized.
        """
        if self._models is not None:
            return  # already initialized

        # Load into a local dict and publish it only after every model has
        # been read. Assigning self._models up front would make a failed
        # attempt look "already initialized" on the next call.
        models = {}
        sources = annif.util.parse_sources(self.params['sources'])
        for source_project_id, _ in sources:
            model_filename = self.MODEL_FILE_PREFIX + source_project_id
            path = os.path.join(self.datadir, model_filename)
            if not os.path.exists(path):
                raise NotInitializedException(
                    "PAV model file '{}' not found".format(path),
                    backend_id=self.backend_id)
            self.debug('loading PAV model from {}'.format(path))
            models[source_project_id] = joblib.load(path)
        self._models = models
github NatLibFi / Annif / annif / backend / vw_ensemble.py View on Github external
def _source_project_ids(self):
        """Return the project ids named in ``self.params['sources']``.

        The weights that accompany each id in the parsed sources are
        discarded; the ids are returned as a list in parsed order.
        """
        project_ids = []
        parsed = annif.util.parse_sources(self.params['sources'])
        for project_id, _weight in parsed:
            project_ids.append(project_id)
        return project_ids
github NatLibFi / Annif / annif / backend / ensemble.py View on Github external
def _suggest(self, text, params):
        """Suggest subjects for ``text`` by merging hits from all sources.

        The sources are parsed from ``params['sources']``, each one is
        queried through ``_suggest_with_sources``, and the per-source hits
        are combined by ``_merge_hits_from_sources``. The number of merged
        hits is logged at debug level before they are returned.
        """
        source_specs = annif.util.parse_sources(params['sources'])
        combined = self._merge_hits_from_sources(
            self._suggest_with_sources(text, source_specs), params)
        hit_count = len(combined)
        self.debug('{} hits after merging'.format(hit_count))
        return combined