How to use the annif.util.atomic_save function in annif

To help you get started, we’ve selected a few annif examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: NatLibFi/Annif — annif/backend/pav.py (view on GitHub)
source_project_id, min_docs))
        source_project = annif.project.get_project(source_project_id)
        # suggest subjects for the training corpus
        scores, true = self._suggest_train_corpus(source_project, corpus)
        # create the concept-specific PAV regression models
        pav_regressions = {}
        for cid in range(len(source_project.subjects)):
            if true[:, cid].sum() < min_docs:
                continue  # don't create model b/c of too few examples
            reg = IsotonicRegression(out_of_bounds='clip')
            reg.fit(scores[:, cid], true[:, cid])
            pav_regressions[source_project.subjects[cid][0]] = reg
        self.info("created PAV model for {} concepts".format(
            len(pav_regressions)))
        model_filename = self.MODEL_FILE_PREFIX + source_project_id
        annif.util.atomic_save(
            pav_regressions,
            self.datadir,
            model_filename,
            method=joblib.dump)
Source: NatLibFi/Annif — annif/backend/tfidf.py (view on GitHub)
def train(self, corpus, project):
        """Fit a TF-IDF vectorizer on the corpus and build the index.

        The vectorizer is stored on ``self._vectorizer``, fitted on the
        subjects generated from ``corpus`` and ``project``, and saved as
        ``self.VECTORIZER_FILE`` under ``self.datadir`` through
        ``annif.util.atomic_save`` (using ``joblib.dump`` as the writer).
        The vectorized subjects are then passed to ``self._create_index``.

        Raises:
            NotSupportedException: if ``corpus.is_empty()`` is true.
        """
        if corpus.is_empty():
            message = 'Cannot train tfidf project with no documents'
            raise NotSupportedException(message)

        self.info('transforming subject corpus')
        subject_docs = self._generate_subjects_from_documents(corpus, project)

        self.info('creating vectorizer')
        self._vectorizer = TfidfVectorizer()
        subject_vectors = self._vectorizer.fit_transform(subject_docs)

        # save the fitted vectorizer before the index is built from it
        annif.util.atomic_save(self._vectorizer,
                               self.datadir,
                               self.VECTORIZER_FILE,
                               method=joblib.dump)
        self._create_index(subject_vectors)
Source: NatLibFi/Annif — annif/backend/vw_base.py (view on GitHub)
def _create_train_file(self, corpus, project):
        """Create the VW training examples and save them to the train file.

        The examples come from ``self._create_examples(corpus, project)``
        and are saved as ``self.TRAIN_FILE`` under ``self.datadir`` through
        ``annif.util.atomic_save``, with ``self._write_train_file`` as the
        writer method.
        """
        self.info('creating VW train file')
        train_examples = self._create_examples(corpus, project)
        annif.util.atomic_save(
            train_examples,
            self.datadir,
            self.TRAIN_FILE,
            method=self._write_train_file)
Source: NatLibFi/Annif — annif/backend/vw_ensemble.py (view on GitHub)
def _create_train_file(self, corpus, project):
        """Save the subject frequencies and the VW training examples.

        ``self._create_examples(corpus, project)`` is consumed as pairs of
        ``(subject id, example)``. The subject ids are counted into
        ``self._subject_freq`` and saved as ``self.FREQ_FILE``; the examples
        are then saved as ``self.TRAIN_FILE``. Both files go under
        ``self.datadir`` through ``annif.util.atomic_save``.
        """
        self.info('creating VW train file')
        labelled_examples = self._create_examples(corpus, project)

        # first pass: count how often each subject id occurs
        subject_ids = [subject for subject, _ in labelled_examples]
        self._subject_freq = collections.Counter(subject_ids)
        annif.util.atomic_save(
            self._subject_freq,
            self.datadir,
            self.FREQ_FILE,
            method=self._write_freq_file)

        # second pass: keep only the examples for the train file
        train_examples = [example for _, example in labelled_examples]
        annif.util.atomic_save(
            train_examples,
            self.datadir,
            self.TRAIN_FILE,
            method=self._write_train_file)