How to use the annif.backend.backend.AnnifBackend class in annif

To help you get started, we’ve selected a few annif examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github NatLibFi / Annif / annif / backend / omikuji.py View on Github external
"""Annif backend using the Omikuji classifier"""

import omikuji
import os.path
import shutil
import annif.util
from annif.suggestion import SubjectSuggestion, ListSuggestionResult
from annif.exception import NotInitializedException, NotSupportedException
from . import backend
from . import mixins


class OmikujiBackend(mixins.TfidfVectorizerMixin, backend.AnnifBackend):
    """Omikuji based backend for Annif"""
    name = "omikuji"
    needs_subject_index = True

    # defaults for uninitialized instances
    _model = None

    TRAIN_FILE = 'omikuji-train.txt'
    MODEL_FILE = 'omikuji-model'

    DEFAULT_PARAMS = {
        'min_df': 1,
        'cluster_balanced': True,
        'cluster_k': 2,
        'max_depth': 20,
    }
github NatLibFi / Annif / annif / backend / ensemble.py View on Github external
"""Ensemble backend that combines results from multiple projects"""


import annif.suggestion
import annif.project
import annif.util
from . import backend


class EnsembleBackend(backend.AnnifBackend):
    """Ensemble backend that combines results from multiple projects"""
    name = "ensemble"

    def _normalize_hits(self, hits, source_project):
        """Hook for processing hits from backends. Intended to be overridden
        by subclasses.

        This base implementation does not use source_project and returns
        hits unchanged."""
        return hits

    def _suggest_with_sources(self, text, sources):
        hits_from_sources = []
        for project_id, weight in sources:
            source_project = annif.project.get_project(project_id)
            hits = source_project.suggest(text)
            self.debug(
                'Got {} hits from project {}'.format(
                    len(hits), source_project.project_id))
github NatLibFi / Annif / annif / backend / vw_ensemble.py View on Github external
def default_params(self):
        """Build the default parameter dict for this backend.

        Starts from a copy of the generic AnnifBackend defaults, overlays
        this class's DEFAULT_PARAMS, and finally overlays every VW_PARAMS
        entry whose default value is not None.
        """
        merged = dict(backend.AnnifBackend.DEFAULT_PARAMS)
        merged.update(self.DEFAULT_PARAMS)
        # each VW_PARAMS value is unpacked as a pair; only its second
        # element (the default) is used here
        for name, (_, fallback) in self.VW_PARAMS.items():
            if fallback is not None:
                merged[name] = fallback
        return merged
github NatLibFi / Annif / annif / backend / maui.py View on Github external
"""Maui backend that makes calls to a Maui Server instance using its API"""


import time
import os.path
import json
import requests
import requests.exceptions
from annif.exception import ConfigurationException
from annif.exception import NotSupportedException
from annif.exception import OperationFailedException
from annif.suggestion import SubjectSuggestion, ListSuggestionResult
from . import backend


class MauiBackend(backend.AnnifBackend):
    name = "maui"

    TRAIN_FILE = 'maui-train.jsonl'

    @property
    def endpoint(self):
        """The 'endpoint' value from this backend's params.

        Raises ConfigurationException when the params have no
        'endpoint' key.
        """
        try:
            url = self.params['endpoint']
        except KeyError:
            raise ConfigurationException(
                "endpoint must be set in project configuration",
                backend_id=self.backend_id)
        return url

    @property
    def tagger(self):
        try:
github NatLibFi / Annif / annif / backend / tfidf.py View on Github external
def write(self, text):
        """Append text to the in-memory buffer, flushing once the buffer
        holds at least BUFFER_SIZE entries."""
        pending = self._buffer
        pending.append(text)
        if len(pending) < self.BUFFER_SIZE:
            # still room in the buffer - nothing more to do
            return
        self.flush()

    def read(self):
        """Return everything written so far as a single string.

        If the backing file has been created, its content comes first,
        followed by a newline and the newline-joined buffer; otherwise
        only the newline-joined buffer is returned.
        """
        if self._created:
            with open(self._path, 'r', encoding='utf-8') as subjfile:
                on_disk = subjfile.read()
            return on_disk + "\n" + "\n".join(self._buffer)
        # file was never created - the buffer holds all of the content
        return "\n".join(self._buffer)


class TFIDFBackend(backend.AnnifBackend):
    """TF-IDF vector space similarity based backend for Annif"""
    name = "tfidf"
    needs_subject_index = True

    # defaults for uninitialized instances
    _vectorizer = None
    _index = None

    VECTORIZER_FILE = 'vectorizer'
    INDEX_FILE = 'tfidf-index'

    def _generate_subjects_from_documents(self, corpus, project):
        with tempfile.TemporaryDirectory() as tempdir:
            subject_buffer = {}
            for subject_id in range(len(project.subjects)):
                subject_buffer[subject_id] = SubjectBuffer(tempdir,
github NatLibFi / Annif / annif / backend / omikuji.py View on Github external
def default_params(self):
        """Return a new dict of the generic AnnifBackend defaults overlaid
        with this class's DEFAULT_PARAMS (the latter win on key clashes)."""
        return {**backend.AnnifBackend.DEFAULT_PARAMS, **self.DEFAULT_PARAMS}
github NatLibFi / Annif / annif / backend / fasttext.py View on Github external
"""Annif backend using the fastText classifier"""

import collections
import os.path
import annif.util
from annif.suggestion import SubjectSuggestion, ListSuggestionResult
from annif.exception import NotInitializedException, NotSupportedException
import fastText
from . import backend
from . import mixins


class FastTextBackend(mixins.ChunkingBackend, backend.AnnifBackend):
    """fastText backend for Annif"""

    name = "fasttext"
    needs_subject_index = True

    FASTTEXT_PARAMS = {
        'lr': float,
        'lrUpdateRate': int,
        'dim': int,
        'ws': int,
        'epoch': int,
        'minCount': int,
        'neg': int,
        'wordNgrams': int,
        'loss': str,
        'bucket': int,
github NatLibFi / Annif / annif / backend / vw_multi.py View on Github external
def default_params(self):
        """Build the default parameter dict for this backend.

        Layers are applied in order, later ones overriding earlier ones:
        generic AnnifBackend defaults, ChunkingBackend defaults, this
        class's DEFAULT_PARAMS, then every VW_PARAMS default that is not
        None.
        """
        layers = (
            backend.AnnifBackend.DEFAULT_PARAMS,
            mixins.ChunkingBackend.DEFAULT_PARAMS,
            self.DEFAULT_PARAMS,
            # each VW_PARAMS value is unpacked as a pair; only its second
            # element (the default) is used here
            {name: fallback
             for name, (_, fallback) in self.VW_PARAMS.items()
             if fallback is not None},
        )
        merged = {}
        for layer in layers:
            merged.update(layer)
        return merged
github NatLibFi / Annif / annif / backend / backend.py View on Github external
return self._suggest(text, project, params=beparams)

    def debug(self, message):
        """Send message to the module logger at DEBUG level, prefixed with
        this backend's id"""
        line = "Backend {}: {}".format(self.backend_id, message)
        logger.debug(line)

    def info(self, message):
        """Send message to the module logger at INFO level, prefixed with
        this backend's id"""
        line = "Backend {}: {}".format(self.backend_id, message)
        logger.info(line)

    def warning(self, message):
        """Send message to the module logger at WARNING level, prefixed
        with this backend's id"""
        line = "Backend {}: {}".format(self.backend_id, message)
        logger.warning(line)


class AnnifLearningBackend(AnnifBackend):
    """Base class for Annif backends that can perform online learning"""

    @abc.abstractmethod
    def learn(self, corpus, project):
        """Further train the model on the given document or subject corpus.

        Marked abstract, so concrete subclasses are expected to override
        it; this placeholder body does nothing and returns None.
        NOTE(review): abstractness is only enforced at instantiation if
        AnnifBackend uses an ABC metaclass - confirm in its definition."""
        pass  # pragma: no cover
github NatLibFi / Annif / annif / backend / http.py View on Github external
"""HTTP/REST client backend that makes calls to a web service
and returns the results"""


import requests
import requests.exceptions
from annif.suggestion import SubjectSuggestion, ListSuggestionResult
from . import backend


class HTTPBackend(backend.AnnifBackend):
    name = "http"

    def _suggest(self, text, params):
        data = {'text': text}
        if 'project' in params:
            data['project'] = params['project']

        try:
            req = requests.post(params['endpoint'], data=data)
            req.raise_for_status()
        except requests.exceptions.RequestException as err:
            self.warning("HTTP request failed: {}".format(err))
            return ListSuggestionResult([], self.project.subjects)

        try:
            response = req.json()