How to use the annif.project module in annif

To help you get started, we’ve selected a few annif examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github NatLibFi / Annif / tests / test_backend_vw_ensemble.py View on Github external
def test_vw_ensemble_train_and_learn(app, datadir, tmpdir):
    """Train the vw_ensemble backend and check the files it writes.

    The backend is created with 'dummy-en' as its only source, trained on a
    three-row TSV corpus inside the application context, and the test then
    asserts that 'vw-train.txt', 'subject-freq.json' and 'vw-model' exist
    in `datadir` and are non-empty.

    NOTE(review): this excerpt stops right after recording the model file's
    size and mtime; the online-learning checks that presumably follow are
    not visible here.
    """
    # look up the backend class by name, then instantiate it
    vw_ensemble_type = annif.backend.get_backend("vw_ensemble")
    vw_ensemble = vw_ensemble_type(
        backend_id='vw_ensemble',
        config_params={'sources': 'dummy-en'},
        datadir=str(datadir))

    # build a small tab-separated corpus: each row is a text and a URI
    tmpfile = tmpdir.join('document.tsv')
    tmpfile.write("dummy\thttp://example.org/dummy\n" +
                  "another\thttp://example.org/dummy\n" +
                  "none\thttp://example.org/none")
    document_corpus = annif.corpus.DocumentFile(str(tmpfile))
    # same project id as the one named in the backend's 'sources' parameter
    project = annif.project.get_project('dummy-en')

    # training is run inside the Flask application context
    with app.app_context():
        vw_ensemble.train(document_corpus, project)
    # each expected output file must exist and be non-empty
    assert datadir.join('vw-train.txt').exists()
    assert datadir.join('vw-train.txt').size() > 0
    assert datadir.join('subject-freq.json').exists()
    assert datadir.join('subject-freq.json').size() > 0
    assert datadir.join('vw-model').exists()
    assert datadir.join('vw-model').size() > 0

    # test online learning
    modelfile = datadir.join('vw-model')
    freqfile = datadir.join('subject-freq.json')

    # remember the model file's current size and modification time
    # (presumably compared after a learning step -- not shown in this excerpt)
    old_size = modelfile.size()
    old_mtime = modelfile.mtime()
github NatLibFi / Annif / tests / test_backend_nn_ensemble.py View on Github external
def test_nn_ensemble_train_and_learn(app, tmpdir):
    """Train the nn_ensemble backend on a three-row TSV corpus.

    The backend is created for the 'dummy-en' project, with 'dummy-en' also
    configured as its only source, and is trained inside the application
    context.

    NOTE(review): this excerpt ends right after `datadir` is derived from the
    project; the assertions that presumably follow are not visible here.
    """
    project = annif.project.get_project('dummy-en')
    # look up the backend class by name, then instantiate it
    nn_ensemble_type = annif.backend.get_backend("nn_ensemble")
    nn_ensemble = nn_ensemble_type(
        backend_id='nn_ensemble',
        config_params={'sources': 'dummy-en'},
        project=project)

    # build a small tab-separated corpus: each row is a text and a URI
    tmpfile = tmpdir.join('document.tsv')
    tmpfile.write("dummy\thttp://example.org/dummy\n" +
                  "another\thttp://example.org/dummy\n" +
                  "none\thttp://example.org/none")
    document_corpus = annif.corpus.DocumentFile(str(tmpfile))

    # training is run inside the Flask application context
    with app.app_context():
        nn_ensemble.train(document_corpus)

    # wrap the project's data directory in a py.path.local object
    datadir = py.path.local(project.datadir)
github NatLibFi / Annif / annif / cli.py View on Github external
def get_project(project_id):
    """
    Fetch the project with the given ID, or terminate the program.

    The lookup is done with `min_access=Access.hidden`. When it raises
    ValueError, an error message is echoed to stderr and the process exits
    with status 1."""
    try:
        found = annif.project.get_project(
            project_id, min_access=Access.hidden)
    except ValueError:
        message = "No projects found with id \'{0}\'.".format(project_id)
        click.echo(message, err=True)
        sys.exit(1)
    else:
        return found
github NatLibFi / Annif / annif / backend / ensemble.py View on Github external
def _suggest_with_sources(self, text, sources):
        """Gather weighted, normalized suggestions from every source project.

        `sources` is iterated as (project_id, weight) pairs. For each pair
        the project is looked up, asked for suggestions on `text`, and its
        hits are normalized with `self._normalize_hits`. Returns a list of
        `annif.suggestion.WeightedSuggestion` objects, one per source, in
        the order the sources were given.
        """
        weighted_suggestions = []
        for source_id, source_weight in sources:
            project = annif.project.get_project(source_id)
            raw_hits = project.suggest(text)
            debug_message = 'Got {} hits from project {}'.format(
                len(raw_hits), project.project_id)
            self.debug(debug_message)
            normalized = self._normalize_hits(raw_hits, project)
            suggestion = annif.suggestion.WeightedSuggestion(
                hits=normalized, weight=source_weight)
            weighted_suggestions.append(suggestion)
        return weighted_suggestions
github NatLibFi / Annif / annif / backend / nn_ensemble.py View on Github external
def _corpus_to_vectors(self, corpus):
        """Run every corpus document through the source projects.

        For each document, collects the weighted suggestion vectors of all
        source projects and a vector of the document's own subjects.

        NOTE(review): this excerpt ends after `scores` is built; no return
        statement is visible, so what the method returns cannot be told
        from here.
        """
        # pass corpus through all source projects
        # (sources becomes a list of (project object, weight) pairs)
        sources = [(annif.project.get_project(project_id), weight)
                   for project_id, weight
                   in annif.util.parse_sources(self.params['sources'])]

        score_vectors = []
        true_vectors = []
        for doc in corpus.documents:
            # one weighted suggestion vector per source project
            doc_scores = []
            for source_project, weight in sources:
                hits = source_project.suggest(doc.text)
                doc_scores.append(hits.vector * weight)
            # stack the per-source vectors as float32 and transpose
            score_vectors.append(np.array(doc_scores,
                                          dtype=np.float32).transpose())
            # the document's own subjects, as a vector over the project's
            # subjects
            subjects = annif.corpus.SubjectSet((doc.uris, doc.labels))
            true_vectors.append(subjects.as_vector(self.project.subjects))
        # collect the results into a single vector, considering weights
        scores = np.array(score_vectors, dtype=np.float32)
github NatLibFi / Annif / annif / rest.py View on Github external
def show_project(project_id):
    """Return the dump of one project, formatted per the Swagger spec.

    The project is looked up with `min_access=Access.hidden`. If the lookup
    raises ValueError, the result of `project_not_found_error(project_id)`
    is returned instead.
    """

    try:
        found = annif.project.get_project(
            project_id, min_access=Access.hidden)
    except ValueError:
        error_response = project_not_found_error(project_id)
        return error_response
    else:
        return found.dump()
github NatLibFi / Annif / annif / operations.py View on Github external
def list_projects():
    """
    Return the dumps of all available projects as a list.

    Usage: annif list-projects

    REST equivalent: GET /projects/
    """

    projects = annif.project.get_projects()
    dumped = []
    for project in projects.values():
        dumped.append(project.dump())
    return dumped
github NatLibFi / Annif / annif / rest.py View on Github external
def list_projects():
    """Return a dict of project dumps, formatted per the Swagger spec.

    Projects are fetched with `min_access=Access.public`; their dumps are
    listed under the 'projects' key.
    """

    public_projects = annif.project.get_projects(min_access=Access.public)
    dumps = [project.dump() for project in public_projects.values()]
    return {'projects': dumps}
github NatLibFi / Annif / annif / __init__.py View on Github external
if config_name is None:
        if os.environ.get('FLASK_RUN_FROM_CLI') == 'true':
            config_name = 'annif.default_config.Config'
        else:
            config_name = 'annif.default_config.ProductionConfig'
    logger.debug('creating app with configuration %s', config_name)
    cxapp.app.config.from_object(config_name)
    cxapp.app.config.from_envvar('ANNIF_SETTINGS', silent=True)

    cxapp.add_api('annif.yaml')

    # add CORS support
    CORS(cxapp.app)

    if cxapp.app.config['INITIALIZE_PROJECTS']:
        annif.project.initialize_projects(cxapp.app)

    # register the views via blueprints
    from annif.views import bp
    cxapp.app.register_blueprint(bp)

    # return the Flask app
    return cxapp.app
github NatLibFi / Annif / annif / operations.py View on Github external
def analyze(project_id, text, limit, threshold):
    """Analyze a document with the given project and return the dumped hits.

    Returns a list holding the `dump()` of every hit produced by the
    project's `analyze(text, limit, threshold)`. If the project lookup
    raises ValueError, an error message string is returned instead.
    """

    try:
        target = annif.project.get_project(project_id)
    except ValueError:
        not_found = "No projects found with id \'{0}\'."
        return not_found.format(project_id)

    dumped_hits = []
    for hit in target.analyze(text, limit, threshold):
        dumped_hits.append(hit.dump())
    return dumped_hits