How to use the libratom.lib.database.db_session_from_cmd_out function in libratom

To help you get started, we’ve selected a few libratom examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: libratom/libratom, tests/unit/test_cli.py (view on GitHub)
def test_ratom_report_enron_027(
    isolated_cli_runner, enron_dataset_part027, params, expected
):
    # Generates a report for the part-027 dataset fixture, then inspects the
    # resulting database: total message count, plus the stored headers/body of
    # one specific message (looked up by its pff_identifier).
    msg_id = 2390436

    # generate_report is defined outside this excerpt; its return value is
    # what db_session_from_cmd_out consumes below.
    result = generate_report(
        params, enron_dataset_part027, isolated_cli_runner, expected
    )

    with db_session_from_cmd_out(result) as session:
        # Verify total message count
        assert session.query(Message).count() == 9297

        # Get message contents from DB
        # .one() requires exactly one row to match msg_id.
        msg = session.query(Message).filter_by(pff_identifier=msg_id).one()
        headers, body = msg.headers, msg.body

        if expected.with_messages:
            # Access message directly and compare
            # Uses the first *.pst file returned by the glob on the dataset path.
            archive_file = list(enron_dataset_part027.glob("*.pst"))[0]
            with open_mail_archive(archive_file) as archive:
                message = archive.get_message_by_id(msg_id)
                # The archive body is passed through cleanup_message_body before
                # being compared with the value stored in the database.
                assert cleanup_message_body(*archive.get_message_body(message)) == body
                assert archive.get_message_headers(message) == headers

        else:
            # NOTE(review): the excerpt is cut off here; the body of this
            # else-branch is not shown.
Source: libratom/libratom, tests/unit/test_cli.py (view on GitHub)
def test_ratom_entities_enron_001(
    isolated_cli_runner, enron_dataset_part001, params, expected
):
    """Compare one message's stored headers/body with the source archive.

    Runs the entity extraction job on the part-001 dataset fixture, reads the
    headers and body recorded in the resulting database for a fixed message
    id, and asserts that they equal what the first ``*.pst`` archive in the
    dataset yields for that same message.
    """
    msg_id = 2097572

    # Entity extraction job, run with the message content flag on
    job_output = extract_entities(
        params, enron_dataset_part001, isolated_cli_runner, expected
    )

    # Read the stored values while the session is still open
    with db_session_from_cmd_out(job_output) as session:
        lookup = session.query(Message).filter_by(pff_identifier=msg_id)
        stored = lookup.one()
        headers = stored.headers
        body = stored.body

    # Fetch the same message straight from the archive and compare
    pst_files = list(enron_dataset_part001.glob("*.pst"))
    with open_mail_archive(pst_files[0]) as archive:
        message = archive.get_message_by_id(msg_id)
        raw_body = archive.get_message_body(message)
        assert cleanup_message_body(*raw_body) == body
        assert archive.get_message_headers(message) == headers
Source: libratom/libratom, tests/unit/test_cli.py (view on GitHub)
def test_ratom(cli_runner, params, expected):
    """Invoke the ``ratom`` command and check its exit code and output.

    Also asserts that ``db_session_from_cmd_out`` raises ``ValueError`` when
    given this invocation's result.
    """
    invocation = cli_runner.invoke(ratom, args=params)

    # The command must succeed and print the expected text
    assert invocation.exit_code == 0
    assert expected in invocation.output

    # NOTE(review): presumably this output references no database, hence the
    # ValueError -- confirm against db_session_from_cmd_out.
    with pytest.raises(ValueError):
        db_session_from_cmd_out(invocation)
Source: libratom/libratom, tests/unit/test_cli.py (view on GitHub)
def test_ratom_entities_enron_004(
    isolated_cli_runner,
    enron_dataset_part004,
    en_core_web_sm_2_3_1,  # pylint: disable=unused-argument
    params,
    expected,
):
    """Run entity extraction on the part-004 dataset fixture and check the stored entities.

    The test opens a database session from the extraction result, checks that
    the first ten entities can be rendered as non-empty strings, asserts the
    total entity count, and queries the number of entities per label.

    ``en_core_web_sm_2_3_1`` is requested but never referenced in the body
    (presumably a fixture needed for its side effect -- confirm).

    NOTE(review): this excerpt ends right after the per-label query is checked
    for non-emptiness; the original test may continue beyond what is shown.
    """
    result = extract_entities(
        params, enron_dataset_part004, isolated_cli_runner, expected
    )

    with db_session_from_cmd_out(result) as session:

        # Sanity check
        # str() of each of the first ten entities must be truthy (non-empty).
        for entity in session.query(Entity)[:10]:
            assert str(entity)

        # Verify total entity count
        assert session.query(Entity).count() == 173_736

        # Verify count per entity type
        # One (label_, count) row per distinct Entity.label_ value.
        results = (
            session.query(Entity.label_, func.count(Entity.label_))
            .group_by(Entity.label_)
            .all()
        )

        # At least one label group must exist.
        assert results