Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Runs generate_report with the fixture inputs, then inspects the session
# obtained from the command result: it checks the total Message count and
# compares one stored message's headers/body against the mail archive.
# NOTE(review): indentation has been stripped from this excerpt and the
# function is cut off after the trailing `else:` -- the body of that branch
# is not visible here.
def test_ratom_report_enron_027(
isolated_cli_runner, enron_dataset_part027, params, expected
):
# pff_identifier of the single message whose contents are checked below
msg_id = 2390436
result = generate_report(
params, enron_dataset_part027, isolated_cli_runner, expected
)
with db_session_from_cmd_out(result) as session:
# Verify total message count
assert session.query(Message).count() == 9297
# Get message contents from DB
# .one() is used, so exactly one row with this pff_identifier is expected
msg = session.query(Message).filter_by(pff_identifier=msg_id).one()
headers, body = msg.headers, msg.body
# Only compare against the archive when expected.with_messages is truthy
if expected.with_messages:
# Access message directly and compare
# Uses the first "*.pst" match found under enron_dataset_part027
archive_file = list(enron_dataset_part027.glob("*.pst"))[0]
with open_mail_archive(archive_file) as archive:
message = archive.get_message_by_id(msg_id)
# The archive body is passed through cleanup_message_body before comparing
assert cleanup_message_body(*archive.get_message_body(message)) == body
assert archive.get_message_headers(message) == headers
# NOTE(review): the statements for this else branch are missing from the excerpt
else:
def test_ratom_entities_enron_001(
    isolated_cli_runner, enron_dataset_part001, params, expected
):
    """Compare one stored message against the same message read from the archive.

    Runs ``extract_entities`` with the fixture inputs, reads the headers and
    body stored for message ``2097572`` through the session obtained from the
    command result, then opens the first ``*.pst`` match under
    ``enron_dataset_part001`` and asserts that the cleaned-up body and the
    headers read from the archive equal the stored values.
    """
    target_id = 2097572

    # Run the entity extraction job
    cmd_result = extract_entities(
        params, enron_dataset_part001, isolated_cli_runner, expected
    )

    # Stored copy of the message; .one() expects exactly one matching row
    with db_session_from_cmd_out(cmd_result) as db:
        stored = db.query(Message).filter_by(pff_identifier=target_id).one()
        stored_headers = stored.headers
        stored_body = stored.body

    # Same message read straight from the archive file
    pst_files = list(enron_dataset_part001.glob("*.pst"))
    with open_mail_archive(pst_files[0]) as archive:
        raw_message = archive.get_message_by_id(target_id)

        archive_body = cleanup_message_body(*archive.get_message_body(raw_message))
        assert archive_body == stored_body

        archive_headers = archive.get_message_headers(raw_message)
        assert archive_headers == stored_headers
def test_ratom(cli_runner, params, expected):
    """Invoke ``ratom`` with ``params`` and check its exit code and output.

    Asserts that the command exits with code 0, that ``expected`` occurs in
    the command output, and that ``db_session_from_cmd_out`` raises
    ``ValueError`` when given this result.
    """
    result = cli_runner.invoke(ratom, args=params)
    assert result.exit_code == 0
    assert expected in result.output

    # Enter the context manager instead of only calling the factory: if
    # db_session_from_cmd_out is generator-based (contextlib.contextmanager),
    # its body -- and therefore the ValueError -- only runs on __enter__.
    # If it raises eagerly on call, this behaves exactly as before.
    with pytest.raises(ValueError):
        with db_session_from_cmd_out(result):
            pass
def test_ratom_entities_enron_004(
    isolated_cli_runner,
    enron_dataset_part004,
    en_core_web_sm_2_3_1,  # pylint: disable=unused-argument
    params,
    expected,
):
    """Check the entities produced by ``extract_entities`` for this dataset.

    Runs ``extract_entities`` with the fixture inputs and, through the session
    obtained from the command result, asserts that the leading entities have a
    truthy ``str()``, that the total ``Entity`` count is 173_736, and that the
    per-label count query returns a non-empty result.
    """
    cmd_result = extract_entities(
        params, enron_dataset_part004, isolated_cli_runner, expected
    )

    with db_session_from_cmd_out(cmd_result) as db:
        # Sanity check on up to ten entities
        leading_entities = db.query(Entity)[:10]
        for item in leading_entities:
            assert str(item)

        # Total number of entities
        entity_total = db.query(Entity).count()
        assert entity_total == 173_736

        # Number of entities per label
        per_label_query = db.query(Entity.label_, func.count(Entity.label_))
        per_label_counts = per_label_query.group_by(Entity.label_).all()
        assert per_label_counts