Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_ratom_entities_enron_001(
    isolated_cli_runner, enron_dataset_part001, params, expected
):
    """Check that a message stored by entity extraction matches its PST source.

    Runs ``extract_entities`` over the ``enron_dataset_part001`` fixture,
    reads the headers and body recorded for one known message from the
    resulting database, and compares them with the same message read
    directly from the first ``*.pst`` file found in the dataset directory.
    """
    target_id = 2097572

    # Run entity extraction job with message content flag on
    cmd_result = extract_entities(
        params, enron_dataset_part001, isolated_cli_runner, expected
    )

    # Copy the stored fields into locals so they can be compared after the
    # session block has exited
    with db_session_from_cmd_out(cmd_result) as session:
        lookup = session.query(Message).filter_by(pff_identifier=target_id)
        stored = lookup.one()
        stored_headers = stored.headers
        stored_body = stored.body

    # Read the same message straight from the archive and compare
    pst_files = list(enron_dataset_part001.glob("*.pst"))
    pst_path = pst_files[0]
    with open_mail_archive(pst_path) as archive:
        raw_message = archive.get_message_by_id(target_id)

        # get_message_body's result is unpacked into cleanup_message_body's
        # positional arguments
        raw_body = archive.get_message_body(raw_message)
        assert cleanup_message_body(*raw_body) == stored_body

        raw_headers = archive.get_message_headers(raw_message)
        assert raw_headers == stored_headers
def test_ratom_report_enron_027(
    isolated_cli_runner, enron_dataset_part027, params, expected
):
    """Check the report job's message count and one stored message.

    Runs ``generate_report`` over the ``enron_dataset_part027`` fixture and
    asserts that the resulting database holds 9297 messages. For one known
    message it then checks the stored headers and body: when
    ``expected.with_messages`` is truthy they must match the message read
    directly from the first ``*.pst`` file in the dataset directory,
    otherwise both must be ``None``.
    """
    target_id = 2390436

    cmd_result = generate_report(
        params, enron_dataset_part027, isolated_cli_runner, expected
    )

    with db_session_from_cmd_out(cmd_result) as session:
        # Verify total message count
        total_messages = session.query(Message).count()
        assert total_messages == 9297

        # Copy the stored fields into locals so they can be checked after
        # the session block has exited
        lookup = session.query(Message).filter_by(pff_identifier=target_id)
        stored = lookup.one()
        stored_headers = stored.headers
        stored_body = stored.body

    if not expected.with_messages:
        # Message contents were not requested, so none should be stored
        assert stored_headers is None
        assert stored_body is None
        return

    # Read the same message straight from the archive and compare
    pst_files = list(enron_dataset_part027.glob("*.pst"))
    pst_path = pst_files[0]
    with open_mail_archive(pst_path) as archive:
        raw_message = archive.get_message_by_id(target_id)

        # get_message_body's result is unpacked into cleanup_message_body's
        # positional arguments
        raw_body = archive.get_message_body(raw_message)
        assert cleanup_message_body(*raw_body) == stored_body

        raw_headers = archive.get_message_headers(raw_message)
        assert raw_headers == stored_headers
if error:
logger.info(
"Skipping message {message_id} from {filepath}".format(**res)
)
logger.debug(error)
continue
# Extract results
entities = res.pop("entities")
message_id = res.pop("message_id")
filepath = res.pop("filepath")
attachments = res.pop("attachments")
# Create new message instance
message = Message(pff_identifier=message_id, **res)
# Link message to a file_report
try:
file_report = (
session.query(FileReport).filter_by(path=filepath).one()
)
except Exception as exc:
file_report = None
logger.info(
f"Unable to link message id {message_id} to a file. Error: {exc}"
)
message.file_report = file_report
session.add(message)
# Record attachment info
with_content=include_message_contents,
with_headers=include_message_contents,
):
# Extract results
message_id = msg_info.pop("message_id")
filepath = msg_info.pop("filepath")
attachments = msg_info.pop("attachments")
if include_message_contents:
msg_info["body"] = cleanup_message_body(
msg_info["body"], msg_info.pop("body_type")
)
# Create new message instance
message = Message(pff_identifier=message_id, **msg_info)
# Link message to a file_report
try:
file_report = session.query(FileReport).filter_by(path=filepath).one()
except Exception as exc:
file_report = None
logger.info(
f"Unable to link message id {message_id} to a file. Error: {exc}"
)
message.file_report = file_report
session.add(message)
# Record attachment info
session.add_all(
[