# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_get_message_by_id(sample_pst_file):
    """Fetch every message again by its identifier and compare it with the iterated one.

    For each message yielded by ``archive.messages()``, the message returned by
    ``archive.get_message_by_id`` must carry the same identifier and format to
    the same string.
    """
    with PffArchive(sample_pst_file) as archive:
        for expected in archive.messages():
            fetched = archive.get_message_by_id(expected.identifier)

            # Same identifier, and same formatted output
            assert fetched.identifier == expected.identifier
            assert archive.format_message(fetched) == archive.format_message(expected)
def test_extract_message_attachments(enron_dataset_part002):
    """Check 3 known attachments to validate the attachment extraction process.

    The attachments of one known message (tree node 2128676) are read into
    memory, written to a temporary directory, and their MD5 digests are compared
    with the expected values in ``digests``.
    """
    # Local import so this test does not depend on the module's import block
    import hashlib

    # Expected MD5 hex digests.
    # NOTE(review): keys are assumed to be attachment identifiers -- confirm
    digests = {
        47685: "d48232614b01e56014293854abbb5db3",
        47717: "cf8be7cd3e6e14307972246e2942c9d1",
        47749: "081e6b66dc89671ff6460adac94dbab1",
    }

    with PffArchive(
        next(enron_dataset_part002.glob("*.pst"))
    ) as archive, TemporaryDirectory() as tmp_dir:
        # Get message by ID
        node = archive.tree.get_node(2128676)
        message = node.data

        for att in message.attachments:
            # Read attachment as bytes
            rbuf = att.read_buffer(att.size)

            # Save attachment
            filepath = (
                Path(tmp_dir) / f"attachment_{message.identifier}_{att.identifier}"
            )
            filepath.write_bytes(rbuf)

            # Confirm checksum: without this the digests table above is unused
            # and the test could never fail on bad attachment content
            assert hashlib.md5(rbuf).hexdigest() == digests[att.identifier]
def test_pffarchive_iterate_over_messages(sample_pst_file, bfs):
    """Iterate over the sample archive and require a non-empty plain text body on each message.

    ``bfs`` is forwarded unchanged to ``archive.messages``.
    """
    with PffArchive(sample_pst_file) as archive:
        messages = archive.messages(bfs=bfs)

        for current in messages:
            assert current.plain_text_body
def test_pffarchive_load_from_invalid_type():
    """Constructing a ``PffArchive`` from an integer must raise ``TypeError``."""
    with pytest.raises(TypeError):
        # The result is irrelevant; only the raised exception matters
        PffArchive(1)
def test_pffarchive_load_from_file_object(sample_pst_file):
    """Open the sample file in binary mode and load the archive from the file object.

    The archive built this way must yield 2668 messages.
    """
    with sample_pst_file.open(mode="rb") as pst_stream:
        with PffArchive(pst_stream) as archive:
            all_messages = list(archive.messages())
            assert len(all_messages) == 2668
def test_extract_enron_messages(enron_dataset):
    """Format every message of every ``.pst`` file found under ``enron_dataset``.

    Failures on a file are logged and do not stop the run. At the end, the
    number of formatted messages and the cumulative size of the files that were
    processed without error are logged.
    """
    nb_extracted = 0
    total_size = 0

    for pst_file in enron_dataset.glob("**/*.pst"):
        try:
            with PffArchive(pst_file) as pff:
                for item in pff.messages():
                    # Only the side effect of formatting matters; the text is discarded
                    pff.format_message(item)

                    # Counted per message, so a partial count survives a later failure
                    nb_extracted += 1

            # Reached only when the whole archive was processed without error
            total_size += pst_file.stat().st_size

        except Exception as exc:  # pylint: disable=broad-except
            # Best effort: record which file failed and move on to the next one
            logger.info(f"Inspecting {pst_file}")
            logger.exception(exc)

    logger.info(
        f"Extracted {nb_extracted} messages from a total of {humanfriendly.format_size(total_size)}"
    )
def test_get_message_body(message, body_type):
    """The second element returned by ``get_message_body`` must be ``body_type`` itself."""
    result = PffArchive().get_message_body(message)

    # Identity check, not equality
    assert result[1] is body_type
def test_get_attachment_metadata(mock_cls):
    """The metadata for a mocked attachment named "foo" must have no MIME type.

    A mock message carrying a single attachment built from ``mock_cls`` is passed
    to ``get_attachment_metadata``; the first returned entry must have
    ``mime_type`` set to ``None``.
    """
    attachment = mock_cls(name="foo", size="0")
    message = MagicMock(identifier=123, attachments=[attachment])

    metadata = PffArchive().get_attachment_metadata(message)

    assert metadata[0].mime_type is None
def test_pffarchive_format_message(enron_dataset_part004, empty_message):
    """Feed every formatted message to python's email parser, then check the empty case.

    Each message of each ``.pst`` file in ``enron_dataset_part004`` is formatted
    and parsed with ``email.message_from_string``. Formatting ``empty_message``
    must give an empty string.
    """
    for pst_file in enron_dataset_part004.glob("*.pst"):
        with PffArchive(pst_file) as archive:
            for item in archive.messages():
                # What matters most is that the email parsing module does not
                # raise; the assertion itself is secondary
                parsed = email.message_from_string(
                    archive.format_message(item), policy=policy.default
                )

                # A falsy parse result is accepted only if the formatted text is falsy too
                assert parsed or not archive.format_message(item)

    assert PffArchive.format_message(empty_message) == ""