How to use the libratom.lib.core.get_set_of_files function in libratom

To help you get started, we’ve selected a few libratom examples that show popular ways this function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github libratom / libratom / tests / unit / test_libratom.py View on Github external
tmp_filename = "test.sqlite3"

    with TemporaryDirectory() as tmpdir:

        destination = Path(tmpdir) / tmp_filename
        Session = db_init(destination)

        with db_session(Session) as session, patch(
            "libratom.lib.report.FileReport",
            new=MagicMock(side_effect=KeyboardInterrupt),
        ):

            assert (
                scan_files(
                    files=get_set_of_files(directory_of_mbox_files),
                    session=session,
                    jobs=2,
                )
                == 1
            )
github libratom / libratom / tests / unit / test_libratom.py View on Github external
directory_of_mbox_files, function, patched, kwargs
):

    tmp_filename = "test.sqlite3"

    with TemporaryDirectory() as tmpdir:

        destination = Path(tmpdir) / tmp_filename
        Session = db_init(destination)

        with db_session(Session) as session, patch(
            patched, new=MagicMock(side_effect=KeyboardInterrupt),
        ):

            status = function(
                files=get_set_of_files(directory_of_mbox_files),
                session=session,
                **kwargs,
            )

        assert status == 1
github libratom / libratom / tests / unit / test_libratom.py View on Github external
def test_extract_entities_from_mbox_files(directory_of_mbox_files):
    """Run entity extraction over the mbox fixture files and expect status 0.

    A throwaway SQLite database is created inside a temporary directory so
    the test leaves nothing behind on disk.
    """

    with TemporaryDirectory() as scratch_dir:
        # Database file lives only for the duration of this test.
        db_path = Path(scratch_dir) / "test.sqlite3"
        session_factory = db_init(db_path)

        with db_session(session_factory) as session:
            mbox_files = get_set_of_files(directory_of_mbox_files)
            # load_spacy_model returns a sequence; only its first item is used.
            model = load_spacy_model(SPACY_MODELS.en_core_web_sm)[0]

            status = extract_entities(
                files=mbox_files, session=session, spacy_model=model, jobs=2,
            )

        assert status == 0
github libratom / libratom / tests / unit / test_libratom.py View on Github external
def test_attachments_mime_type_validation(enron_dataset, mock_progress_callback):
    """Check that every attachment's MIME type is either known or a tolerated legacy one.

    Iterates over the messages produced from the files in ``enron_dataset``
    and inspects the ``"attachments"`` entry of each result.
    """

    # Some enron files have these obsolete attachment types
    legacy_mime_types = [
        "application/msexcell",
        "application/mspowerpoint",
    ]

    dataset_files = get_set_of_files(enron_dataset)

    for message in get_messages(
        dataset_files, progress_callback=mock_progress_callback
    ):
        message_attachments = message.get("attachments")
        if not message_attachments:
            # Nothing to validate for messages without attachments.
            continue

        for item in message_attachments:
            if item.mime_type not in MIME_TYPES:
                # Not a recognised type: only the legacy ones are acceptable.
                assert item.mime_type in legacy_mime_types
github libratom / libratom / libratom / cli / subcommands.py View on Github external
out = out / OUTPUT_FILENAME_TEMPLATE.format(
            src.name,
            "entities",
            datetime.now()
            .isoformat(timespec="seconds")
            .translate(str.maketrans({"-": "", ":": ""})),
        )

    # Make DB file's parents if needed
    out.parent.mkdir(parents=True, exist_ok=True)

    # DB setup
    Session = db_init(out)

    # Get set of PST files from the source
    files = get_set_of_files(src)

    if not files:
        logger.info(f"No PST file found in {src}")

    # Compute and store file information
    with progress_bar_context(
        total=len(files),
        desc="Initial file scan",
        unit="files",
        color="green",
        leave=False,
    ) as file_bar, db_session(Session) as session:
        status = scan_files(
            files, session, jobs=jobs, progress_callback=file_bar.update
        )
github libratom / libratom / libratom / cli / subcommands.py View on Github external
out = out / OUTPUT_FILENAME_TEMPLATE.format(
            src.name,
            "report",
            datetime.now()
            .isoformat(timespec="seconds")
            .translate(str.maketrans({"-": "", ":": ""})),
        )

    # Make DB file's parents if needed
    out.parent.mkdir(parents=True, exist_ok=True)

    # DB setup
    Session = db_init(out)

    # Get set of PST files from the source
    files = get_set_of_files(src)

    if not files:
        logger.info(f"No PST file found in {src}")

    # Compute and store file information
    with progress_bar_context(
        total=len(files),
        desc="Initial file scan",
        unit="files",
        color="green",
        leave=False,
    ) as file_bar, db_session(Session) as session:
        status = scan_files(
            files, session, jobs=jobs, progress_callback=file_bar.update
        )