How to use the pyani.anib.make_job_graph function in pyani

To help you get started, we’ve selected a few pyani examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github widdowquinn / pyani / tests / test_anib.py View on Github external
def test_blastall_graph(path_fna_all, tmp_path, fragment_length):
    """Build the job graph for legacy (blastall) BLASTN and check its shape."""
    fragment_files = anib.fragment_fasta_files(
        path_fna_all, tmp_path, fragment_length
    )[0]
    command_builder = anib.make_blastcmd_builder("ANIblastall", tmp_path)
    graph = anib.make_job_graph(path_fna_all, fragment_files, command_builder)
    # Each job in the graph must be a legacy "blastall -p blastn" command
    # with exactly one dependency, and that dependency must be a formatdb
    # command.
    for blast_job in graph:
        assert blast_job.script.startswith("blastall -p blastn")
        prerequisites = blast_job.dependencies
        assert len(prerequisites) == 1
        assert prerequisites[0].script.startswith("formatdb")
github widdowquinn / pyani / tests / test_concordance.py View on Github external
def test_aniblastall_concordance(
    paths_concordance_fna,
    path_concordance_jspecies,
    tolerance_anib_hi,
    fragment_length,
    tmp_path,
):
    """Verify ANIblastall percentage identities agree with JSpecies output.

    The input genomes are fragmented, the resulting BLAST job graph is run,
    and the percentage identity table (scaled to percent) is compared with
    the "ANIb" table parsed from the JSpecies file, within an absolute
    tolerance of ``tolerance_anib_hi``.
    """
    # Lengths of the input genomes, needed when processing BLAST output
    genome_lengths = pyani_files.get_sequence_lengths(paths_concordance_fna)

    # Fragment the inputs, then build and run the ANIblastall job graph
    fragmented = anib.fragment_fasta_files(
        paths_concordance_fna, tmp_path, fragment_length
    )
    fragment_paths, fragment_sizes = fragmented
    command_builder = anib.make_blastcmd_builder("ANIblastall", tmp_path)
    graph = anib.make_job_graph(
        paths_concordance_fna, fragment_paths, command_builder
    )
    exit_status = run_mp.run_dependency_graph(graph)
    assert 0 == exit_status  # Jobs must run correctly

    # Collect percentage identities from the BLAST output
    blast_results = anib.process_blast(
        tmp_path, genome_lengths, fragment_sizes, mode="ANIblastall"
    )
    identity = blast_results.percentage_identity

    # Sort both axes so rows/columns line up with the JSpecies table, and
    # rescale to percentages before comparing
    ordered = identity.sort_index(axis=0).sort_index(axis=1)
    observed = (ordered * 100.0).values
    expected = parse_jspecies(path_concordance_jspecies)["ANIb"].values
    assert observed - expected == pytest.approx(0, abs=tolerance_anib_hi)
github widdowquinn / pyani / tests / test_anib.py View on Github external
def test_blastn_graph(path_fna_all, tmp_path, fragment_length):
    """Build the job graph for BLASTN+ and check each job and its dependency."""
    fragment_output = anib.fragment_fasta_files(
        path_fna_all, tmp_path, fragment_length
    )
    builder = anib.make_blastcmd_builder("ANIb", tmp_path)
    graph = anib.make_job_graph(path_fna_all, fragment_output[0], builder)
    # Every job's script must be a blastn command, and it must depend on
    # exactly one other job whose script is a makeblastdb command.
    for blast_job in graph:
        assert blast_job.script.startswith("blastn")
        prerequisites = blast_job.dependencies
        assert len(prerequisites) == 1
        assert prerequisites[0].script.startswith("makeblastdb")
github widdowquinn / pyani / tests / test_multiprocessing.py View on Github external
def test_dependency_graph_run(self):
        """Run an ANIb job dependency graph and expect a return value of 0."""
        fragment_files = anib.fragment_fasta_files(
            self.infiles, self.outdir, self.fraglen
        )[0]
        builder = anib.make_blastcmd_builder("ANIb", self.outdir)
        graph = anib.make_job_graph(self.infiles, fragment_files, builder)
        # The graph runner's return value is compared against zero.
        outcome = run_multiprocessing.run_dependency_graph(graph)
        self.assertEqual(0, outcome)
github widdowquinn / pyani / tests / test_concordance.py View on Github external
tmp_path,
):
    """Check ANIb results are concordant with JSpecies.

    We expect ANIb results to be quite different, as the BLASTN
    algorithm changed substantially between BLAST and BLAST+ (the
    megaBLAST algorithm is now the default for BLASTN)
    """
    # Get lengths of input genomes
    orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)

    # Build and run BLAST jobs
    fragfiles, fraglengths = anib.fragment_fasta_files(
        paths_concordance_fna, tmp_path, fragment_length
    )
    jobgraph = anib.make_job_graph(
        paths_concordance_fna, fragfiles, anib.make_blastcmd_builder("ANIb", tmp_path)
    )
    assert 0 == run_mp.run_dependency_graph(jobgraph)  # Jobs must run correctly

    # Process BLAST output
    result_pid = anib.process_blast(
        tmp_path, orglengths, fraglengths, mode="ANIb"
    ).percentage_identity

    # Compare JSpecies output to results. We do this in two blocks,
    # masked according to whether the expected result is greater than
    # a threshold separating "low" from "high" identity comparisons.
    result_pid = result_pid.sort_index(axis=0).sort_index(axis=1) * 100.0
    lo_result = result_pid.mask(result_pid >= threshold_anib_lo_hi).fillna(0).values
    hi_result = result_pid.mask(result_pid < threshold_anib_lo_hi).fillna(0).values
github widdowquinn / pyani / pyani / scripts / average_nucleotide_identity.py View on Github external
:param infiles:  iterable of sequence files to compare
    :param blastdir:  path of directory to fragment BLASTN databases

    Runs BLAST database creation and comparisons, returning the cumulative
    return values of the BLAST tool subprocesses, and the fragment sizes for
    each input file
    """
    if not args.skip_blastn:
        logger.info("Fragmenting input files, and writing to %s", args.outdirname)
        fragfiles, fraglengths = make_sequence_fragments(
            args, logger, infiles, blastdir
        )

        # Run BLAST database-building and executables from a jobgraph
        logger.info("Creating job dependency graph")
        jobgraph = anib.make_job_graph(
            infiles, fragfiles, anib.make_blastcmd_builder(args.method, blastdir)
        )
        if args.scheduler == "multiprocessing":
            logger.info("Running dependency graph with multiprocessing")
            cumval = run_mp.run_dependency_graph(jobgraph, logger=logger)
            if cumval > 0:
                logger.warning(
                    f"At least one BLAST run failed. {args.method} may fail. Please investigate."
                )
            else:
                logger.info("All multiprocessing jobs complete.")
        elif args.scheduler == "SGE":
            logger.info("Running dependency graph with SGE")
            run_sge.run_dependency_graph(jobgraph, logger=logger)
        else:
            logger.error(f"Scheduler {args.scheduler} not recognised (exiting)")