How to use the pyani.pyani_files.get_sequence_lengths function in pyani

To help you get started, we’ve selected a few pyani examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github widdowquinn / pyani / tests / test_concordance.py View on Github external
# jobgroup generation in the anim.py module. That's a TODO.
    ncmds, fcmds = anim.generate_nucmer_commands(paths_concordance_fna, tmp_path)
    (tmp_path / "nucmer_output").mkdir(exist_ok=True, parents=True)
    run_mp.multiprocessing_run(ncmds)

    # delta-filter commands need to be treated with care for
    # Travis-CI. Our cluster won't take redirection or semicolon
    # separation in individual commands, but the wrapper we wrote
    # for this (delta_filter_wrapper.py) can't be called under
    # Travis-CI. So we must deconstruct the commands below
    dfcmds = [
        " > ".join([" ".join(fcmd.split()[1:-1]), fcmd.split()[-1]]) for fcmd in fcmds
    ]
    run_mp.multiprocessing_run(dfcmds)

    orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)

    results = anim.process_deltadir(tmp_path / "nucmer_output", orglengths)
    result_pid = results.percentage_identity
    result_pid.to_csv(tmp_path / "pyani_anim.tab", sep="\t")

    # Compare JSpecies output to results
    result_pid = (result_pid.sort_index(axis=0).sort_index(axis=1) * 100.0).values
    tgt_pid = parse_jspecies(path_concordance_jspecies)["ANIm"].values

    assert result_pid - tgt_pid == pytest.approx(0, abs=tolerance_anim)
github widdowquinn / pyani / tests / test_concordance.py View on Github external
def test_aniblastall_concordance(
    paths_concordance_fna,
    path_concordance_jspecies,
    tolerance_anib_hi,
    fragment_length,
    tmp_path,
):
    """Check ANIblastall results are concordant with JSpecies.

    The visible part of this test measures the input genome lengths,
    fragments the input FASTA files, builds and runs an ANIblastall job
    graph, and parses the BLAST output found under ``tmp_path`` into a
    percentage-identity result.

    NOTE(review): this excerpt stops once ``result_pid`` has been computed.
    ``path_concordance_jspecies`` and ``tolerance_anib_hi`` are not used in
    the lines shown; presumably the comparison against the JSpecies
    reference follows -- confirm against the full test module.
    """
    # Get lengths of input genomes
    orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)

    # Perform ANIblastall on the input directory contents
    # fragment_fasta_files returns two values; both are reused below
    # (fragfiles for the job graph, fraglengths for result processing).
    fragfiles, fraglengths = anib.fragment_fasta_files(
        paths_concordance_fna, tmp_path, fragment_length
    )
    jobgraph = anib.make_job_graph(
        paths_concordance_fna,
        fragfiles,
        anib.make_blastcmd_builder("ANIblastall", tmp_path),
    )
    # A non-zero return from the runner fails the test before any parsing.
    assert 0 == run_mp.run_dependency_graph(jobgraph)  # Jobs must run correctly

    # Process BLAST output
    # Only the percentage_identity attribute of the processed result is kept.
    result_pid = anib.process_blast(
        tmp_path, orglengths, fraglengths, mode="ANIblastall"
    ).percentage_identity
github widdowquinn / pyani / tests / test_anib.py View on Github external
def test_parse_legacy_blastdir(anib_output_dir):
    """Parse a directory of legacy BLAST output and check percentage identity.

    Input genome lengths and fragment lengths are read from the fixture's
    ``infiles`` and ``fragfiles``; the legacy BLAST output directory is then
    processed in ``ANIblastall`` mode and the resulting percentage-identity
    frame is compared with the fixture's ``legacyblastresult``.

    Args:
        anib_output_dir: fixture exposing ``infiles``, ``fragfiles``,
            ``legacyblastdir`` and ``legacyblastresult``.
    """
    orglengths = pyani_files.get_sequence_lengths(anib_output_dir.infiles)
    fraglengths = anib.get_fraglength_dict(anib_output_dir.fragfiles)
    result = anib.process_blast(
        anib_output_dir.legacyblastdir, orglengths, fraglengths, mode="ANIblastall"
    )
    # Sort columns, then rows, on both frames so the comparison does not
    # depend on label order. ``axis`` is passed by keyword because pandas 2.0
    # made the arguments of ``sort_index`` keyword-only.
    assert_frame_equal(
        result.percentage_identity.sort_index(axis=1).sort_index(),
        anib_output_dir.legacyblastresult.sort_index(axis=1).sort_index(),
    )
github widdowquinn / pyani / tests / test_concordance.py View on Github external
paths_concordance_fna,
    path_concordance_jspecies,
    tolerance_anib_hi,
    tolerance_anib_lo,
    threshold_anib_lo_hi,
    fragment_length,
    tmp_path,
):
    """Check ANIb results are concordant with JSpecies.

    We expect ANIb results to be quite different, as the BLASTN
    algorithm changed substantially between BLAST and BLAST+ (the
    megaBLAST algorithm is now the default for BLASTN)
    """
    # Get lengths of input genomes
    orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)

    # Build and run BLAST jobs
    fragfiles, fraglengths = anib.fragment_fasta_files(
        paths_concordance_fna, tmp_path, fragment_length
    )
    jobgraph = anib.make_job_graph(
        paths_concordance_fna, fragfiles, anib.make_blastcmd_builder("ANIb", tmp_path)
    )
    assert 0 == run_mp.run_dependency_graph(jobgraph)  # Jobs must run correctly

    # Process BLAST output
    result_pid = anib.process_blast(
        tmp_path, orglengths, fraglengths, mode="ANIb"
    ).percentage_identity

    # Compare JSpecies output to results. We do this in two blocks,
github widdowquinn / pyani / tests / test_anim.py View on Github external
def test_deltadir_parsing(delta_output_dir):
    """Process a test directory of .delta files and check percentage identity.

    FASTA files are collected from the fixture's ``seqdir`` and their
    sequence lengths computed; the fixture's ``deltadir`` is then processed
    and the resulting percentage-identity frame is compared with the
    fixture's ``deltaresult``.

    Args:
        delta_output_dir: fixture exposing ``seqdir``, ``deltadir`` and
            ``deltaresult``.
    """
    seqfiles = pyani_files.get_fasta_files(delta_output_dir.seqdir)
    orglengths = pyani_files.get_sequence_lengths(seqfiles)
    result = anim.process_deltadir(delta_output_dir.deltadir, orglengths)
    # Sort columns, then rows, on both frames so the comparison does not
    # depend on label order. ``axis`` is passed by keyword because pandas 2.0
    # made the arguments of ``sort_index`` keyword-only.
    assert_frame_equal(
        result.percentage_identity.sort_index(axis=1).sort_index(),
        delta_output_dir.deltaresult.sort_index(axis=1).sort_index(),
    )
github widdowquinn / pyani / tests / test_anib.py View on Github external
def test_parse_blastdir(anib_output_dir):
    """Parse a directory of BLAST+ output and check percentage identity.

    Input genome lengths and fragment lengths are read from the fixture's
    ``infiles`` and ``fragfiles``; the BLAST+ output directory is then
    processed in ``ANIb`` mode and the resulting percentage-identity frame
    is compared with the fixture's ``blastresult``.

    Args:
        anib_output_dir: fixture exposing ``infiles``, ``fragfiles``,
            ``blastdir`` and ``blastresult``.
    """
    orglengths = pyani_files.get_sequence_lengths(anib_output_dir.infiles)
    fraglengths = anib.get_fraglength_dict(anib_output_dir.fragfiles)
    result = anib.process_blast(
        anib_output_dir.blastdir, orglengths, fraglengths, mode="ANIb"
    )
    # Sort columns, then rows, on both frames so the comparison does not
    # depend on label order. ``axis`` is passed by keyword because pandas 2.0
    # made the arguments of ``sort_index`` keyword-only.
    assert_frame_equal(
        result.percentage_identity.sort_index(axis=1).sort_index(),
        anib_output_dir.blastresult.sort_index(axis=1).sort_index(),
    )
github widdowquinn / pyani / pyani / scripts / average_nucleotide_identity.py View on Github external
else:
        # Run ANI comparisons
        logger.info("Identifying FASTA files in %s", args.indirname)
        infiles = pyani_files.get_fasta_files(args.indirname)
        logger.info("Input files:\n\t%s", "\n\t".join([str(_) for _ in infiles]))

        # Are we subsampling? If so, make the selection here
        if args.subsample:
            infiles = subsample_input(args, logger, infiles)
            logger.info(
                "Sampled input files:\n\t%s", "\n\t".join([str(_) for _ in infiles])
            )

        # Get lengths of input sequences
        logger.info("Processing input sequence lengths")
        org_lengths = pyani_files.get_sequence_lengths(infiles)
        seqlens = os.linesep.join(
            ["\t%s: %d" % (k, v) for k, v in list(org_lengths.items())]
        )
        logger.info("Sequence lengths:\n%s", seqlens)

        # Run appropriate method on the contents of the input directory,
        # and write out corresponding results.
        logger.info("Carrying out %s analysis", args.method)
        if args.method == "TETRA":
            results = method_function(args, logger, infiles)
        else:
            results = method_function(args, logger, infiles, org_lengths)
        write(args, logger, results)

    # Do we want graphical output?
    if args.graphics or args.rerender: