Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
:param filter_exe:
:param maxmatch: Boolean flag indicating whether to use NUCmer's --maxmatch
option. If not, the --mum option is used instead
The split into a tuple was made necessary by changes to SGE/OGE.
The delta-filter command must now be run as a dependency of the NUCmer
command, and be wrapped in a Python script to capture STDOUT.
NOTE: This command-line writes output data to a subdirectory of the passed
outdir, called "nucmer_output".
"""
# Cast path strings to pathlib.Path for safety
fname1, fname2 = Path(fname1), Path(fname2)
# Compile commands
outsubdir = outdir / pyani_config.ALIGNDIR["ANIm"]
outprefix = outsubdir / f"{fname1.stem}_vs_{fname2.stem}"
if maxmatch:
mode = "--maxmatch"
else:
mode = "--mum"
nucmercmd = "{0} {1} -p {2} {3} {4}".format(
nucmer_exe, mode, outprefix, fname1, fname2
)
# There's a subtle pathlib.Path issue here. We must use string concatenation to add suffixes
# to the outprefix files, because path.with_suffix() can replace part of the filestem
# when the stem contains a period (this occurs frequently, as a period is part
# of the NCBI notation for genome assembly versions)
filtercmd = (
f"delta_filter_wrapper.py {filter_exe} -1 {str(outprefix) + '.delta'} "
f"{str(outprefix) + '.filter'}"
)
default=None,
type=str,
help="Additional arguments for qsub",
)
# --maxmatch: boolean switch (store_true); stays False unless the flag is given
parser.add_argument(
"--maxmatch",
dest="maxmatch",
action="store_true",
default=False,
help="Override MUMmer to allow all NUCmer matches",
)
# --nucmer_exe: NUCmer executable location; the value is converted with Path
# and falls back to pyani_config.NUCMER_DEFAULT when the option is omitted
parser.add_argument(
"--nucmer_exe",
dest="nucmer_exe",
action="store",
default=pyani_config.NUCMER_DEFAULT,
type=Path,
help="Path to NUCmer executable",
)
# --filter_exe: delta-filter executable location; the value is converted with
# Path and falls back to pyani_config.FILTER_DEFAULT when the option is omitted
parser.add_argument(
"--filter_exe",
dest="filter_exe",
action="store",
default=pyani_config.FILTER_DEFAULT,
type=Path,
help="Path to delta-filter executable",
)
parser.add_argument(
"--blastn_exe",
dest="blastn_exe",
action="store",
default=pyani_config.BLASTN_DEFAULT,
import scipy.cluster.hierarchy as sch
import scipy.spatial.distance as distance
from scipy.stats import gaussian_kde
from pyani import pyani_config
# Specify matplotlib backend. This *must* be done before pyplot import, but
# raises errors with flake8 etc. So we comment out the specific error
matplotlib.use("Agg")
import matplotlib.pyplot as plt # noqa: E402,E501 # pylint: disable=wrong-import-position,wrong-import-order,ungrouped-imports
import matplotlib.gridspec as gridspec # noqa: E402,E501 # pylint: disable=wrong-import-position,wrong-import-order,ungrouped-imports
# Register Matplotlib colourmaps
# pyplot.register_cmap() was deprecated in Matplotlib 3.7 and removed in 3.9;
# the matplotlib.colormaps registry (added in 3.5) is its replacement. Prefer
# the registry when it exists and fall back to the pyplot call on older
# releases, so that importing this module works across Matplotlib versions.
for _cmap in (
    pyani_config.CMAP_SPBND_BURD,
    pyani_config.CMAP_HADAMARD_BURD,
    pyani_config.CMAP_BURD,
):
    if hasattr(matplotlib, "colormaps"):
        matplotlib.colormaps.register(_cmap)
    else:  # Matplotlib releases that predate the colormaps registry
        plt.register_cmap(cmap=_cmap)
# Matplotlib version dictates bug fixes
MPLVERSION = matplotlib.__version__
# helper for cleaning up matplotlib axes by removing ticks etc.
def clean_axis(axis):
"""Remove ticks, tick labels, and frame from axis.
:param axis:
"""
axis.get_xaxis().set_ticks([])
axis.get_yaxis().set_ticks([])
for spine in list(axis.spines.values()):
cblist = []
for name in [str(_) for _ in dfr.index[dend["dendrogram"]["leaves"]]]:
if name in params.classes:
cblist.append(classdict[params.classes[name]])
elif name in classdict:
cblist.append(classdict[name])
else: # Catches genomes with no assigned class
cblist.append(0)
colbar = pd.Series(cblist)
# Create colourbar axis - could capture if needed
if orientation == "row":
cbaxes = fig.add_subplot(dend["gridspec"][0, 1])
cbaxes.imshow(
[[cbar] for cbar in colbar.values],
cmap=plt.get_cmap(pyani_config.MPL_CBAR),
interpolation="nearest",
aspect="auto",
origin="lower",
)
else:
cbaxes = fig.add_subplot(dend["gridspec"][1, 0])
cbaxes.imshow(
[colbar],
cmap=plt.get_cmap(pyani_config.MPL_CBAR),
interpolation="nearest",
aspect="auto",
origin="lower",
)
clean_axis(cbaxes)
return colbar
def construct_nucmer_cmdline(
fname1: Path,
fname2: Path,
outdir: Path = Path("."),
nucmer_exe: Path = pyani_config.NUCMER_DEFAULT,
filter_exe: Path = pyani_config.FILTER_DEFAULT,
maxmatch: bool = False,
) -> Tuple[str, str]:
"""Return a tuple of corresponding NUCmer and delta-filter commands.
:param fname1: path to query FASTA file
:param fname2: path to subject FASTA file
:param outdir: path to output directory
:param nucmer_exe:
:param filter_exe:
:param maxmatch: Boolean flag indicating whether to use NUCmer's --maxmatch
option. If not, the --mum option is used instead
The split into a tuple was made necessary by changes to SGE/OGE.
The delta-filter command must now be run as a dependency of the NUCmer
command, and be wrapped in a Python script to capture STDOUT.
help="output analysis results directory",
)
# Optional arguments
# --dbpath: location of the pyani database; the value is converted with Path
# and defaults to the relative path .pyani/pyanidb
parser.add_argument(
"--dbpath",
action="store",
dest="dbpath",
default=Path(".pyani/pyanidb"),
type=Path,
help="path to pyani database",
)
# --blastn_exe: blastn executable location; the value is converted with Path
# and falls back to pyani_config.BLASTN_DEFAULT when the option is omitted
parser.add_argument(
"--blastn_exe",
dest="blastn_exe",
action="store",
default=pyani_config.BLASTN_DEFAULT,
type=Path,
help="path to blastn executable",
)
# --format_exe: makeblastdb executable location; the value is converted with
# Path and falls back to pyani_config.MAKEBLASTDB_DEFAULT when omitted
parser.add_argument(
"--format_exe",
dest="format_exe",
action="store",
default=pyani_config.MAKEBLASTDB_DEFAULT,
type=Path,
help="path to makeblastdb executable",
)
parser.add_argument(
"--fragsize",
dest="fragsize",
action="store",
type=int,
def construct_blastn_cmdline(
fname1: Path,
fname2: Path,
outdir: Path,
blastn_exe: Path = pyani_config.BLASTN_DEFAULT,
) -> str:
"""Return a single blastn command.
:param fname1:
:param fname2:
:param outdir:
:param blastn_exe: str, path to blastn executable
"""
prefix = outdir / f"{fname1.stem.replace('-fragments', '')}_vs_{fname2.stem}"
return (
f"{blastn_exe} -out {prefix}.blast_tab -query {fname1} -db {fname2} "
"-xdrop_gap_final 150 -dust no -evalue 1e-15 -max_target_seqs 1 -outfmt "
def get_version(nucmer_exe: Path = pyani_config.NUCMER_DEFAULT) -> str:
"""Return NUCmer package version as a string.
:param nucmer_exe: path to NUCmer executable
We expect NUCmer to return a string on STDERR as
.. code-block:: bash
$ nucmer
NUCmer (NUCleotide MUMmer) version 3.1
we concatenate this with the OS name.
"""
cmdline = [nucmer_exe, "-V"] # type: List
result = subprocess.run(
cmdline, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
def generate_nucmer_jobs(
filenames: List[Path],
outdir: Path = Path("."),
nucmer_exe: Path = pyani_config.NUCMER_DEFAULT,
filter_exe: Path = pyani_config.FILTER_DEFAULT,
maxmatch: bool = False,
jobprefix: str = "ANINUCmer",
):
"""Return list of Jobs describing NUCmer command-lines for ANIm.
:param filenames: list of Paths to input FASTA files
:param outdir: Path to output directory (defaults to the current directory)
:param nucmer_exe: Path, location of the nucmer binary
:param filter_exe: Path, location of the delta-filter binary
:param maxmatch: Boolean flag indicating to use NUCmer's --maxmatch option
:param jobprefix: str, defaults to "ANINUCmer"; presumably a name prefix for
the generated jobs -- TODO confirm against the function body
Loop over all FASTA files, generating Jobs describing NUCmer command lines
for each pairwise comparison.
"""
outfmts: List[str],
args: Namespace,
) -> None:
"""Write a single heatmap for a pyani run.
:param run_id: int, run_id for this run
:param matdata: MatrixData object for this heatmap
:param result_labels: dict of result labels
:param result_classes: dict of result classes
:param args: Namespace for command-line arguments
:param outfmts: list of output formats for files
"""
logger = logging.getLogger(__name__)
logger.info("Writing %s matrix heatmaps", matdata.name)
cmap = pyani_config.get_colormap(matdata.data, matdata.name)
for fmt in outfmts:
outfname = Path(args.outdir) / f"matrix_{matdata.name}_run{run_id}.{fmt}"
logger.debug("\tWriting graphics to %s", outfname)
params = pyani_graphics.Params(cmap, result_labels, result_classes)
# Draw heatmap
GMETHODS[args.method](
matdata.data,
outfname,
title=f"matrix_{matdata.name}_run{run_id}",
params=params,
)
# Be tidy with matplotlib caches
plt.close("all")