# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_createdb(args_createdb, monkeypatch):
    """Exercise the createdb subcommand with the database layer stubbed out.

    Replaces ``pyani_orm.create_db`` with a no-op so no database file is
    actually written, then drives the script's main entry point with the
    createdb argument namespace supplied by the fixture.
    """

    def _noop(*_args, **_kwargs):
        # Stand-in for pyani_orm.create_db; accepts anything, does nothing.
        return None

    monkeypatch.setattr(pyani_orm, "create_db", _noop)
    pyani_script.run_main(args_createdb)
:param args: Namespace of command-line arguments
This is graphical output for representing the ANI analysis results, and
takes the form of a heatmap, or heatmap with dendrogram.
"""
logger = logging.getLogger(__name__)
# Announce what's going on to the user
logger.info(termcolor("Generating graphical output for analyses", "red"))
logger.info("Writing output to: %s", args.outdir)
os.makedirs(args.outdir, exist_ok=True)
logger.info("Rendering method: %s", args.method)
# Connect to database session
logger.debug("Activating session for database: %s", args.dbpath)
session = pyani_orm.get_session(args.dbpath)
# Parse output formats
outfmts = args.formats.split(",")
logger.debug("Requested output formats: %s", outfmts)
# Work on each run:
run_ids = [int(run) for run in args.run_id.split(",")]
logger.debug("Generating graphics for runs: %s", run_ids)
for run_id in run_ids:
write_run_heatmaps(run_id, session, outfmts, args)
return 0
def write_run_heatmaps(
    run_id: int, session, outfmts: List[str], args: Namespace
) -> None:
    """Write all heatmaps and distribution plots for a specified run to file.

    :param run_id: int, run identifier in database session
    :param session: Session, active SQLite session
    :param outfmts: list of output format types
    :param args: Namespace, command line arguments
    """
    logger = logging.getLogger(__name__)

    # Get results matrices for the run.
    # NOTE: query on the run_id parameter, not args.run_id — the caller
    # iterates over the parsed run ids and invokes this function once per
    # run, while args.run_id is the raw comma-separated CLI string (e.g.
    # "1,2"), which would not match any single Run.run_id.
    logger.debug("Retrieving results matrices for run %s", run_id)
    results = (
        session.query(pyani_orm.Run).filter(pyani_orm.Run.run_id == run_id).first()
    )
    result_label_dict = pyani_orm.get_matrix_labels_for_run(session, run_id)
    result_class_dict = pyani_orm.get_matrix_classes_for_run(session, run_id)

    # Write one heatmap and one distribution plot per results matrix;
    # each matrix is stored as JSON in the Run row and rehydrated here.
    for matdata in [
        MatrixData(*_)
        for _ in [
            ("identity", pd.read_json(results.df_identity), {}),
            ("coverage", pd.read_json(results.df_coverage), {}),
            ("aln_lengths", pd.read_json(results.df_alnlength), {}),
            ("sim_errors", pd.read_json(results.df_simerrors), {}),
            ("hadamard", pd.read_json(results.df_hadamard), {}),
        ]
    ]:
        write_heatmap(
            run_id, matdata, result_label_dict, result_class_dict, outfmts, args
        )
        write_distribution(run_id, matdata, outfmts, args)
logger.info(
termcolor("Generating classification for ANI run: %s", "red"), args.run_id
)
logger.info("\tWriting output to: %s", args.outdir)
logger.info(termcolor("\tCoverage threshold: %s", "cyan"), args.cov_min)
logger.info(
termcolor("\tInitial minimum identity threshold: %s", "cyan"), args.id_min
)
# Get results data for the specified run
logger.info("Acquiring results for run: %s", args.run_id)
logger.debug("Connecting to database: %s", args.dbpath)
session = pyani_orm.get_session(args.dbpath)
logger.debug("Retrieving results matrices")
results = (
session.query(pyani_orm.Run).filter(pyani_orm.Run.run_id == args.run_id).first()
)
result_label_dict = pyani_orm.get_matrix_labels_for_run(session, args.run_id)
# Generate initial graph on basis of results
logger.info("Constructing graph from results.")
initgraph = pyani_classify.build_graph_from_results(
results, result_label_dict, args.cov_min, args.id_min
)
logger.debug(
"Returned graph has %d nodes:\n\t%s",
len(initgraph),
"\n\t".join(n for n in initgraph),
)
logger.debug(
"Initial graph clique information:\n\t%s",
pyani_classify.analyse_cliques(initgraph),
"""Write all heatmaps for a specified run to file.
:param run_id: int, run identifier in database session
:param session: Session, active SQLite session
:param outfmts: list of output format types
:param args: Namespace, command line arguments
"""
logger = logging.getLogger(__name__)
# Get results matrices for the run
logger.debug("Retrieving results matrices for run %s", run_id)
results = (
session.query(pyani_orm.Run).filter(pyani_orm.Run.run_id == args.run_id).first()
)
result_label_dict = pyani_orm.get_matrix_labels_for_run(session, args.run_id)
result_class_dict = pyani_orm.get_matrix_classes_for_run(session, args.run_id)
# Write heatmap for each results matrix
for matdata in [
MatrixData(*_)
for _ in [
("identity", pd.read_json(results.df_identity), {}),
("coverage", pd.read_json(results.df_coverage), {}),
("aln_lengths", pd.read_json(results.df_alnlength), {}),
("sim_errors", pd.read_json(results.df_simerrors), {}),
("hadamard", pd.read_json(results.df_hadamard), {}),
]
]:
write_heatmap(
run_id, matdata, result_label_dict, result_class_dict, outfmts, args
)
write_distribution(run_id, matdata, outfmts, args)
logger = logging.getLogger(__name__)
# Tell the user what's going on
logger.info(
termcolor("Generating classification for ANI run: %s", "red"), args.run_id
)
logger.info("\tWriting output to: %s", args.outdir)
logger.info(termcolor("\tCoverage threshold: %s", "cyan"), args.cov_min)
logger.info(
termcolor("\tInitial minimum identity threshold: %s", "cyan"), args.id_min
)
# Get results data for the specified run
logger.info("Acquiring results for run: %s", args.run_id)
logger.debug("Connecting to database: %s", args.dbpath)
session = pyani_orm.get_session(args.dbpath)
logger.debug("Retrieving results matrices")
results = (
session.query(pyani_orm.Run).filter(pyani_orm.Run.run_id == args.run_id).first()
)
result_label_dict = pyani_orm.get_matrix_labels_for_run(session, args.run_id)
# Generate initial graph on basis of results
logger.info("Constructing graph from results.")
initgraph = pyani_classify.build_graph_from_results(
results, result_label_dict, args.cov_min, args.id_min
)
logger.debug(
"Returned graph has %d nodes:\n\t%s",
len(initgraph),
"\n\t".join(n for n in initgraph),
)
) -> None:
"""Write all heatmaps for a specified run to file.
:param run_id: int, run identifier in database session
:param session: Session, active SQLite session
:param outfmts: list of output format types
:param args: Namespace, command line arguments
"""
logger = logging.getLogger(__name__)
# Get results matrices for the run
logger.debug("Retrieving results matrices for run %s", run_id)
results = (
session.query(pyani_orm.Run).filter(pyani_orm.Run.run_id == args.run_id).first()
)
result_label_dict = pyani_orm.get_matrix_labels_for_run(session, args.run_id)
result_class_dict = pyani_orm.get_matrix_classes_for_run(session, args.run_id)
# Write heatmap for each results matrix
for matdata in [
MatrixData(*_)
for _ in [
("identity", pd.read_json(results.df_identity), {}),
("coverage", pd.read_json(results.df_coverage), {}),
("aln_lengths", pd.read_json(results.df_alnlength), {}),
("sim_errors", pd.read_json(results.df_simerrors), {}),
("hadamard", pd.read_json(results.df_hadamard), {}),
]
]:
write_heatmap(
run_id, matdata, result_label_dict, result_class_dict, outfmts, args
)
# If the database exists, raise an error rather than overwrite
if args.dbpath.is_file():
if not args.force:
logger.error("Database %s already exists (exiting)", args.dbpath)
raise SystemError(1)
logger.warning("Database %s already exists - overwriting", args.dbpath)
args.dbpath.unlink()
# If the path to the database doesn't exist, create it
if not args.dbpath.parent.is_dir():
logger.info("Creating database directory %s", args.dbpath.parent)
args.dbpath.parent.mkdir(parents=True, exist_ok=True)
# Create the empty database
logger.info("Creating pyani database at %s", args.dbpath)
pyani_orm.create_db(args.dbpath)
return 0
These will typically take an output path to a file or directory into which
the report will be written (whatever form it takes). By default, text
output is written in plain text format, but for some outputs this can
be modified by an 'excel' or 'html' format specifier, which writes outputs
in that format, where possible.
"""
logger = logging.getLogger(__name__)
# Output formats will apply across all tabular data requested
# Expect comma-separated format arguments, and turn them into an iterable
formats = process_formats(args)
logger.info(termcolor("Creating report output in formats: %s", "red"), formats)
# Declare which database is being used, and connect to session
logger.debug("Using database: %s", args.dbpath)
session = pyani_orm.get_session(args.dbpath)
# Report runs in the database
if args.show_runs:
statement = session.query(
Run.run_id, Run.name, Run.method, Run.date, Run.cmdline
).statement
headers = ["run ID", "name", "method", "date run", "command-line"]
report(args, session, formats, ReportParams("runs", statement, headers))
# Report genomes in the database
if args.show_genomes:
statement = session.query(
Genome.genome_id,
Genome.description,
Genome.path,
Genome.genome_hash,