Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
sys.exit()
genome_files = self._genome_files(options.genome_nt_dir, options.genome_ext)
if not self._check_nuclotide_seqs(genome_files):
self.logger.warning('All files must contain nucleotide sequences.')
sys.exit()
make_sure_path_exists(options.output_dir)
# get coverage information
if not options.coverage_file:
if not options.bam_files:
self.logger.warning('One or more BAM files must be specified in order to calculate coverage profiles.')
coverage_file = None
else:
coverage = Coverage(options.cpus)
coverage_file = os.path.join(options.output_dir, 'coverage.tsv')
coverage.run(options.bam_files, coverage_file, options.cov_all_reads, options.cov_min_align, options.cov_max_edit_dist)
self.logger.info('Coverage profiles written to: %s' % coverage_file)
else:
check_file_exists(options.coverage_file)
coverage_file = options.coverage_file
# get tetranucleotide signatures
if not options.tetra_file:
tetra = Tetranucleotide(options.cpus)
tetra_file = os.path.join(options.output_dir, 'tetra.tsv')
signatures = tetra.run(options.scaffold_file)
tetra.write(signatures, tetra_file)
self.logger.info('Tetranucleotide signatures written to: %s' % tetra_file)
else:
tetra_file = options.tetra_file
genome_files : list of str
Fasta files with binned scaffolds.
tetra_file : str
Tetranucleotide signatures for scaffolds.
coverage_file : str
Coverage profiles for scaffolds
output_file : str
Output file for scaffolds statistics.
"""
tetra = Tetranucleotide(self.cpus)
signatures = tetra.read(tetra_file)
cov_profiles = None
if coverage_file:
coverage = Coverage(self.cpus)
cov_profiles, _ = coverage.read(coverage_file)
# determine bin assignment for each scaffold
self.logger.info('Determining scaffold statistics.')
scaffold_id_genome_id = {}
for gf in genome_files:
genome_id = remove_extension(gf)
for scaffold_id, _seq in seq_io.read_seq(gf):
scaffold_id_genome_id[scaffold_id] = genome_id
# write out scaffold statistics
fout = open(output_file, 'w')
fout.write('Scaffold id\tGenome Id\tGC\tLength (bp)')
if cov_profiles: