How to use the refinem.coverage.Coverage class in refinem

To help you get started, we’ve selected a few refinem examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: dparks1134 / RefineM / refinem / main.py (View on GitHub)
sys.exit()

        genome_files = self._genome_files(options.genome_nt_dir, options.genome_ext)
        if not self._check_nuclotide_seqs(genome_files):
            self.logger.warning('All files must contain nucleotide sequences.')
            sys.exit()

        make_sure_path_exists(options.output_dir)

        # get coverage information
        if not options.coverage_file:
            if not options.bam_files:
                self.logger.warning('One or more BAM files must be specified in order to calculate coverage profiles.')
                coverage_file = None
            else:
                coverage = Coverage(options.cpus)
                coverage_file = os.path.join(options.output_dir, 'coverage.tsv')
                coverage.run(options.bam_files, coverage_file, options.cov_all_reads, options.cov_min_align, options.cov_max_edit_dist)
                self.logger.info('Coverage profiles written to: %s' % coverage_file)
        else:
            check_file_exists(options.coverage_file)
            coverage_file = options.coverage_file

        # get tetranucleotide signatures
        if not options.tetra_file:
            tetra = Tetranucleotide(options.cpus)
            tetra_file = os.path.join(options.output_dir, 'tetra.tsv')
            signatures = tetra.run(options.scaffold_file)
            tetra.write(signatures, tetra_file)
            self.logger.info('Tetranucleotide signatures written to: %s' % tetra_file)
        else:
            tetra_file = options.tetra_file
GitHub: dparks1134 / RefineM / refinem / scaffold_stats.py (View on GitHub)
genome_files : list of str
            Fasta files with binned scaffolds.
        tetra_file : str
            Tetranucleotide signatures for scaffolds.
        coverage_file : str
            Coverage profiles for scaffolds
        output_file : str
            Output file for scaffolds statistics.
        """

        tetra = Tetranucleotide(self.cpus)
        signatures = tetra.read(tetra_file)

        cov_profiles = None
        if coverage_file:
            coverage = Coverage(self.cpus)
            cov_profiles, _ = coverage.read(coverage_file)

        # determine bin assignment for each scaffold
        self.logger.info('Determining scaffold statistics.')

        scaffold_id_genome_id = {}
        for gf in genome_files:
            genome_id = remove_extension(gf)
            for scaffold_id, _seq in seq_io.read_seq(gf):
                scaffold_id_genome_id[scaffold_id] = genome_id

        # write out scaffold statistics
        fout = open(output_file, 'w')
        fout.write('Scaffold id\tGenome Id\tGC\tLength (bp)')

        if cov_profiles: