How to use the refinem.taxon_profile.TaxonProfile class in refinem

To help you get started, we’ve selected a few refinem examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dparks1134 / RefineM / refinem / main.py View on Github external
def taxon_profile(self, options):
        """Run the taxon profile command.

        Checks the output directory and the input files named in `options`,
        collects the per-genome protein files, and passes them to
        `TaxonProfile.run` together with the classification settings.

        Parameters
        ----------
        options : object
            Parsed command-line options. The attributes read here are
            output_dir, scaffold_stats_file, taxonomy_file, db_file,
            genome_prot_dir, protein_ext, cpus, per_to_classify, evalue,
            per_identity, per_aln_len, and tmpdir.

        Raises
        ------
        SystemExit
            With exit status 1 when `_check_protein_seqs` rejects the
            gene files.
        """

        make_sure_path_exists(options.output_dir)
        check_file_exists(options.scaffold_stats_file)
        check_file_exists(options.taxonomy_file)
        check_file_exists(options.db_file)

        gene_files = self._genome_files(options.genome_prot_dir, options.protein_ext)
        if not self._check_protein_seqs(gene_files):
            self.logger.warning('All files must contain amino acid sequences.')
            # a bare sys.exit() reports success (status 0); signal the
            # failure so calling scripts and pipelines can detect it
            sys.exit(1)

        # build taxonomic profile from the gene files
        taxon_profile = TaxonProfile(options.cpus, options.output_dir)
        taxon_profile.run(gene_files,
                            options.scaffold_stats_file,
                            options.db_file,
                            options.taxonomy_file,
                            options.per_to_classify,
                            options.evalue,
                            options.per_identity,
                            options.per_aln_len,
                            options.tmpdir)

        self.logger.info('Results written to: %s' % options.output_dir)
github dparks1134 / RefineM / refinem / main.py View on Github external
def taxon_filter(self, options):
        """Run the taxon filter command.

        Constructs a `TaxonProfile` over `options.taxon_profile_dir` and
        calls its `filter` method with the filtering criteria taken from
        `options`.

        Parameters
        ----------
        options : object
            Parsed command-line options. The attributes read here are
            taxon_profile_dir, cpus, consensus_taxon, trusted_scaffold,
            common_taxa, congruent_scaffold, min_classified_per,
            min_classified, consensus_scaffold, and output_file.
        """

        make_sure_path_exists(options.taxon_profile_dir)

        # TaxonProfile is pointed at the taxon profile directory
        taxon_profile = TaxonProfile(options.cpus, options.taxon_profile_dir)

        # NOTE(review): output_file is presumably where filter() writes its
        # results -- confirm against TaxonProfile.filter
        taxon_profile.filter(options.consensus_taxon,
                                options.trusted_scaffold,
                                options.common_taxa,
                                options.congruent_scaffold,
                                options.min_classified_per,
                                options.min_classified,
                                options.consensus_scaffold,
                                options.output_file)
github dparks1134 / RefineM / refinem / ssu.py View on Github external
ssu_order : float
        ssu_family : float
        ssu_genus : float
        output_dir : str
            Directory for output files.
        """
        
        header = 'Scaffold id\tGenome id\tGenome classification\tIncongruent common taxa set'
        header += '\tNo. 16S in Genome'
        header += '\t16S Classification\t16S length\t16S e-value\t16S alignment length\t16S percent identity'
        header += '\tScaffold length (bp)\n'
        
        fout = open(os.path.join(output_dir, 'ssu_erroneous.tsv'), 'w')
        fout.write(header)

        taxon_profile = TaxonProfile(1, taxon_profile_dir)
        common_taxa = taxon_profile.common_taxa(common_taxon_threshold, 25.0)
        genome_taxonomy = taxon_profile.read_genome_taxonomy()

        for genome_id, scaffold_ids in ssu_hits.items():
            # **** HACK for SRA processing
            gid = genome_id.replace('.filtered', '')
        
            for scaffold_id in scaffold_ids:
                hmm_model, evalue, _start, _stop, ssu_length, _rev_comp, scaffold_len = ssu_hits[genome_id][scaffold_id]
                
                evalue = float(evalue)
                ssu_length = int(ssu_length)
                scaffold_len = int(scaffold_len)
                
                if ssu_length < ssu_min_length:
                    continue