How to use comparem - 10 common examples

To help you get started, we’ve selected a few comparem examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dparks1134 / CompareM / comparem / classify.py View on Github external
Flag indicating if RBH should be written to file.
        output_dir : str
            Directory to store AAI results.
        """
        
        # read taxonomic identification of each genome
        taxonomy = {}
        if taxonomy_file:
            for line in open(taxonomy_file):
                genome_id, taxa_str = line.rstrip().split('\t')
                taxonomy[genome_id] = taxa_str

        # calculate AAI between query and target genomes
        aai_output_dir = os.path.join(output_dir, 'aai')
        make_sure_path_exists(aai_output_dir)
        aai_calculator = AAICalculator(self.cpus)
        aai_output_file, rbh_output_file = aai_calculator.run(query_gene_file,
                                                                target_gene_file,
                                                                sorted_hit_table,
                                                                evalue_threshold,
                                                                per_iden_threshold,
                                                                per_aln_len_threshold,
                                                                keep_rbhs,
                                                                aai_output_dir)

        # determine matches to each query genomes
        aai_results_file = os.path.join(aai_output_dir, 'aai_summary.tsv')
        with open(aai_results_file) as f:
            f.readline()
            
            hits = defaultdict(list)
            for line in f:
github dparks1134 / CompareM / comparem / main.py View on Github external
def hclust(self, options):
        """Hierarchical clustering command"""
        
        if options.similarity and not options.max_sim_value:
            self.logger.error("The 'max_sim_value' must be specified for similarity values.")
            sys.exit(-1)

        self.logger.info('Performing hierarchical clustering.')
        hclust = HierarchicalCluster()
        hclust.run(options.pairwise_value_file,
                    options.method, 
                    options.similarity,
                    options.max_sim_value,
                    options.name_col1,
                    options.name_col2,
                    options.value_col,
                    options.output_tree)
        
        self.logger.info('Hierarchical cluster tree written to: %s' % options.output_tree)
github dparks1134 / CompareM / comparem / main.py View on Github external
def kmer_usage(self, options):
        """Kmer usage command"""

        if options.k > 10 or options.k <= 0:
            self.logger.warning('CompareM only support kmers with k <= 10.')
            sys.exit(0)

        genome_files = self._input_files(options.genome_files, options.file_ext)

        # calculate amino acid usage
        kmer_usage = KmerUsage(options.k, options.cpus)
        genome_kmer_usage, kmer_set = kmer_usage.run(genome_files)

        # write out results
        self.logger.info('Writing kmer profiles to file (be patient!).')
        self._write_usage_profile(genome_kmer_usage, 
                                    kmer_set, 
                                    options.counts, 
                                    options.output_file)

        self.logger.info('Kmer usage written to: %s' % options.output_file)
github dparks1134 / CompareM / comparem / main.py View on Github external
def codon_usage(self, options):
        """Codon usage command"""

        gene_files = self._input_files(options.nucleotide_gene_files, options.file_ext)

        # calculate amino acid usage
        codon_usage = CodonUsage(options.cpus, options.keep_ambiguous)
        genome_codon_usage, codon_set, _mean_length = codon_usage.run(gene_files)

        # write out results
        self._write_usage_profile(genome_codon_usage, 
                                    codon_set, 
                                    options.counts, 
                                    options.output_file)

        self.logger.info('Codon usage written to: %s' % options.output_file)
github dparks1134 / CompareM / comparem / main.py View on Github external
def stop_usage(self, options):
        """Stop codon usage command"""

        gene_files = self._input_files(options.nucleotide_gene_files, options.file_ext)

        # calculate amino acid usage
        codon_usage = CodonUsage(options.cpus, keep_ambiguous=False, stop_codon_only=True)
        genome_codon_usage, codon_set, mean_gene_length = codon_usage.run(gene_files)

        # write out results
        if not options.mean_gene_length:
            self._write_usage_profile(genome_codon_usage, 
                                        codon_set, 
                                        options.counts, 
                                        options.output_file)
        else:
            fout = open(options.output_file, 'w')
            for codon in codon_set:
                fout.write('\t' + codon)
                if mean_gene_length:
                    fout.write('\t' + codon + ': avg. seq. length')
            fout.write('\n')
github dparks1134 / CompareM / comparem / main.py View on Github external
def aai(self, options):
        """AAI command"""
        check_file_exists(options.sorted_hit_table)
        make_sure_path_exists(options.output_dir)

        aai_calculator = AAICalculator(options.cpus)
        aai_output_file, rbh_output_file = aai_calculator.run(options.query_gene_file,
                                                                None,
                                                                options.sorted_hit_table,
                                                                options.evalue,
                                                                options.per_identity,
                                                                options.per_aln_len,
                                                                options.keep_rbhs,
                                                                options.output_dir)

        if rbh_output_file:
            self.logger.info('Identified reciprocal best hits written to: %s' % rbh_output_file)
            
        self.logger.info('AAI between genomes written to: %s' % aai_output_file)
github dparks1134 / CompareM / comparem / main.py View on Github external
def aa_usage(self, options):
        """Amino acid usage command"""

        gene_files = self._input_files(options.protein_gene_files, options.file_ext)

        # calculate amino acid usage
        amino_acid_usage = AminoAcidUsage(options.cpus)
        genome_aa_usage, aa_set = amino_acid_usage.run(gene_files)

        # write out results
        self._write_usage_profile(genome_aa_usage, 
                                    aa_set, 
                                    options.counts, 
                                    options.output_file)

        self.logger.info('Amino acid usage written to: %s' % options.output_file)
github dparks1134 / CompareM / comparem / main.py View on Github external
def classify(self, options):
        """Classify genomes based on AAI values."""
        check_file_exists(options.sorted_hit_table)
        make_sure_path_exists(options.output_dir)
        
        classify = Classify(options.cpus)
        results_file = classify.run(options.query_gene_file,
                                        options.target_gene_file,
                                        options.sorted_hit_table,
                                        options.evalue,
                                        options.per_identity,
                                        options.per_aln_len,
                                        options.num_top_targets,
                                        options.taxonomy_file,
                                        options.keep_rbhs,
                                        options.output_dir)
        
        self.logger.info('Classification results written to: %s' % results_file)
github dparks1134 / CompareM / comparem / main.py View on Github external
def lgt_codon(self, options):
        """LGT dinucleotide usage command"""

        gene_files = self._input_files(options.nucleotide_gene_files, options.file_ext)

        lgt_codon = LgtCodon(options.cpus)
        lgt_codon.run(gene_files, options.output_dir)

        self.logger.info('Codon usage written to directory: %s' % options.output_dir)
github dparks1134 / CompareM / comparem / main.py View on Github external
def lgt_di(self, options):
        """LGT dinucleotide usage command"""

        gene_files = self._input_files(options.nucleotide_gene_files, options.file_ext)

        lgt_dinucleotide = LgtDinucleotide(options.cpus)
        lgt_dinucleotide.run(gene_files, options.crit_value, options.output_dir)

        self.logger.info('Dinucleotide usage written to directory: %s' % options.output_dir)