How to use the refinem.outliers.Outliers class in refinem

To help you get started, we’ve selected a few refinem examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dparks1134 / RefineM / refinem / main.py View on Github external
def filter_bins(self, options):
        """Filter bins command.

        Collects the genome files found under ``options.genome_nt_dir`` with
        extension ``options.genome_ext`` and, for each one, hands it to
        ``Outliers.remove_outliers`` together with ``options.filter_file``,
        an output path inside ``options.output_dir`` and
        ``options.modified_only``.

        The output file name is the input name with its extension removed
        (via ``remove_extension``) followed by ``.filtered.<genome_ext>``.

        If the nucleotide check on the collected files fails, a warning is
        logged and the process exits through ``sys.exit()``.
        """

        output_dir = options.output_dir
        make_sure_path_exists(output_dir)

        input_genomes = self._genome_files(options.genome_nt_dir, options.genome_ext)
        all_nucleotide = self._check_nuclotide_seqs(input_genomes)
        if not all_nucleotide:
            self.logger.warning('All files must contain nucleotide sequences.')
            sys.exit()

        outlier_filter = Outliers()
        for input_genome in input_genomes:
            # <input name without extension>.filtered.<ext>
            filtered_name = remove_extension(input_genome) + '.filtered.' + options.genome_ext
            filtered_path = os.path.join(options.output_dir, filtered_name)
            outlier_filter.remove_outliers(input_genome,
                                           options.filter_file,
                                           filtered_path,
                                           options.modified_only)

        self.logger.info('Modified genome written to: ' + options.output_dir)
github dparks1134 / RefineM / refinem / main.py View on Github external
sys.exit()

        if options.outlier_file and options.compatible_file:
            self.logger.warning("The 'outlier_file' and 'compatible_file' options cannot be specified at the same time.\n")
            sys.exit()

        failed_to_add = []
        failed_to_remove = []
        if options.add or options.remove:
            failed_to_add, failed_to_remove = genome_tk.modify(options.genome_file,
                                                               options.scaffold_file,
                                                               options.add,
                                                               options.remove,
                                                               options.output_genome)
        elif options.outlier_file:
            outliers = Outliers()
            outliers.remove_outliers(options.genome_file, 
                                        options.outlier_file, 
                                        options.output_genome, 
                                        False)
        elif options.compatible_file:
            outliers = Outliers()
            if options.unique_only:
                outliers.add_compatible_unique(options.scaffold_file, 
                                                options.genome_file, 
                                                options.compatible_file,
                                                options.min_len, 
                                                options.output_genome)
            elif options.closest_only:
                outliers.add_compatible_closest(options.scaffold_file, 
                                                    options.genome_file, 
                                                    options.compatible_file,
github dparks1134 / RefineM / refinem / main.py View on Github external
failed_to_add = []
        failed_to_remove = []
        if options.add or options.remove:
            failed_to_add, failed_to_remove = genome_tk.modify(options.genome_file,
                                                               options.scaffold_file,
                                                               options.add,
                                                               options.remove,
                                                               options.output_genome)
        elif options.outlier_file:
            outliers = Outliers()
            outliers.remove_outliers(options.genome_file, 
                                        options.outlier_file, 
                                        options.output_genome, 
                                        False)
        elif options.compatible_file:
            outliers = Outliers()
            if options.unique_only:
                outliers.add_compatible_unique(options.scaffold_file, 
                                                options.genome_file, 
                                                options.compatible_file,
                                                options.min_len, 
                                                options.output_genome)
            elif options.closest_only:
                outliers.add_compatible_closest(options.scaffold_file, 
                                                    options.genome_file, 
                                                    options.compatible_file, 
                                                    options.min_len, 
                                                    options.output_genome)
            else:
                outliers.add_compatible(options.scaffold_file, 
                                        options.genome_file, 
                                        options.compatible_file,
github dparks1134 / RefineM / refinem / main.py View on Github external
# read scaffold statistics and calculate genome stats
        self.logger.info('Reading scaffold statistics.')
        scaffold_stats = ScaffoldStats()
        scaffold_stats.read(options.scaffold_stats_file)

        genome_stats = GenomeStats()
        genome_stats = genome_stats.run(scaffold_stats)

        # identify putative homologs to reference genomes
        reference = Reference(1, None)
        putative_homologs = reference.homology_check(options.reference_file,
                                                         options.min_genes,
                                                         float(options.perc_genes))

        # identify scaffolds compatible with bins
        outliers = Outliers()
        output_file = os.path.join(options.output_dir, 'compatible.tsv')
        outliers.compatible(putative_homologs, scaffold_stats, genome_stats,
                                      options.gc_perc, options.td_perc,
                                      options.cov_corr, options.cov_perc,
                                      options.report_type, output_file)

        self.logger.info('Results written to: ' + output_file)
github dparks1134 / RefineM / refinem / main.py View on Github external
def outliers(self, options):
        """Outlier command"""

        check_file_exists(options.scaffold_stats_file)
        make_sure_path_exists(options.output_dir)

        self.logger.info('Reading scaffold statistics.')
        scaffold_stats = ScaffoldStats()
        scaffold_stats.read(options.scaffold_stats_file)

        genome_stats = GenomeStats()
        genome_stats = genome_stats.run(scaffold_stats)

        # identify outliers
        outliers = Outliers()
        outlier_file = os.path.join(options.output_dir, 'outliers.tsv')
        outliers.identify(scaffold_stats, genome_stats,
                                      options.gc_perc, options.td_perc,
                                      options.cov_corr, options.cov_perc,
                                      options.report_type, outlier_file)
        self.logger.info('Outlier information written to: ' + outlier_file)

        # create outlier plots
        if options.create_plots:
            plot_dir = os.path.join(options.output_dir, 'plots')
            make_sure_path_exists(plot_dir)
        
            outliers.plot(scaffold_stats,
                            genome_stats,
                            outliers.gc_dist,
                            outliers.td_dist,