How to use the biolib.common.check_file_exists function in biolib

To help you get started, we’ve selected a few biolib examples that show popular ways check_file_exists is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dparks1134 / RefineM / refinem / main.py View on Github external
def split(self, options):
        """Split a genome's sequences using scaffold statistics.

        Reads the following attributes from ``options``:
        ``scaffold_stats_file``, ``genome_file``, ``output_dir``,
        ``criteria1`` and ``criteria2``.

        Both input files are checked with ``check_file_exists`` and the
        output directory is prepared with ``make_sure_path_exists`` before
        any work is done. The actual partitioning is delegated to
        ``Cluster.split``.
        """

        # Validate inputs up front, then make sure the output location is usable.
        check_file_exists(options.scaffold_stats_file)
        check_file_exists(options.genome_file)
        make_sure_path_exists(options.output_dir)

        self.logger.info('Reading scaffold statistics.')
        stats = ScaffoldStats()
        stats.read(options.scaffold_stats_file)

        # NOTE(review): the meaning of the literal 1 passed to Cluster is not
        # visible here (presumably a CPU/thread count) -- confirm.
        partitioner = Cluster(1)
        partitioner.split(
            stats,
            options.criteria1,
            options.criteria2,
            options.genome_file,
            options.output_dir,
        )

        self.logger.info('Partitioned sequences written to: ' + options.output_dir)
github dparks1134 / RefineM / refinem / main.py View on Github external
def taxon_profile(self, options):
        """Call genes command"""

        make_sure_path_exists(options.output_dir)
        check_file_exists(options.scaffold_stats_file)
        check_file_exists(options.taxonomy_file)
        check_file_exists(options.db_file)

        gene_files = self._genome_files(options.genome_prot_dir, options.protein_ext)
        if not self._check_protein_seqs(gene_files):
            self.logger.warning('All files must contain amino acid sequences.')
            sys.exit()

        # build gene profile
        taxon_profile = TaxonProfile(options.cpus, options.output_dir)
        taxon_profile.run(gene_files,
                            options.scaffold_stats_file,
                            options.db_file,
                            options.taxonomy_file,
                            options.per_to_classify,
                            options.evalue,
github dparks1134 / RefineM / refinem / main.py View on Github external
def manual(self, options):
        """Manual command"""
        
        check_file_exists(options.cluster_file)
        check_file_exists(options.genome_file)
        make_sure_path_exists(options.output_dir)
        
        genome_id = remove_extension(options.genome_file)

        seqs = seq_io.read(options.genome_file)
        fout = {}
        with open(options.cluster_file) as f:
            f.readline()
            
            for line in f:
                line_split = line.rstrip().split('\t')
                scaffold_id = line_split[0]
                cluster_id = int(line_split[1])
                
                if cluster_id < 0:
github dparks1134 / RefineM / refinem / main.py View on Github external
def taxon_profile(self, options):
        """Call genes command"""

        make_sure_path_exists(options.output_dir)
        check_file_exists(options.scaffold_stats_file)
        check_file_exists(options.taxonomy_file)
        check_file_exists(options.db_file)

        gene_files = self._genome_files(options.genome_prot_dir, options.protein_ext)
        if not self._check_protein_seqs(gene_files):
            self.logger.warning('All files must contain amino acid sequences.')
            sys.exit()

        # build gene profile
        taxon_profile = TaxonProfile(options.cpus, options.output_dir)
        taxon_profile.run(gene_files,
                            options.scaffold_stats_file,
                            options.db_file,
                            options.taxonomy_file,
                            options.per_to_classify,
                            options.evalue,
                            options.per_identity,
                            options.per_aln_len,
github dparks1134 / CompareM / comparem / main.py View on Github external
def aai(self, options):
        """Run the AAI calculation and log where its results were written.

        Reads the following attributes from ``options``:
        ``sorted_hit_table``, ``output_dir``, ``cpus``, ``query_gene_file``,
        ``evalue``, ``per_identity``, ``per_aln_len`` and ``keep_rbhs``.

        The sorted hit table is checked with ``check_file_exists`` and the
        output directory is prepared with ``make_sure_path_exists`` before
        ``AAICalculator.run`` is invoked.
        """
        check_file_exists(options.sorted_hit_table)
        make_sure_path_exists(options.output_dir)

        calculator = AAICalculator(options.cpus)
        # The second positional argument is deliberately None, as in the
        # original call; its meaning is defined by AAICalculator.run.
        results = calculator.run(
            options.query_gene_file,
            None,
            options.sorted_hit_table,
            options.evalue,
            options.per_identity,
            options.per_aln_len,
            options.keep_rbhs,
            options.output_dir,
        )
        aai_path, rbh_path = results

        # The reciprocal-best-hit file is only reported when run() returned one.
        if rbh_path:
            self.logger.info('Identified reciprocal best hits written to: %s' % rbh_path)

        self.logger.info('AAI between genomes written to: %s' % aai_path)
github Ecogenomics / GTDBTk / src / gtdbtk / gtdbtk.py View on Github external
def align(self, options):
        """Create MSA from marker genes."""
        
        if options.genome_dir:
            check_dir_exists(options.genome_dir)
            
        if options.batchfile:
            check_file_exists(options.batchfile)
            
        check_dir_exists(options.identify_dir)
        make_sure_path_exists(options.out_dir)
 
        marker_set_id = self._marker_set_id(options.bac120_ms,
                                            options.ar122_ms,
                                            options.rps23_ms)
          
        markers = Markers(options.threads)
        markers.align(options.genome_dir,
                        options.batchfile,
                        options.identify_dir,
                        marker_set_id,
                        options.taxa_filter,
                        options.min_perc_aa,
                        options.custom_msa_filters,
github Ecogenomics / GTDBTk / src / gtdbtk / gtdbtk.py View on Github external
def identify(self, options):
        """Identify marker genes in genomes.

        Reads the following attributes from ``options``: ``genome_dir``,
        ``batchfile``, ``out_dir``, ``cpus``, ``proteins`` and ``prefix``.

        ``genome_dir`` and ``batchfile`` are each validated only when set
        (truthy); ``out_dir`` is prepared with ``make_sure_path_exists``.
        The work itself is delegated to ``Markers.identify``.
        """
        
        try:
        
            # Each input source is optional: validate only the ones supplied.
            if options.genome_dir:
                check_dir_exists(options.genome_dir)
                
            if options.batchfile:
                check_file_exists(options.batchfile)
                
            make_sure_path_exists(options.out_dir)
                
            markers = Markers(options.cpus)
            markers.identify(options.genome_dir,
                                options.batchfile,
                                options.proteins,
                                options.out_dir, 
                                options.prefix)
                                
            self.logger.info('Done.')
        
        except Exception as e:
            # NOTE(review): in the visible code the caught exception `e` is
            # never used and the failure is reported at info level only, so
            # the error is swallowed here. The handler may continue beyond
            # this excerpt -- confirm against the full file.
            self.logger.info('GTDB-Tk has encountered an error.')
github Ecogenomics / GTDBTk / src / gtdbtk / gtdbtk.py View on Github external
def root(self, options):
        """Root tree using outgroup.

        Reads the following attributes from ``options``: ``input_tree``,
        ``outgroup_taxon`` and ``output_tree``.

        The input tree is checked with ``check_file_exists``. The taxonomy
        is loaded from ``Config.TAXONOMY_FILE``, and every genome whose taxa
        contain ``options.outgroup_taxon`` is collected into the outgroup
        set passed to ``RerootTree.root_with_outgroup``.
        """

        check_file_exists(options.input_tree)

        # presumably a dict mapping genome id -> collection of taxa; the
        # iteration below relies on that -- verify against Taxonomy.read.
        gtdb_taxonomy = Taxonomy().read(Config.TAXONOMY_FILE)

        self.logger.info('Identifying genomes from the specified outgroup.')
        # dict.iteritems() exists only on Python 2; items() behaves the same
        # for this iteration and works on both Python 2 and Python 3.
        outgroup = {genome_id
                    for genome_id, taxa in gtdb_taxonomy.items()
                    if options.outgroup_taxon in taxa}

        reroot = RerootTree()
        reroot.root_with_outgroup(options.input_tree,
                                    options.output_tree,
                                    outgroup)

        self.logger.info('Done.')