How to use the gtdbtk.biolib_lite.common.check_file_exists function in gtdbtk

To help you get started, we’ve selected a few gtdbtk examples based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Ecogenomics / GTDBTk / gtdbtk / main.py View on Github external
def _read_taxonomy_files(self, options) -> Dict[str, Tuple[str, str, str, str, str, str, str]]:
        """Read and merge taxonomy files."""

        self.logger.info('Reading GTDB taxonomy for representative genomes.')
        taxonomy = Taxonomy().read(Config.TAXONOMY_FILE)

        if options.gtdbtk_classification_file:
            # add and overwrite taxonomy for genomes specified in the
            # GTDB-Tk classification file
            check_file_exists(options.gtdbtk_classification_file)

            self.logger.info('Reading GTDB-Tk classification file.')
            gtdbtk_taxonomy = Taxonomy().read(options.gtdbtk_classification_file)
            del gtdbtk_taxonomy['user_genome']
            num_reassigned = 0
            for gid, taxa in gtdbtk_taxonomy.items():
                if gid in taxonomy:
                    num_reassigned += 1
                taxonomy[gid] = taxa

            self.logger.info(f'Read GTDB-Tk classifications for {len(gtdbtk_taxonomy):,} genomes.')
            self.logger.info(f'Reassigned taxonomy for {num_reassigned:,} GTDB representative genomes.')

        if options.custom_taxonomy_file:
            # add and overwrite taxonomy for genomes specified in the
            # custom taxonomy file
github Ecogenomics / GTDBTk / gtdbtk / main.py View on Github external
def decorate(self, options):
        """Decorate the input tree with the merged GTDB taxonomy.

        The input tree is checked with ``check_file_exists``, the taxonomy is
        obtained from ``self._read_taxonomy_files`` and both are handed to
        ``Decorate.run`` together with the output tree path. When ``options``
        carries a ``suffix`` attribute equal to ``'bac120'``, a symlink to the
        decorated tree is created in ``options.out_dir``.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user.
        """

        tree_in = options.input_tree
        check_file_exists(tree_in)

        merged_taxonomy = self._read_taxonomy_files(options)

        Decorate().run(tree_in, merged_taxonomy, options.output_tree)

        self.logger.info('Done.')

        # The 'suffix' attribute is only inspected when present on options;
        # without it no symlink is created.
        if hasattr(options, 'suffix'):
            if options.suffix == 'bac120':
                decorated_tree = PATH_BAC120_DECORATED_TREE.format(prefix=options.prefix)
                # The link lives in out_dir under the decorated tree's base name.
                link_path = os.path.join(options.out_dir,
                                         os.path.basename(decorated_tree))
                symlink_f(decorated_tree, link_path)
github Ecogenomics / GTDBTk / gtdbtk / main.py View on Github external
def identify(self, options):
        """Run marker gene identification on the requested genomes.

        Validates the genome directory and/or batch file when they are given,
        calls ``make_sure_path_exists`` on the output directory, resolves the
        genomes through ``self._genomes_to_process`` and passes them to
        ``Markers.identify``. The resolved genomes are also stored on
        ``self.genomes_to_process``.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user.
        """

        # Each input source is optional; only validate the ones supplied.
        genome_dir = options.genome_dir
        if genome_dir:
            check_dir_exists(genome_dir)

        batchfile = options.batchfile
        if batchfile:
            check_file_exists(batchfile)

        out_dir = options.out_dir
        make_sure_path_exists(out_dir)

        to_process = self._genomes_to_process(genome_dir,
                                              batchfile,
                                              options.extension)
        genomes, tln_tables = to_process
        self.genomes_to_process = genomes

        marker_finder = Markers(options.cpus)
        marker_finder.identify(genomes, tln_tables, out_dir,
                               options.prefix, options.force)

        self.logger.info('Done.')
github Ecogenomics / GTDBTk / gtdbtk / main.py View on Github external
def infer(self, options):
        """Infer a tree from a user specified MSA.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user.
        """

        check_file_exists(options.msa_file)
        make_sure_path_exists(options.out_dir)

        check_dependencies(['FastTree' + ('MP' if options.cpus > 1 else '')])

        if hasattr(options, 'suffix'):
            output_tree = os.path.join(options.out_dir,
                                       PATH_MARKER_UNROOTED_TREE.format(prefix=options.prefix,
                                                                        marker=options.suffix))
            tree_log = os.path.join(options.out_dir,
                                    PATH_MARKER_TREE_LOG.format(prefix=options.prefix,
                                                                marker=options.suffix))
            fasttree_log = os.path.join(options.out_dir,
                                        PATH_MARKER_FASTTREE_LOG.format(prefix=options.prefix,
                                                                        marker=options.suffix))
        else:
            output_tree = os.path.join(options.out_dir,
github Ecogenomics / GTDBTk / gtdbtk / main.py View on Github external
def infer_ranks(self, options):
        """Establish taxonomic ranks of internal nodes using RED.

        Checks the input tree with ``check_file_exists`` and then runs
        ``InferRanks.run`` with the input tree, the ingroup taxon and the
        output tree taken from ``options``.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user.
        """

        tree_path = options.input_tree
        check_file_exists(tree_path)

        rank_inferrer = InferRanks()
        rank_inferrer.run(tree_path, options.ingroup_taxon, options.output_tree)

        self.logger.info('Done.')
github Ecogenomics / GTDBTk / gtdbtk / main.py View on Github external
self.logger.info('Reading GTDB-Tk classification file.')
            gtdbtk_taxonomy = Taxonomy().read(options.gtdbtk_classification_file)
            del gtdbtk_taxonomy['user_genome']
            num_reassigned = 0
            for gid, taxa in gtdbtk_taxonomy.items():
                if gid in taxonomy:
                    num_reassigned += 1
                taxonomy[gid] = taxa

            self.logger.info(f'Read GTDB-Tk classifications for {len(gtdbtk_taxonomy):,} genomes.')
            self.logger.info(f'Reassigned taxonomy for {num_reassigned:,} GTDB representative genomes.')

        if options.custom_taxonomy_file:
            # add and overwrite taxonomy for genomes specified in the
            # custom taxonomy file
            check_file_exists(options.custom_taxonomy_file)

            self.logger.info('Reading custom taxonomy file.')
            custom_taxonomy = Taxonomy().read(options.custom_taxonomy_file)
            num_reassigned = 0
            for gid, taxa in custom_taxonomy.items():
                if gid in taxonomy:
                    num_reassigned += 1
                taxonomy[gid] = taxa

            self.logger.info(f'Read custom taxonomy for {len(custom_taxonomy):,} genomes.')
            self.logger.info(f'Reassigned taxonomy for {num_reassigned:,} GTDB representative genomes.')

        if options.gtdbtk_classification_file and options.custom_taxonomy_file:
            dup_genomes = set(gtdbtk_taxonomy).intersection(custom_taxonomy)
            if len(dup_genomes) > 0:
                self.logger.error('GTDB-Tk classification and custom taxonomy '