How to use the gtdbtk.biolib_lite.common.make_sure_path_exists function in gtdbtk

To help you get started, we’ve selected a few gtdbtk examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Ecogenomics / GTDBTk / gtdbtk / classify.py View on Github external
raise GenomeMarkerSetUnknown

            shutil.copyfile(user_msa_file, t)
            user_msa_file = t

        # run pplacer to place bins in reference genome tree
        num_genomes = sum([1 for _seq_id, _seq in read_seq(user_msa_file)])

        # check if a scratch file is to be created
        pplacer_mmap_file = None
        if scratch_dir:
            self.logger.info('Using a scratch file for pplacer allocations. '
                             'This decreases memory usage and performance.')
            pplacer_mmap_file = os.path.join(
                scratch_dir, prefix + ".pplacer.scratch")
            make_sure_path_exists(scratch_dir)

        # get path to pplacer reference package
        if marker_set_id == 'bac120':
            if levelopt is None:
                self.logger.info(
                    f'Placing {num_genomes} bacterial genomes into reference tree with pplacer using {self.pplacer_cpus} cpus (be patient).')
                pplacer_ref_pkg = os.path.join(
                    Config.PPLACER_DIR, Config.PPLACER_BAC120_REF_PKG)
            elif levelopt == 'high':
                self.logger.info(
                    f'Placing {num_genomes} bacterial genomes into high reference tree with pplacer using {self.pplacer_cpus} cpus (be patient).')
                pplacer_ref_pkg = os.path.join(
                    Config.HIGH_PPLACER_DIR, Config.HIGH_PPLACER_REF_PKG)
            elif levelopt == 'low':
                self.logger.info(
                    f'Placing {num_genomes} bacterial genomes into low reference tree {tree_iter} with pplacer using {self.pplacer_cpus} cpus (be patient).')
github Ecogenomics / GTDBTk / gtdbtk / classify.py View on Github external
"""

        if marker_set_id == 'bac120':
            marker_dict = Config.RED_DIST_BAC_DICT
            out_path = os.path.join(
                out_dir, PATH_BAC120_RED_DICT.format(prefix=prefix))
        elif marker_set_id == 'ar122':
            marker_dict = Config.RED_DIST_ARC_DICT
            out_path = os.path.join(
                out_dir, PATH_AR122_RED_DICT.format(prefix=prefix))
        else:
            self.logger.error('There was an error determining the marker set.')
            raise GenomeMarkerSetUnknown

        make_sure_path_exists(os.path.dirname(out_path))

        with open(out_path, 'w') as reddictfile:
            reddictfile.write('Phylum\t{}\n'.format(marker_dict.get('p__')))
            reddictfile.write('Class\t{}\n'.format(marker_dict.get('c__')))
            reddictfile.write('Order\t{}\n'.format(marker_dict.get('o__')))
            reddictfile.write('Family\t{}\n'.format(marker_dict.get('f__')))
            reddictfile.write('Genus\t{}\n'.format(marker_dict.get('g__')))

        return marker_dict
github Ecogenomics / GTDBTk / gtdbtk / biolib_lite / logger.py View on Github external
timestamp_stream_logger.setFormatter(SpecialFormatter())
    timestamp_logger.addHandler(timestamp_stream_logger)

    no_timestamp_stream_logger = logging.StreamHandler(sys.stdout)
    no_timestamp_stream_logger.setFormatter(None)
    no_timestamp_logger.addHandler(no_timestamp_stream_logger)

    timestamp_logger.is_silent = False
    no_timestamp_stream_logger.is_silent = False
    if silent:
        timestamp_logger.is_silent = True
        timestamp_stream_logger.setLevel(logging.ERROR)
        no_timestamp_stream_logger.is_silent = True

    if log_dir:
        make_sure_path_exists(log_dir)
        timestamp_file_logger = logging.FileHandler(os.path.join(log_dir,
                                                                 log_file), 'a')
        timestamp_file_logger.setFormatter(ColourlessFormatter())
        timestamp_logger.addHandler(timestamp_file_logger)

        no_timestamp_file_logger = logging.FileHandler(os.path.join(log_dir,
                                                                    log_file), 'a')
        no_timestamp_file_logger.setFormatter(None)
        no_timestamp_logger.addHandler(no_timestamp_file_logger)

        warning_fh = logging.FileHandler(os.path.join(log_dir,
                                                      log_file.replace('.log', '.warnings.log')), 'a')
        warning_fh.setFormatter(ColourlessFormatter())
        warning_logger.addHandler(warning_fh)

    timestamp_logger.info('%s v%s' % (program_name, version))
github Ecogenomics / GTDBTk / gtdbtk / main.py View on Github external
def infer(self, options):
        """Infer a tree from a user specified MSA.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user.
        """

        check_file_exists(options.msa_file)
        make_sure_path_exists(options.out_dir)

        check_dependencies(['FastTree' + ('MP' if options.cpus > 1 else '')])

        if hasattr(options, 'suffix'):
            output_tree = os.path.join(options.out_dir,
                                       PATH_MARKER_UNROOTED_TREE.format(prefix=options.prefix,
                                                                        marker=options.suffix))
            tree_log = os.path.join(options.out_dir,
                                    PATH_MARKER_TREE_LOG.format(prefix=options.prefix,
                                                                marker=options.suffix))
            fasttree_log = os.path.join(options.out_dir,
                                        PATH_MARKER_FASTTREE_LOG.format(prefix=options.prefix,
                                                                        marker=options.suffix))
        else:
            output_tree = os.path.join(options.out_dir,
                                       PATH_UNROOTED_TREE.format(prefix=options.prefix))
github Ecogenomics / GTDBTk / gtdbtk / main.py View on Github external
def classify(self, options):
        """Run the taxonomic classification step for the requested genomes.

        The alignment directory is checked with ``check_dir_exists``, the
        output directory (and the scratch directory, when one is given) is
        passed to ``make_sure_path_exists``, and the genomes resolved by
        ``self._genomes_to_process`` are handed to ``Classify.run``.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user.
        """

        # Some invocations do not define ``pplacer_cpus`` on the namespace
        # (see ticket #255), so fall back to None instead of failing below.
        if not hasattr(options, 'pplacer_cpus'):
            options.pplacer_cpus = None

        check_dir_exists(options.align_dir)
        make_sure_path_exists(options.out_dir)
        if options.scratch_dir:
            make_sure_path_exists(options.scratch_dir)

        # Only the first element of the returned pair is needed here.
        genomes, _unused = self._genomes_to_process(
            options.genome_dir,
            options.batchfile,
            options.extension,
        )

        classifier = Classify(options.cpus, options.pplacer_cpus)
        classifier.run(
            genomes,
            options.align_dir,
            options.out_dir,
            options.prefix,
            options.scratch_dir,
            options.recalculate_red,
            options.debug,
            options.split_tree,
        )
github Ecogenomics / GTDBTk / gtdbtk / classify.py View on Github external
def _place_in_low_tree(self, tree_iter, listg, msa_dict, marker_set_id, prefix, scratch_dir, out_dir):
        """Write the sub-MSA for one low tree and place its genomes.

        A FASTA-style file with one ``>id`` header line and one sequence line
        per entry of ``listg`` is written under ``out_dir``, after which
        ``self.place_genomes`` is invoked with the ``'low'`` level option.

        Parameters
        ----------
        tree_iter
            Value substituted for ``iter`` in the ``DIR_LOW_PPLACER`` and
            ``PATH_LOW_BAC120_SUBMSA`` path templates; also forwarded to
            ``place_genomes``.
        listg
            Iterable of genome identifiers to write to the sub-MSA.
        msa_dict
            Mapping queried with ``.get(gid)`` for the text written under
            each identifier.
        marker_set_id
            Forwarded unchanged to ``place_genomes``.
        prefix
            Forwarded unchanged to ``place_genomes``.
        scratch_dir
            Forwarded unchanged to ``place_genomes``.
        out_dir
            Directory under which the sub-MSA file is created.

        Returns
        -------
        tuple
            ``(low_classify_tree, submsa_file_path)``: the value returned by
            ``place_genomes`` and the path of the sub-MSA file just written.
        """
        make_sure_path_exists(os.path.join(
            out_dir, DIR_LOW_PPLACER.format(iter=tree_iter)))
        submsa_file_path = os.path.join(
            out_dir, PATH_LOW_BAC120_SUBMSA.format(iter=tree_iter))

        # Use a context manager so the handle is flushed and closed even if a
        # write raises part-way through (the manual open/close pair leaked it).
        with open(submsa_file_path, 'w') as submsa_file:
            for gid in listg:
                # NOTE(review): a gid missing from msa_dict is written as the
                # literal text "None" - confirm callers only pass known ids.
                submsa_file.write('>{}\n{}\n'.format(gid, msa_dict.get(gid)))

        # NOTE(review): the path handed to place_genomes is the template
        # result without out_dir joined in, unlike submsa_file_path - confirm
        # this is what place_genomes expects.
        low_classify_tree = self.place_genomes(PATH_LOW_BAC120_SUBMSA.format(iter=tree_iter),
                                               marker_set_id,
                                               out_dir,
                                               prefix,
                                               scratch_dir,
                                               'low', tree_iter)
        return low_classify_tree, submsa_file_path
github Ecogenomics / GTDBTk / gtdbtk / main.py View on Github external
def identify(self, options):
        """Run the marker gene identification step.

        The genome directory and batch file are checked only when they are
        supplied, the output directory is passed to
        ``make_sure_path_exists``, and the genomes resolved by
        ``self._genomes_to_process`` are stored on
        ``self.genomes_to_process`` before being handed to
        ``Markers.identify``.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user.
        """

        # Either input source may be unset; only check the ones provided.
        genome_dir = options.genome_dir
        if genome_dir:
            check_dir_exists(genome_dir)

        batchfile = options.batchfile
        if batchfile:
            check_file_exists(batchfile)

        make_sure_path_exists(options.out_dir)

        found_genomes, genome_tln_tables = self._genomes_to_process(
            genome_dir, batchfile, options.extension)
        self.genomes_to_process = found_genomes

        marker_finder = Markers(options.cpus)
        marker_finder.identify(
            found_genomes,
            genome_tln_tables,
            options.out_dir,
            options.prefix,
            options.force,
        )

        self.logger.info('Done.')
github Ecogenomics / GTDBTk / gtdbtk / main.py View on Github external
def align(self, options):
        """Create MSA from marker genes.

        Parameters
        ----------
        options : argparse.Namespace
            The CLI arguments input by the user.
        """

        check_dir_exists(options.identify_dir)
        make_sure_path_exists(options.out_dir)

        if not hasattr(options, 'outgroup_taxon'):
            options.outgroup_taxon = None

        markers = Markers(options.cpus, options.debug)
        markers.align(options.identify_dir,
                      options.skip_gtdb_refs,
                      options.taxa_filter,
                      options.min_perc_aa,
                      options.custom_msa_filters,
                      options.skip_trimming,
                      options.rnd_seed,
                      options.cols_per_gene,
                      options.min_consensus,
                      options.max_consensus,
                      options.min_perc_taxa,