How to use refgenie - 10 common examples

To help you get started, we’ve selected a few refgenie examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github databio / refgenie / refgenie / refgenie.py View on Github external
parser = logmuse.add_logging_options(build_argparser())
    args, remaining_args = parser.parse_known_args()
    global _LOGGER
    _LOGGER = logmuse.logger_via_cli(args, make_root=True)
    _LOGGER.debug("refgenie {}".format(__version__))
    _LOGGER.debug("Args: {}".format(args))

    if not args.command:
        parser.print_help()
        _LOGGER.error("No command given")
        sys.exit(1)

    gencfg = refgenconf.select_genome_config(filename=args.genome_config, check_exist=not args.command == INIT_CMD,
                                             on_missing=lambda fp: fp, strict_env=True)
    if gencfg is None:
        raise MissingGenomeConfigError(args.genome_config)
    _LOGGER.debug("Determined genome config: {}".format(gencfg))

    # From user input we want to construct a list of asset dicts, where each
    # asset has a genome name, asset name, and tag

    if "asset_registry_paths" in args and args.asset_registry_paths:
        _LOGGER.debug("Found registry_path: {}".format(args.asset_registry_paths))
        asset_list = [parse_registry_path(x) for x in args.asset_registry_paths]

        for a in asset_list:
            # every asset must have a genome, either provided via registry path
            # or the args.genome arg.
            if not a["genome"]:
                if args.genome:
                    a["genome"] = args.genome
                else:
github databio / refgenie / refgenie / refgenie.py View on Github external
def main():
    """
    Primary workflow.

    Builds the CLI parser, sets up logging, and determines the genome
    configuration file to use before any command-specific work is done.

    :raise MissingGenomeConfigError: if no genome configuration file could be
        determined from the arguments
    """
    parser = logmuse.add_logging_options(build_argparser())
    # parse_known_args tolerates unrecognized options; they are collected in
    # remaining_args instead of causing a parser error.
    args, remaining_args = parser.parse_known_args()
    # The logger is stored at module level so that other functions can use it.
    global _LOGGER
    _LOGGER = logmuse.logger_via_cli(args, make_root=True)
    _LOGGER.debug("refgenie {}".format(__version__))
    _LOGGER.debug("Args: {}".format(args))

    # A subcommand is mandatory: show usage and exit with a non-zero status.
    if not args.command:
        parser.print_help()
        _LOGGER.error("No command given")
        sys.exit(1)

    # The existence check is skipped only for the init command.
    # NOTE(review): on_missing hands the path back unchanged, presumably so a
    # not-yet-existing config path is still accepted -- confirm in refgenconf.
    gencfg = refgenconf.select_genome_config(filename=args.genome_config, check_exist=not args.command == INIT_CMD,
                                             on_missing=lambda fp: fp, strict_env=True)
    if gencfg is None:
        raise MissingGenomeConfigError(args.genome_config)
    _LOGGER.debug("Determined genome config: {}".format(gencfg))

    # From user input we want to construct a list of asset dicts, where each
    # asset has a genome name, asset name, and tag
github databio / refgenie / refgenie / refgenie.py View on Github external
raise NotImplementedError("Can only add 1 asset at a time")
        else:
            refgenie_add(rgc, asset_list[0], args.path, args.force)

    elif args.command == PULL_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        if args.force:
            force = True
        elif args.no_overwrite:
            force = False
        else:
            force = None

        outdir = rgc[CFG_FOLDER_KEY]
        if not os.path.exists(outdir):
            raise MissingFolderError(outdir)
        target = _key_to_name(CFG_FOLDER_KEY)
        if not perm_check_x(outdir, target):
            return
        if not _single_folder_writeable(outdir):
            _LOGGER.error("Insufficient permissions to write to {}: {}".
                          format(target, outdir))
            return

        for a in asset_list:
            rgc.pull(a["genome"], a["asset"], a["tag"],
                     unpack=not args.no_untar, force=force)

    elif args.command in [LIST_LOCAL_CMD, LIST_REMOTE_CMD]:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        if args.command == LIST_REMOTE_CMD:
            num_servers = 0
github databio / refgenie / refgenie / exceptions.py View on Github external
def __init__(self, folder):
        """
        Initialize the error with the offending folder path.

        The path is passed to the parent initializer as its only argument;
        no further message text is added here.

        :param str folder: path attempted to be used as folder to save a file to
        """
        super(MissingFolderError, self).__init__(folder)
github databio / refgenie / refgenie / refgenie.py View on Github external
def _writeable(outdir, strict_exists=False):
    """
    Check whether a folder, or its nearest existing ancestor, is writeable.

    Starting at ``outdir``, walk up the directory tree until an existing path
    is found and run the single-folder writeability check on it.

    :param str outdir: path to check; an empty value means the current
        directory
    :param bool strict_exists: whether to raise an error instead of falling
        back to parent folders when the path does not exist
    :return: result of ``_single_folder_writeable`` for the first existing
        path found, or False when no existing ancestor can be found
    :raise MissingFolderError: if ``strict_exists`` is set and ``outdir``
        does not exist
    """
    outdir = outdir or "."
    while not os.path.exists(outdir):
        if strict_exists:
            raise MissingFolderError(outdir)
        # dirname of a bare name is "", which stands for the current directory.
        parent = os.path.dirname(outdir) or "."
        if parent == outdir:
            # Reached a root (or a missing working directory) that does not
            # exist: climbing further would never make progress, so stop
            # instead of looping forever.
            return False
        outdir = parent
    return _single_folder_writeable(outdir)
github databio / refgenie / refgenie / exceptions.py View on Github external
def __init__(self, conf_file=None):
        """
        Build the exception message, optionally naming the path that was tried.

        The base message lists the environment variables that may hold the
        config path; when a path was attempted it is prepended to that text.

        :param str conf_file: path attempted to be used as genome config file
        """
        env_var_names = ", ".join(CFG_ENV_VARS)
        guidance = "You must provide a config file either as an argument or via an environment variable: {}"\
            .format(env_var_names)
        message = "Not a file {} -- {}.".format(conf_file, guidance) if conf_file else guidance
        super(MissingGenomeConfigError, self).__init__(message)
github databio / refgenie / refgenie / add_assets_igenome.py View on Github external
def main():
    """
    Main workflow: place an iGenomes archive/folder in the genome folder and
    collect the asset folders found in it.

    :raise MissingGenomeConfigError: if no genome configuration file could be
        selected
    :raise OSError: if the input path could not be used, neither as given nor
        relative to the genome folder
    """
    parser = build_argparser()
    args, remaining_args = parser.parse_known_args()
    cfg = select_config(args.config, refgenconf.CFG_ENV_VARS, check_exist=True, strict_env=True)
    if not cfg:
        raise MissingGenomeConfigError(args.config)
    rgc = refgenconf.RefGenConf(filepath=cfg, writable=True)
    # Try the path exactly as given first, then the variant produced by mkabs
    # from the path and the genome folder.
    pths = [args.path, mkabs(args.path, rgc.genome_folder)]
    if not untar_or_copy(pths[0], os.path.join(rgc.genome_folder, args.genome)) \
            and not untar_or_copy(pths[1], os.path.join(rgc.genome_folder, args.genome)):
        # The config was opened writable; unlock it before bailing out.
        rgc.unlock()
        raise OSError("Path '{}' does not exist. Tried: {}".format(args.path, " and ".join(pths)))
    path_components = [rgc.genome_folder] + [args.genome] + ["*"] * 3 + ["Sequence"]
    assets_paths = glob(os.path.join(*path_components))
    # NOTE(review): the condition only rejects an empty match list, while the
    # message talks about more than one match -- confirm the intended check.
    assert len(assets_paths) > 0, OSError("Your iGenomes directory is corrupted, more than one directory matched by {}."
                                          "\nMatched dirs: {}".format(os.path.join(*path_components),
                                                                      ", ".join(assets_paths)))
    assets_path = assets_paths[0]
    # Keep only sub-directories: each entry has to be tested itself, joined
    # with its parent, rather than testing the parent folder repeatedly.
    asset_names = [d for d in os.listdir(assets_path) if os.path.isdir(os.path.join(assets_path, d))]
    processed = []
    for a in asset_names:
        asset_dict = {"genome": args.genome, "asset": a, "tag": None, "seek_key": None}
github databio / refgenie / refgenie / refgenie.py View on Github external
def build_argparser():
    """
    Builds argument parser.

    :return argparse.ArgumentParser
    """

    banner = "%(prog)s - reference genome asset manager"
    additional_description = "\nhttps://refgenie.databio.org"

    parser = VersionInHelpParser(
        prog="refgenie",
        version=__version__,
        description=banner,
        epilog=additional_description)

    subparsers = parser.add_subparsers(dest="command")

    def add_subparser(cmd, description):
        return subparsers.add_parser(
            cmd, description=description, help=description)

    sps = {}
    for cmd, desc in SUBPARSER_MESSAGES.items():
        sps[cmd] = add_subparser(cmd, desc)
        # It's required for init
        sps[cmd].add_argument(
            '-c', '--genome-config', required=(cmd == INIT_CMD), dest="genome_config",
            help="Path to local genome configuration file. Optional if {} environment variable is set."
github databio / refgenie / refgenie / build_all_genome.py View on Github external
"""
    return ["--" + x + " " for x in req_input]


# Folder into which the per-asset submission scripts are written.
subdir_path = _make_sub_dir(args.path, args.genome)
dcc = divvy.ComputingConfiguration()
# Activate the divvy compute package named "slurm".
dcc.activate_package("slurm")
# Command placed in every script; the placeholders are filled in per asset.
cmd_template = "refgenie build -g {g} -a {a} {req_input_str}"
# Keep the genome name in its own variable: "genome" is popped from the
# argument dict below.
genome = args.genome
to_remove = ["genome", "path"]

# NOTE(review): if args is an argparse.Namespace, vars() returns its own
# attribute dict, so popping keys here also removes them from args.
data = vars(args)
for i in to_remove:
    data.pop(i)

# One submission script per asset listed in asset_build_packages.
for asset in asset_build_packages:
    sub_script = os.path.join(subdir_path, asset + ".sub")
    req_input = asset_build_packages[asset]["required_inputs"]
    if req_input:
        # The extra inputs are added to the command as bare options, hence
        # the notice that the generated script needs a manual edit.
        print("{} asset requires additional input in the command ({}), so '{}'"
            " requires manual edit".format(asset, req_input, sub_script))
        req_str = " ".join(_req_input_to_args(req_input))
    else:
        req_str = ""
    # The same dict is reused for every asset; these three keys are
    # overwritten on each iteration.
    data["CODE"] = cmd_template.format(g=genome, a=asset, req_input_str=req_str)
    data["LOGFILE"] = asset + ".log"
    data["JOBNAME"] = asset + "Build"
    dcc.write_script(sub_script, data)
github databio / refgenie / refgenie / build_all_genome.py View on Github external
# Folder into which the per-asset submission scripts are written.
subdir_path = _make_sub_dir(args.path, args.genome)
dcc = divvy.ComputingConfiguration()
# Activate the divvy compute package named "slurm".
dcc.activate_package("slurm")
# Command placed in every script; the placeholders are filled in per asset.
cmd_template = "refgenie build -g {g} -a {a} {req_input_str}"
# Keep the genome name in its own variable: "genome" is popped from the
# argument dict below.
genome = args.genome
to_remove = ["genome", "path"]

# NOTE(review): if args is an argparse.Namespace, vars() returns its own
# attribute dict, so popping keys here also removes them from args.
data = vars(args)
for i in to_remove:
    data.pop(i)

# One submission script per asset listed in asset_build_packages.
for asset in asset_build_packages:
    sub_script = os.path.join(subdir_path, asset + ".sub")
    req_input = asset_build_packages[asset]["required_inputs"]
    if req_input:
        # The extra inputs are added to the command as bare options, hence
        # the notice that the generated script needs a manual edit.
        print("{} asset requires additional input in the command ({}), so '{}'"
            " requires manual edit".format(asset, req_input, sub_script))
        req_str = " ".join(_req_input_to_args(req_input))
    else:
        req_str = ""
    # The same dict is reused for every asset; these three keys are
    # overwritten on each iteration.
    data["CODE"] = cmd_template.format(g=genome, a=asset, req_input_str=req_str)
    data["LOGFILE"] = asset + ".log"
    data["JOBNAME"] = asset + "Build"
    dcc.write_script(sub_script, data)

refgenie

Refgenie creates a standardized folder structure for reference genome files and indexes. You can download pre-built genomes or build your own for any FASTA file.

BSD-2-Clause
Latest version published 3 years ago

Package Health Score

48 / 100
Full package analysis

Similar packages