How to use refgenconf - 10 common examples

To help you get started, we’ve selected a few refgenconf examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github databio / refgenie / refgenie / refgenie.py View on Github external
data = json.load(json_file)
                entries.update(data)
            else:
                raise FileNotFoundError(
                    "JSON file with config init settings does not exist: {}".
                        format(args.settings_json))
        if args.genome_folder:
            entries.update({CFG_FOLDER_KEY: args.genome_folder})
        if args.remote_url_base:
            entries.update({CFG_REMOTE_URL_BASE_KEY: args.remote_url_base})
        if args.genome_archive_folder:
            entries.update({CFG_ARCHIVE_KEY: args.genome_archive_folder})
        if args.genome_archive_config:
            entries.update({CFG_ARCHIVE_CONFIG_KEY: args.genome_archive_config})
        _LOGGER.debug("initializing with entries: {}".format(entries))
        rgc = RefGenConf(entries=entries)
        rgc.initialize_config_file(os.path.abspath(gencfg))

    elif args.command == BUILD_CMD:
        if not all([x["genome"] == asset_list[0]["genome"] for x in asset_list]):
            _LOGGER.error("Build can only build assets for one genome")
            sys.exit(1)
        recipe_name = None
        if args.recipe:
            if len(asset_list) > 1:
                _LOGGER.error("Recipes cannot be specified for multi-asset builds")
                sys.exit(1)
            recipe_name = args.recipe
        if args.requirements:
            for a in asset_list:
                recipe = recipe_name or a["asset"]
                if recipe not in asset_build_packages.keys():
github databio / refgenie / refgenie / refgenie.py View on Github external
def main():
    """ Primary workflow """
    parser = logmuse.add_logging_options(build_argparser())
    args, remaining_args = parser.parse_known_args()
    global _LOGGER
    _LOGGER = logmuse.logger_via_cli(args, make_root=True)
    _LOGGER.debug("refgenie {}".format(__version__))
    _LOGGER.debug("Args: {}".format(args))

    if not args.command:
        parser.print_help()
        _LOGGER.error("No command given")
        sys.exit(1)

    gencfg = refgenconf.select_genome_config(filename=args.genome_config, check_exist=not args.command == INIT_CMD,
                                             on_missing=lambda fp: fp, strict_env=True)
    if gencfg is None:
        raise MissingGenomeConfigError(args.genome_config)
    _LOGGER.debug("Determined genome config: {}".format(gencfg))

    # From user input we want to construct a list of asset dicts, where each
    # asset has a genome name, asset name, and tag

    if "asset_registry_paths" in args and args.asset_registry_paths:
        _LOGGER.debug("Found registry_path: {}".format(args.asset_registry_paths))
        asset_list = [parse_registry_path(x) for x in args.asset_registry_paths]

        for a in asset_list:
            # every asset must have a genome, either provided via registry path
            # or the args.genome arg.
            if not a["genome"]:
github databio / refgenie / refgenie / add_assets_igenome.py View on Github external
def main():
    """ main workflow """
    parser = build_argparser()
    args, remaining_args = parser.parse_known_args()
    cfg = select_config(args.config, refgenconf.CFG_ENV_VARS, check_exist=True, strict_env=True)
    if not cfg:
        raise MissingGenomeConfigError(args.config)
    rgc = refgenconf.RefGenConf(filepath=cfg, writable=True)
    pths = [args.path, mkabs(args.path, rgc.genome_folder)]
    if not untar_or_copy(pths[0], os.path.join(rgc.genome_folder, args.genome)) \
            and not untar_or_copy(pths[1], os.path.join(rgc.genome_folder, args.genome)):
        rgc.unlock()
        raise OSError("Path '{}' does not exist. Tried: {}".format(args.path, " and ".join(pths)))
    path_components = [rgc.genome_folder] + [args.genome] + ["*"] * 3 + ["Sequence"]
    assets_paths = glob(os.path.join(*path_components))
    assert len(assets_paths) > 0, OSError("Your iGenomes directory is corrupted, more than one directory matched by {}."
                                          "\nMatched dirs: {}".format(os.path.join(*path_components),
                                                                      ", ".join(assets_paths)))
    assets_path = assets_paths[0]
    asset_names = [d for d in os.listdir(assets_path) if os.path.isdir(assets_path)]
    processed = []
    for a in asset_names:
        asset_dict = {"genome": args.genome, "asset": a, "tag": None, "seek_key": None}
        asset_path = os.path.relpath(os.path.join(assets_path, a), rgc.genome_folder)
github databio / refgenie / refgenie / refgenie.py View on Github external
try:
            # run build command
            signal.signal(signal.SIGINT, _handle_sigint(gat))
            pm.run(command_list_populated, target, container=pm.container)
        except pypiper.exceptions.SubprocessError:
            _LOGGER.error("asset '{}' build failed".format(asset_key))
            return False
        else:
            # save build recipe to the JSON-formatted file
            recipe_file_name = TEMPLATE_RECIPE_JSON.format(asset_key, tag)
            with open(os.path.join(log_outfolder, recipe_file_name), 'w') as outfile:
                json.dump(build_pkg, outfile)
            # in order to prevent locking the config file for writing once while
            # being able to use the seek method for digest calculation we
            # create a temporary object to run seek on.
            tmp_rgc = RefGenConf()
            tmp_rgc[CFG_FOLDER_KEY] = rgc[CFG_FOLDER_KEY]
            tmp_rgc.update_tags(*gat, data={CFG_ASSET_PATH_KEY: asset_key})
            tmp_rgc.update_seek_keys(*gat, keys={k: v.format(**asset_vars) for k, v in build_pkg[ASSETS].items()})
            digest = get_dir_digest(
                _seek(tmp_rgc, genome, asset_key, tag, enclosing_dir=True), pm)
            _LOGGER.info("Asset digest: {}".format(digest))
            del tmp_rgc
            # add updates to config file
            with rgc as r:
                r.update_assets(*gat[0:2], data={CFG_ASSET_DESC_KEY: build_pkg[DESC]})
                r.update_tags(*gat, data={CFG_ASSET_PATH_KEY: asset_key,
                                          CFG_ASSET_CHECKSUM_KEY: digest})
                r.update_seek_keys(*gat, keys={k: v.format(**asset_vars) for k, v in build_pkg[ASSETS].items()})
                r.set_default_pointer(*gat)
        pm.stop_pipeline()
        return True
github databio / pepatac / pipelines / pepatac.py View on Github external
def _add_resources(args, res, asset_dict=None):
    """
    Add additional resources needed for pipeline.

    :param argparse.Namespace args: binding between option name and argument,
        e.g. from parsing command-line options
    :param pm.config.resources res: pipeline manager resources list
    :param asset_dict list: list of dictionary of assets to add
    """

    rgc = RGC(select_genome_config(res.get("genome_config")))

    key_errors = []
    exist_errors = []
    required_list = []

    # Check that bowtie2 indicies exist for specified prealignments
    for reference in args.prealignments:
        for asset in [BT2_IDX_KEY]:
            try:
                res[asset] = rgc.seek(reference, asset)
            except KeyError:
                err_msg = "{} for {} is missing from REFGENIE config file."
                pm.fail_pipeline(KeyError(err_msg.format(asset, reference)))
            except:
                err_msg = "{} for {} does not exist."
                pm.fail_pipeline(IOError(err_msg.format(asset, reference)))
github databio / refgenie / refgenie / refgenie.py View on Github external
def refgenie_build(gencfg, genome, asset_list, recipe_name, args):
    """
    Runs the refgenie build recipe.

    :param str gencfg: path to the genome configuration file
    :param argparse.Namespace args: parsed command-line options/arguments
    """
    rgc = RefGenConf(filepath=gencfg, writable=False)
    specified_args = _parse_user_build_input(args.files)
    specified_params = _parse_user_build_input(args.params)

    if not hasattr(args, "outfolder") or not args.outfolder:
        # Default to genome_folder
        _LOGGER.debug("No outfolder provided, using genome config.")
        args.outfolder = rgc[CFG_FOLDER_KEY]

    _LOGGER.debug("Default config file: {}".format(default_config_file()))

    if args.config_file and not os.path.isfile(args.config_file):
        _LOGGER.debug("Config file path isn't a file: {}".
                      format(args.config_file))
        args.config_file = default_config_file()

    def build_asset(genome, asset_key, tag, build_pkg, genome_outfolder, specific_args, specific_params, **kwargs):
github databio / refgenie / refgenie / refgenie.py View on Github external
rgc.tag(a["genome"], a["asset"], a["tag"], args.tag)

    elif args.command == ID_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        if len(asset_list) == 1:
            g, a = asset_list[0]["genome"], asset_list[0]["asset"]
            t = asset_list[0]["tag"] or rgc.get_default_tag(g, a)
            print(rgc.id(g, a, t))
            return
        for asset in asset_list:
            g, a = asset["genome"], asset["asset"]
            t = asset["tag"] or rgc.get_default_tag(g, a)
            print("{}/{}:{},".format(g, a, t) + rgc.id(g, a, t))
        return
    elif args.command == SUBSCRIBE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        rgc.subscribe(urls=args.genome_server, reset=args.reset)
        return
    elif args.command == UNSUBSCRIBE_CMD:
        rgc = RefGenConf(filepath=gencfg, writable=False)
        rgc.unsubscribe(urls=args.genome_server)
        return
github databio / refgenie / refgenie / add_assets_igenome.py View on Github external
def build_argparser():
    """
    Build a parser for this tool

    :return argparse.ArgumentParser: constructed parser
    """
    parser = argparse.ArgumentParser(description='Integrates every asset from the downloaded iGenomes'
                                                 ' tarball/directory with Refgenie asset management system')
    parser.add_argument('-p', '--path', dest="path", type=str,
                        help='path to the desired genome tarball or directory to integrate', required=True)
    parser.add_argument('-g', '--genome', dest="genome", type=str,  help='name to be assigned to the selected genome',
                        required=True)
    parser.add_argument('-c', '--config', dest="config", type=str,
                        help="path to local genome configuration file. Optional if '{}' environment variable is set.".
                        format(", ".join(refgenconf.CFG_ENV_VARS)), required=False)
    return parser
github databio / refgenie / refgenie / add_assets_igenome.py View on Github external
def main():
    """ main workflow """
    parser = build_argparser()
    args, remaining_args = parser.parse_known_args()
    cfg = select_config(args.config, refgenconf.CFG_ENV_VARS, check_exist=True, strict_env=True)
    if not cfg:
        raise MissingGenomeConfigError(args.config)
    rgc = refgenconf.RefGenConf(filepath=cfg, writable=True)
    pths = [args.path, mkabs(args.path, rgc.genome_folder)]
    if not untar_or_copy(pths[0], os.path.join(rgc.genome_folder, args.genome)) \
            and not untar_or_copy(pths[1], os.path.join(rgc.genome_folder, args.genome)):
        rgc.unlock()
        raise OSError("Path '{}' does not exist. Tried: {}".format(args.path, " and ".join(pths)))
    path_components = [rgc.genome_folder] + [args.genome] + ["*"] * 3 + ["Sequence"]
    assets_paths = glob(os.path.join(*path_components))
    assert len(assets_paths) > 0, OSError("Your iGenomes directory is corrupted, more than one directory matched by {}."
                                          "\nMatched dirs: {}".format(os.path.join(*path_components),
                                                                      ", ".join(assets_paths)))
    assets_path = assets_paths[0]
    asset_names = [d for d in os.listdir(assets_path) if os.path.isdir(assets_path)]
    processed = []
github databio / refgenie / refgenie / exceptions.py View on Github external
def __init__(self, conf_file=None):
        """
        Create the error message, using optionally an attempt filepath.

        :param str conf_file: path attempted to be used as genome config file
        """
        msg = "You must provide a config file either as an argument or via an environment variable: {}"\
            .format(", ".join(CFG_ENV_VARS))
        if conf_file:
            msg = "Not a file {} -- {}.".format(conf_file, msg)
        super(MissingGenomeConfigError, self).__init__(msg)

refgenconf

A standardized configuration object for reference genome assemblies

BSD-2-Clause
Latest version published 3 years ago

Package Health Score

44 / 100
Full package analysis

Similar packages