Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
data = json.load(json_file)
entries.update(data)
else:
raise FileNotFoundError(
"JSON file with config init settings does not exist: {}".
format(args.settings_json))
if args.genome_folder:
entries.update({CFG_FOLDER_KEY: args.genome_folder})
if args.remote_url_base:
entries.update({CFG_REMOTE_URL_BASE_KEY: args.remote_url_base})
if args.genome_archive_folder:
entries.update({CFG_ARCHIVE_KEY: args.genome_archive_folder})
if args.genome_archive_config:
entries.update({CFG_ARCHIVE_CONFIG_KEY: args.genome_archive_config})
_LOGGER.debug("initializing with entries: {}".format(entries))
rgc = RefGenConf(entries=entries)
rgc.initialize_config_file(os.path.abspath(gencfg))
elif args.command == BUILD_CMD:
if not all([x["genome"] == asset_list[0]["genome"] for x in asset_list]):
_LOGGER.error("Build can only build assets for one genome")
sys.exit(1)
recipe_name = None
if args.recipe:
if len(asset_list) > 1:
_LOGGER.error("Recipes cannot be specified for multi-asset builds")
sys.exit(1)
recipe_name = args.recipe
if args.requirements:
for a in asset_list:
recipe = recipe_name or a["asset"]
if recipe not in asset_build_packages.keys():
def main():
""" Primary workflow """
# Build the CLI parser and attach logmuse's logging options to it.
# parse_known_args() yields the parsed namespace plus the leftover arguments.
parser = logmuse.add_logging_options(build_argparser())
args, remaining_args = parser.parse_known_args()
# The module-level logger is rebound here from the parsed CLI options.
global _LOGGER
_LOGGER = logmuse.logger_via_cli(args, make_root=True)
_LOGGER.debug("refgenie {}".format(__version__))
_LOGGER.debug("Args: {}".format(args))
# Without a subcommand there is nothing to do: show the help, log an error
# and exit with status 1.
if not args.command:
parser.print_help()
_LOGGER.error("No command given")
sys.exit(1)
# Resolve the genome config path. check_exist is False only for the init
# command (args.command == INIT_CMD); on_missing returns the path unchanged.
gencfg = refgenconf.select_genome_config(filename=args.genome_config, check_exist=not args.command == INIT_CMD,
on_missing=lambda fp: fp, strict_env=True)
if gencfg is None:
raise MissingGenomeConfigError(args.genome_config)
_LOGGER.debug("Determined genome config: {}".format(gencfg))
# From user input we want to construct a list of asset dicts, where each
# asset has a genome name, asset name, and tag
# The membership test guards against subcommands whose namespace has no
# asset_registry_paths attribute at all.
if "asset_registry_paths" in args and args.asset_registry_paths:
_LOGGER.debug("Found registry_path: {}".format(args.asset_registry_paths))
asset_list = [parse_registry_path(x) for x in args.asset_registry_paths]
for a in asset_list:
# every asset must have a genome, either provided via registry path
# or the args.genome arg.
if not a["genome"]:
def main():
    """
    Integrate a downloaded iGenomes tarball/directory with refgenie.

    Parses the command line, selects the genome configuration file, passes
    the provided path to ``untar_or_copy`` so the data lands in
    ``<genome_folder>/<genome>``, and then iterates over the asset
    directories found in the matched ``Sequence`` directory.

    :raise MissingGenomeConfigError: if no genome configuration file can be
        selected
    :raise OSError: if the provided path cannot be found, or if no
        ``Sequence`` directory is matched inside the imported genome
    """
    parser = build_argparser()
    args, remaining_args = parser.parse_known_args()
    cfg = select_config(args.config, refgenconf.CFG_ENV_VARS, check_exist=True, strict_env=True)
    if not cfg:
        raise MissingGenomeConfigError(args.config)
    rgc = refgenconf.RefGenConf(filepath=cfg, writable=True)
    genome_dir = os.path.join(rgc.genome_folder, args.genome)
    # the path is tried as given first, then as resolved by mkabs against the
    # genome folder
    pths = [args.path, mkabs(args.path, rgc.genome_folder)]
    if not untar_or_copy(pths[0], genome_dir) and not untar_or_copy(pths[1], genome_dir):
        rgc.unlock()
        raise OSError("Path '{}' does not exist. Tried: {}".format(args.path, " and ".join(pths)))
    path_components = [rgc.genome_folder] + [args.genome] + ["*"] * 3 + ["Sequence"]
    assets_paths = glob(os.path.join(*path_components))
    if not assets_paths:
        # An explicit raise replaces the former ``assert``: asserts are
        # stripped under ``python -O`` and the old message described the wrong
        # condition. The config is released first, mirroring the missing-path
        # error branch above.
        rgc.unlock()
        raise OSError("Your iGenomes directory is corrupted, no directory matched by {}."
                      .format(os.path.join(*path_components)))
    # when several directories match, the first one is used (as before)
    assets_path = assets_paths[0]
    # only sub-directories of the Sequence directory are treated as assets;
    # each entry is tested individually rather than the parent directory
    asset_names = [d for d in os.listdir(assets_path) if os.path.isdir(os.path.join(assets_path, d))]
    processed = []
    for a in asset_names:
        asset_dict = {"genome": args.genome, "asset": a, "tag": None, "seek_key": None}
        # asset location expressed relative to the genome folder
        asset_path = os.path.relpath(os.path.join(assets_path, a), rgc.genome_folder)
try:
# run build command
# Register the handler produced by _handle_sigint(gat) for SIGINT before the
# command is launched.
signal.signal(signal.SIGINT, _handle_sigint(gat))
pm.run(command_list_populated, target, container=pm.container)
except pypiper.exceptions.SubprocessError:
# A failed subprocess is logged and reported to the caller as False.
_LOGGER.error("asset '{}' build failed".format(asset_key))
return False
else:
# save build recipe to the JSON-formatted file
recipe_file_name = TEMPLATE_RECIPE_JSON.format(asset_key, tag)
with open(os.path.join(log_outfolder, recipe_file_name), 'w') as outfile:
json.dump(build_pkg, outfile)
# in order to prevent locking the config file for writing once while
# being able to use the seek method for digest calculation we
# create a temporary object to run seek on.
tmp_rgc = RefGenConf()
# The temporary object only receives the genome folder plus the tag and
# seek-key entries of the asset that was just built.
tmp_rgc[CFG_FOLDER_KEY] = rgc[CFG_FOLDER_KEY]
tmp_rgc.update_tags(*gat, data={CFG_ASSET_PATH_KEY: asset_key})
tmp_rgc.update_seek_keys(*gat, keys={k: v.format(**asset_vars) for k, v in build_pkg[ASSETS].items()})
# The digest is computed on the path returned by _seek with enclosing_dir=True.
digest = get_dir_digest(
_seek(tmp_rgc, genome, asset_key, tag, enclosing_dir=True), pm)
_LOGGER.info("Asset digest: {}".format(digest))
del tmp_rgc
# add updates to config file
# All updates to the real config go through the rgc context manager:
# description, tag data (path + checksum), seek keys and the default pointer.
with rgc as r:
r.update_assets(*gat[0:2], data={CFG_ASSET_DESC_KEY: build_pkg[DESC]})
r.update_tags(*gat, data={CFG_ASSET_PATH_KEY: asset_key,
CFG_ASSET_CHECKSUM_KEY: digest})
r.update_seek_keys(*gat, keys={k: v.format(**asset_vars) for k, v in build_pkg[ASSETS].items()})
r.set_default_pointer(*gat)
# Stop the pipeline manager and report success to the caller.
pm.stop_pipeline()
return True
def _add_resources(args, res, asset_dict=None):
    """
    Add additional resources needed for pipeline.

    For every reference listed in ``args.prealignments`` the bowtie2 index
    asset is looked up with ``rgc.seek`` and stored in ``res`` under the
    asset key. Lookup failures are reported through ``pm.fail_pipeline``.

    :param argparse.Namespace args: binding between option name and argument,
        e.g. from parsing command-line options
    :param pm.config.resources res: pipeline manager resources list
    :param asset_dict list: list of dictionary of assets to add
        (not used in the visible body)
    """
    rgc = RGC(select_genome_config(res.get("genome_config")))

    # NOTE(review): these accumulators are not read anywhere in the visible
    # body -- confirm whether later code relies on them.
    key_errors = []
    exist_errors = []
    required_list = []

    # Check that bowtie2 indicies exist for specified prealignments
    for reference in args.prealignments:
        for asset in [BT2_IDX_KEY]:
            try:
                # NOTE(review): res[asset] is overwritten on each iteration,
                # so only the last reference's path is kept -- confirm intent.
                res[asset] = rgc.seek(reference, asset)
            except KeyError:
                err_msg = "{} for {} is missing from REFGENIE config file."
                pm.fail_pipeline(KeyError(err_msg.format(asset, reference)))
            except Exception:
                # Narrowed from a bare ``except:`` so that KeyboardInterrupt
                # and SystemExit propagate instead of being reported as a
                # missing asset.
                err_msg = "{} for {} does not exist."
                pm.fail_pipeline(IOError(err_msg.format(asset, reference)))
def refgenie_build(gencfg, genome, asset_list, recipe_name, args):
"""
Runs the refgenie build recipe.
:param str gencfg: path to the genome configuration file
:param argparse.Namespace args: parsed command-line options/arguments
"""
# NOTE(review): genome, asset_list and recipe_name are not described in the
# docstring and are not used in the visible part of this function.
# The config is opened with writable=False here.
rgc = RefGenConf(filepath=gencfg, writable=False)
# Both the --files and --params style inputs go through the same parser helper.
specified_args = _parse_user_build_input(args.files)
specified_params = _parse_user_build_input(args.params)
# hasattr() covers namespaces that lack the attribute entirely; an empty or
# None value is treated the same way.
if not hasattr(args, "outfolder") or not args.outfolder:
# Default to genome_folder
_LOGGER.debug("No outfolder provided, using genome config.")
args.outfolder = rgc[CFG_FOLDER_KEY]
_LOGGER.debug("Default config file: {}".format(default_config_file()))
# A config_file value that is set but does not point to a file is logged.
# NOTE(review): indentation was lost in this excerpt; presumably the
# reassignment to default_config_file() below is guarded by this `if` -- confirm.
if args.config_file and not os.path.isfile(args.config_file):
_LOGGER.debug("Config file path isn't a file: {}".
format(args.config_file))
args.config_file = default_config_file()
def build_asset(genome, asset_key, tag, build_pkg, genome_outfolder, specific_args, specific_params, **kwargs):
# Re-tag the asset described by `a` with the tag given in args.tag.
rgc.tag(a["genome"], a["asset"], a["tag"], args.tag)
elif args.command == ID_CMD:
# Print asset identifiers; the config is opened with writable=False.
rgc = RefGenConf(filepath=gencfg, writable=False)
if len(asset_list) == 1:
# Single asset: print the bare identifier only. A falsy tag falls back to
# the default tag reported by the config.
g, a = asset_list[0]["genome"], asset_list[0]["asset"]
t = asset_list[0]["tag"] or rgc.get_default_tag(g, a)
print(rgc.id(g, a, t))
return
# Several assets: each identifier is prefixed with "genome/asset:tag,".
for asset in asset_list:
g, a = asset["genome"], asset["asset"]
t = asset["tag"] or rgc.get_default_tag(g, a)
print("{}/{}:{},".format(g, a, t) + rgc.id(g, a, t))
return
elif args.command == SUBSCRIBE_CMD:
# Pass the servers in args.genome_server to subscribe; args.reset is
# forwarded as the `reset` argument.
rgc = RefGenConf(filepath=gencfg, writable=False)
rgc.subscribe(urls=args.genome_server, reset=args.reset)
return
elif args.command == UNSUBSCRIBE_CMD:
# Pass the servers in args.genome_server to unsubscribe.
rgc = RefGenConf(filepath=gencfg, writable=False)
rgc.unsubscribe(urls=args.genome_server)
return
def build_argparser():
    """
    Assemble the command-line interface of the iGenomes import tool.

    :return argparse.ArgumentParser: parser exposing the -p/--path,
        -g/--genome (both required) and -c/--config (optional) options
    """
    cli = argparse.ArgumentParser(
        description='Integrates every asset from the downloaded iGenomes'
                    ' tarball/directory with Refgenie asset management system')
    # (flags, keyword settings) for each option, registered in this order
    option_specs = (
        (('-p', '--path'),
         dict(dest="path", type=str, required=True,
              help='path to the desired genome tarball or directory to integrate')),
        (('-g', '--genome'),
         dict(dest="genome", type=str, required=True,
              help='name to be assigned to the selected genome')),
        (('-c', '--config'),
         dict(dest="config", type=str, required=False,
              help="path to local genome configuration file. Optional if '{}' environment variable is set."
                   .format(", ".join(refgenconf.CFG_ENV_VARS)))),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    return cli
def main():
    """
    Integrate a downloaded iGenomes tarball/directory with refgenie.

    Parses the command line, selects the genome configuration file, passes
    the provided path to ``untar_or_copy`` so the data lands in
    ``<genome_folder>/<genome>``, and collects the asset directories found
    in the matched ``Sequence`` directory.

    :raise MissingGenomeConfigError: if no genome configuration file can be
        selected
    :raise OSError: if the provided path cannot be found, or if no
        ``Sequence`` directory is matched inside the imported genome
    """
    parser = build_argparser()
    args, remaining_args = parser.parse_known_args()
    cfg = select_config(args.config, refgenconf.CFG_ENV_VARS, check_exist=True, strict_env=True)
    if not cfg:
        raise MissingGenomeConfigError(args.config)
    rgc = refgenconf.RefGenConf(filepath=cfg, writable=True)
    genome_dir = os.path.join(rgc.genome_folder, args.genome)
    # the path is tried as given first, then as resolved by mkabs against the
    # genome folder
    pths = [args.path, mkabs(args.path, rgc.genome_folder)]
    if not untar_or_copy(pths[0], genome_dir) and not untar_or_copy(pths[1], genome_dir):
        rgc.unlock()
        raise OSError("Path '{}' does not exist. Tried: {}".format(args.path, " and ".join(pths)))
    path_components = [rgc.genome_folder] + [args.genome] + ["*"] * 3 + ["Sequence"]
    assets_paths = glob(os.path.join(*path_components))
    if not assets_paths:
        # An explicit raise replaces the former ``assert``: asserts are
        # stripped under ``python -O`` and the old message described the wrong
        # condition. The config is released first, mirroring the missing-path
        # error branch above.
        rgc.unlock()
        raise OSError("Your iGenomes directory is corrupted, no directory matched by {}."
                      .format(os.path.join(*path_components)))
    # when several directories match, the first one is used (as before)
    assets_path = assets_paths[0]
    # only sub-directories of the Sequence directory are treated as assets;
    # each entry is tested individually rather than the parent directory
    asset_names = [d for d in os.listdir(assets_path) if os.path.isdir(os.path.join(assets_path, d))]
    processed = []
def __init__(self, conf_file=None):
    """
    Compose the error message, optionally mentioning the path that was tried.

    :param str conf_file: path attempted to be used as genome config file
    """
    env_var_names = ", ".join(CFG_ENV_VARS)
    hint = "You must provide a config file either as an argument or via an environment variable: {}"\
        .format(env_var_names)
    # prefix the generic hint with the offending path when one was supplied
    text = "Not a file {} -- {}.".format(conf_file, hint) if conf_file else hint
    super(MissingGenomeConfigError, self).__init__(text)