Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _download_dataset(self):
    """
    Download the task outputs from the gbasf2 project dataset.

    For each task output defined via ``self.add_to_output(<name>.root)`` the
    grid is queried with ``gb2_ds_list`` for all files named
    ``<name>_*.root`` under
    ``/belle/user/<dirac_user>/<gbasf2_project_name>/``. If the local output
    directory already exists and contains exactly those file names, the
    output is reported as already downloaded.

    NOTE(review): the original documentation states that files are first
    downloaded into temporary directories so that the download is atomic.
    That transfer step is not part of this block as visible here -- confirm
    where it is implemented.

    :raises RuntimeError: if no dataset exists on the grid for
        ``self.gbasf2_project_name``.
    :raises AssertionError: if an output key is not a bare file name or
        does not have the ``.root`` extension.
    """
    if not check_dataset_exists_on_grid(self.gbasf2_project_name, dirac_user=self.dirac_user):
        raise RuntimeError(f"No dataset to download under project name {self.gbasf2_project_name}")

    task_output_dict = flatten_to_dict(self.task.output())
    for output_file_name, output_target in task_output_dict.items():
        output_dir_path = output_target.path
        # Output keys are expected to be plain file names without any directory part.
        assert output_file_name == os.path.basename(output_file_name)
        output_file_stem, output_file_ext = os.path.splitext(output_file_name)
        assert output_file_ext == ".root", "gbasf2 batch only supports root outputs"

        # Get the list of files that we want to download from the grid via ``gb2_ds_list``
        # so that it can be compared with what is present locally.
        dataset_query_string = (
            f"/belle/user/{self.dirac_user}/{self.gbasf2_project_name}/{output_file_stem}_*{output_file_ext}"
        )
        ds_list_command = shlex.split(f"gb2_ds_list {dataset_query_string}")
        # NOTE(review): assumes ``stdout`` is text (not bytes) so that the names are
        # comparable with the ``str`` entries returned by ``os.listdir`` -- confirm.
        output_dataset_grid_filepaths = run_with_gbasf2(ds_list_command, capture_output=True).stdout.splitlines()
        output_dataset_basenames = {os.path.basename(grid_path) for grid_path in output_dataset_grid_filepaths}

        # Check if the dataset had already been downloaded. ``os.listdir`` returns a list,
        # which never compares equal to a set, so it has to be converted before comparing.
        if os.path.isdir(output_dir_path) and set(os.listdir(output_dir_path)) == output_dataset_basenames:
            print(f"Dataset already exists in {output_dir_path}, skipping download.")
def _get_output_target(self, key):
    """
    Return the output target registered under ``key``.

    The outputs of this task (``self.output()``) are flattened with
    ``utils.flatten_to_dict`` and the entry stored under ``key`` is looked up
    by indexing. According to the original documentation the result is a
    luigi target.
    """
    return utils.flatten_to_dict(self.output())[key]
def get_output_file_names(self):
    """
    Return the file paths of this task's outputs.

    ``self.output()`` is flattened with ``utils.flatten_to_dict`` and the
    result is passed to ``utils.flatten_to_file_paths``, whose return value
    is handed back unchanged.
    """
    flat_outputs = utils.flatten_to_dict(self.output())
    return utils.flatten_to_file_paths(flat_outputs)
def create_output_dirs(self):
    """
    Call ``makedirs()`` on every output target of this task.

    The targets are the values of the flattened output mapping obtained from
    ``utils.flatten_to_dict(self.output())``.
    """
    for target in utils.flatten_to_dict(self.output()).values():
        target.makedirs()