How to use the b2luigi.core.utils.flatten_to_dict function in b2luigi

To help you get started, we’ve selected a few b2luigi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github nils-braun / b2luigi / b2luigi / batch / processes / gbasf2.py View on Github external
def _download_dataset(self):
        """
        Download the task outputs from the gbasf2 project dataset.

        For each task output defined via ``self.add_to_output(<name>.root)`` a
        directory will be created, into which all files named ``name_*.root`` on
        the grid dataset corresponding to the project name will be downloaded.
        The download is meant to be atomic by first downloading into
        temporary directories.

        :raises RuntimeError: if ``check_dataset_exists_on_grid`` reports that no
            dataset exists for ``self.gbasf2_project_name``.
        """
        if not check_dataset_exists_on_grid(self.gbasf2_project_name, dirac_user=self.dirac_user):
            raise RuntimeError(f"No dataset to download under project name {self.gbasf2_project_name}")
        task_output_dict = flatten_to_dict(self.task.output())
        for output_file_name, output_target in task_output_dict.items():
            # The target path is used as a directory that holds the downloaded files
            output_dir_path = output_target.path
            assert output_file_name == os.path.basename(output_file_name)  # not sure I need this
            output_file_stem, output_file_ext = os.path.splitext(output_file_name)
            assert output_file_ext == ".root", "gbasf2 batch only supports root outputs"

            # Get list of files that we want to download from the grid via ``gb2_ds_list`` so that we can
            # then compare this list with the results of the download to see if it was successful
            dataset_query_string = \
                f"/belle/user/{self.dirac_user}/{self.gbasf2_project_name}/{output_file_stem}_*{output_file_ext}"
            ds_list_command = shlex.split(f"gb2_ds_list {dataset_query_string}")
            output_dataset_grid_filepaths = run_with_gbasf2(ds_list_command, capture_output=True).stdout.splitlines()
            output_dataset_basenames = {os.path.basename(grid_path) for grid_path in output_dataset_grid_filepaths}
            # check if dataset had been already downloaded and if so, skip downloading.
            # ``os.listdir`` returns a list, which never compares equal to a set, so the
            # directory listing has to be converted to a set before the comparison.
            if os.path.isdir(output_dir_path) and set(os.listdir(output_dir_path)) == output_dataset_basenames:
                print(f"Dataset already exists in {output_dir_path}, skipping download.")
github nils-braun / b2luigi / b2luigi / core / task.py View on Github external
def _get_output_target(self, key):
        """
        Return the output target registered under ``key``.

        The task's ``output()`` is flattened into a dictionary with
        ``utils.flatten_to_dict`` and the entry for ``key`` is looked up in it
        (presumably a luigi target). A missing key propagates as the lookup error.
        """
        return utils.flatten_to_dict(self.output())[key]
github nils-braun / b2luigi / b2luigi / core / tasks.py View on Github external
def get_output_file_names(self):
        """
        Return the result of ``utils.flatten_to_file_paths`` applied to the
        flattened outputs of this task.
        """
        flattened_outputs = utils.flatten_to_dict(self.output())
        return utils.flatten_to_file_paths(flattened_outputs)
github nils-braun / b2luigi / b2luigi / core / tasks.py View on Github external
def create_output_dirs(self):
        """Call ``makedirs()`` on every target in the flattened outputs of this task."""
        for target in utils.flatten_to_dict(self.output()).values():
            target.makedirs()