Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import b2luigi
import random
class MyNumberTask(b2luigi.Task):
    """Write one random number to a text file named after ``some_parameter``."""

    # Integer parameter; its value is embedded in the output file name.
    some_parameter = b2luigi.IntParameter()

    def output(self):
        """Return the local target ``results/output_file_<some_parameter>.txt``."""
        target_path = f"results/output_file_{self.some_parameter}.txt"
        return b2luigi.LocalTarget(target_path)

    def run(self):
        """Draw a value from ``random.random()`` and write it as a single line."""
        random_number = random.random()
        target = self.output()
        with target.open("w") as handle:
            handle.write(f"{random_number}\n")
if __name__ == "__main__":
    # The result directory is configured first, then the tasks are built.
    b2luigi.set_setting("result_dir", "results")

    # One task per parameter value 0..99, all handed to b2luigi in one call.
    number_tasks = [MyNumberTask(some_parameter=i) for i in range(100)]
    b2luigi.process(number_tasks, workers=200)
def test_requires(self):
    """Check which parameters a task decorated with ``b2luigi.requires`` exposes.

    ``TaskB`` requires ``TaskA`` with ``some_parameter`` fixed to 3. The test
    asserts that ``TaskB`` then lists only ``another_parameter`` and
    ``some_other_parameter``, and that both hold the values passed to the
    constructor.
    """

    class TaskA(b2luigi.Task):
        some_parameter = b2luigi.IntParameter()
        some_other_parameter = b2luigi.IntParameter()

        def output(self):
            yield self.add_to_output("test.txt")

    @b2luigi.requires(TaskA, some_parameter=3)
    class TaskB(b2luigi.Task):
        another_parameter = b2luigi.IntParameter()

        def output(self):
            yield self.add_to_output("out.dat")

    dependent = TaskB(some_other_parameter=1, another_parameter=42)

    expected_names = ["another_parameter", "some_other_parameter"]
    self.assertEqual(sorted(dependent.get_param_names()), expected_names)
    self.assertEqual(dependent.another_parameter, 42)
    self.assertEqual(dependent.some_other_parameter, 1)
def test_dependencies(self):
    """Check that a requiring task sees the outputs of the task it requires.

    ``TaskB`` requires ``TaskA`` without fixing any parameter. The test
    asserts that ``TaskB`` carries ``some_parameter``, that exactly one input
    target and one input file name exist for the key ``"file_a"``, and that
    the target's path equals the reported file name.
    """

    class TaskA(b2luigi.Task):
        some_parameter = b2luigi.IntParameter()

        def output(self):
            yield self.add_to_output("file_a")

    @b2luigi.requires(TaskA)
    class TaskB(b2luigi.Task):
        def output(self):
            yield self.add_to_output("file_b")

    downstream = TaskB(some_parameter=42)

    self.assertEqual(get_filled_params(downstream), {"some_parameter": 42})

    # Exactly one upstream entry is expected, keyed by "file_a".
    self.assertEqual(len(downstream._get_input_targets("file_a")), 1)
    self.assertEqual(len(downstream.get_input_file_names("file_a")), 1)
    self.assertEqual(len(downstream.get_input_file_names().keys()), 1)

    # Target object and file-name lookup must point at the same path.
    first_target = downstream._get_input_targets("file_a")[0]
    first_file_name = downstream.get_input_file_names("file_a")[0]
    self.assertEqual(first_target.path, first_file_name)
def test_requires(self):
    """Verify the parameter set of a task built with ``b2luigi.requires``.

    ``TaskA`` declares two integer parameters; ``TaskB`` requires it with
    ``some_parameter`` pinned to 3 and adds ``another_parameter``. The
    assertions check that ``TaskB`` reports only the unpinned parameter and
    its own one, each with the value given at construction.
    """

    class TaskA(b2luigi.Task):
        some_parameter = b2luigi.IntParameter()
        some_other_parameter = b2luigi.IntParameter()

        def output(self):
            yield self.add_to_output("test.txt")

    @b2luigi.requires(TaskA, some_parameter=3)
    class TaskB(b2luigi.Task):
        another_parameter = b2luigi.IntParameter()

        def output(self):
            yield self.add_to_output("out.dat")

    task_b = TaskB(some_other_parameter=1, another_parameter=42)

    reported_names = sorted(task_b.get_param_names())
    self.assertEqual(reported_names, ["another_parameter", "some_other_parameter"])
    self.assertEqual(task_b.another_parameter, 42)
    self.assertEqual(task_b.some_other_parameter, 1)
def test_file_path_usage(self):
    """Check output file naming and input lookups for a task without inputs.

    The test asserts that the task has no input file names, that asking for
    an unknown input key raises ``KeyError``, that the output target path
    matches ``get_output_file_name``, and that the output file names contain
    both their key and the text ``some_parameter=3``.
    """

    class TaskA(b2luigi.Task):
        some_parameter = b2luigi.IntParameter()

        def output(self):
            for output_key in ("file_a", "file_b"):
                yield self.add_to_output(output_key)

    task = TaskA(some_parameter=3)

    # The setting is changed after the task instance already exists.
    b2luigi.set_setting("result_dir", "results/some_crazy_path")

    self.assertEqual(get_filled_params(task), {"some_parameter": 3})

    # No upstream tasks: nothing to list, and any key lookup must fail.
    self.assertFalse(task.get_input_file_names())
    with self.assertRaises(KeyError):
        task._get_input_targets("some_file")

    output_target = task._get_output_target("file_a")
    self.assertEqual(output_target.path, task.get_output_file_name("file_a"))

    for output_key in ("file_a", "file_b"):
        self.assertIn(output_key, task.get_output_file_name(output_key))

    self.assertIn("some_parameter=3", task.get_output_file_name("file_a"))
serialized_parameters = get_serialized_parameters(self)
# Git hash should go to the front
return_dict = collections.OrderedDict()
return_dict["git_hash"] = serialized_parameters["git_hash"]
for key, value in serialized_parameters.items():
return_dict[key] = value
return return_dict
class Basf2PathTask(Basf2Task):
num_processes = b2luigi.IntParameter(significant=False, default=0)
max_event = b2luigi.IntParameter(significant=False, default=0)
def create_path(self):
raise NotImplementedError()
@b2luigi.on_temporary_files
def process(self):
assert get_basf2_git_hash() == self.git_hash
try:
import basf2
import ROOT
except ImportError:
raise ImportError("Can not find ROOT or basf2. Can not use the basf2 task.")
if self.num_processes:
basf2.set_nprocesses(self.num_processes)
'daughter(0, kaonID)', 'daughter(1, pionID)', 'isSignal', 'mcErrors'],
filename=self.get_output_file_name("D_n_tuple.root"),
path=path)
modularAnalysis.variablesToNtuple('B-',
['Mbc', 'deltaE', 'isSignal', 'mcErrors', 'M'],
filename=self.get_output_file_name("B_n_tuple.root"),
path=path)
return path
def output(self):
    """Yield the output entries for the D and B n-tuple ROOT files.

    Each entry is whatever ``self.add_to_output`` returns for the file name;
    the D n-tuple comes first, then the B n-tuple.
    """
    for file_name in ("D_n_tuple.root", "B_n_tuple.root"):
        yield self.add_to_output(file_name)
class MasterTask(Basf2nTupleMergeTask):
    """Task that requires one ``AnalysisTask`` per member of ``SimulationType``."""

    # Integer parameter; presumably the number of events to handle (confirm
    # against the tasks that consume it).
    n_events = luigi.IntParameter()

    def requires(self):
        """Yield a clone of this task as ``AnalysisTask`` for every simulation type."""
        yield from (
            self.clone(AnalysisTask, event_type=simulation_type)
            for simulation_type in SimulationType
        )
if __name__ == "__main__":
    # Entry point: process a single MasterTask with four workers.
    master_task = MasterTask(n_events=1)
    luigi.process(master_task, workers=4)
experiment_number = b2luigi.IntParameter()
run_number = b2luigi.IntParameter()
prefix = b2luigi.Parameter()
file_name = b2luigi.Parameter()
class RawDataTask(DataTask):
    """Data task whose ``data_mode`` is fixed to ``DataMode.raw``."""

    data_mode = DataMode.raw

    def output(self):
        """Yield a one-entry mapping from ``"raw_output.root"`` to a local target.

        The target path is whatever ``_build_data_path`` returns for this task.
        """
        data_path = _build_data_path(self)
        yield {"raw_output.root": b2luigi.LocalTarget(data_path)}
class DstDataTask(DataTask):
    """Data task that adds ``release``, ``prod`` and ``database`` parameters."""

    release = b2luigi.Parameter()
    prod = b2luigi.IntParameter()
    database = b2luigi.IntParameter()

    def output(self):
        """Yield a one-entry mapping from ``"full_output.root"`` to a local target.

        The target path is whatever ``_build_data_path`` returns for this task.
        """
        local_target = b2luigi.LocalTarget(_build_data_path(self))
        yield {"full_output.root": local_target}
class SkimmedRawDataTask(DstDataTask):
    """``DstDataTask`` variant with ``data_mode`` fixed to ``DataMode.skimmed_raw``."""

    data_mode = DataMode.skimmed_raw

    def output(self):
        """Yield a one-entry mapping from ``"raw_output.root"`` to a local target.

        This replaces the ``"full_output.root"`` key used by the parent class;
        the path still comes from ``_build_data_path``.
        """
        skim_path = _build_data_path(self)
        yield {"raw_output.root": b2luigi.LocalTarget(skim_path)}
class MdstDataTask(DstDataTask):
    """``DstDataTask`` variant with ``data_mode`` fixed to ``DataMode.mdst``."""

    # Everything else, including output(), is inherited unchanged.
    data_mode = DataMode.mdst
def get_serialized_parameters(self):
    """Return the serialized parameters as an ordered mapping with ``git_hash`` first.

    All other entries follow in the order the underlying helper reports them.

    Raises:
        KeyError: if the serialized parameters contain no ``"git_hash"`` entry.
    """
    # NOTE(review): presumably resolves to the module-level helper of the same
    # name (this definition being a method) - confirm against the imports.
    params = get_serialized_parameters(self)

    # Git hash should go to the front: seeding the mapping with it pins the
    # first position, and re-inserting the same key later does not move it.
    ordered = collections.OrderedDict([("git_hash", params["git_hash"])])
    ordered.update(params.items())
    return ordered
class Basf2PathTask(Basf2Task):
num_processes = b2luigi.IntParameter(significant=False, default=0)
max_event = b2luigi.IntParameter(significant=False, default=0)
def create_path(self):
raise NotImplementedError()
@b2luigi.on_temporary_files
def process(self):
assert get_basf2_git_hash() == self.git_hash
try:
import basf2
import ROOT
except ImportError:
raise ImportError("Can not find ROOT or basf2. Can not use the basf2 task.")
if self.num_processes:
import b2luigi
from parse import parse
@enum.unique
class DataMode(enum.Enum):
    """Data modes selectable through the ``data_mode`` parameter of ``DataTask``.

    Every member's value is the same string as its name. ``enum.unique``
    turns an accidentally duplicated value into a ``ValueError`` at class
    creation instead of silently creating an alias of an existing member.
    """

    raw = "raw"
    mdst = "mdst"
    cdst = "cdst"
    skimmed_raw = "skimmed_raw"
class DataTask(b2luigi.ExternalTask):
    """External task described by data mode, experiment, run, prefix and file name."""

    # Restricted to the members of DataMode.
    data_mode = b2luigi.EnumParameter(enum=DataMode)

    # Integer identifiers of the experiment and the run.
    experiment_number = b2luigi.IntParameter()
    run_number = b2luigi.IntParameter()

    # Plain string-like parameters.
    prefix = b2luigi.Parameter()
    file_name = b2luigi.Parameter()
class RawDataTask(DataTask):
    """``DataTask`` with ``data_mode`` pinned to ``DataMode.raw``."""

    data_mode = DataMode.raw

    def output(self):
        """Yield ``{"raw_output.root": target}`` for this task's data file.

        ``target`` is a ``b2luigi.LocalTarget`` at the path computed by
        ``_build_data_path``.
        """
        raw_target = b2luigi.LocalTarget(_build_data_path(self))
        yield {"raw_output.root": raw_target}
class DstDataTask(DataTask):
release = b2luigi.Parameter()
prod = b2luigi.IntParameter()
database = b2luigi.IntParameter()