Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
This fetches from run metadata, and if this fails, it
estimates it using data metadata from targets.
"""
try:
# Use run metadata, if it is available, to get
# the run start time (floored to seconds)
t0 = self.run_metadata(run_id, 'start')['start']
t0 = t0.replace(tzinfo=datetime.timezone.utc)
return int(t0.timestamp()) * int(1e9)
except (strax.RunMetadataNotAvailable, KeyError):
pass
# Get an approx start from the data itself,
# then floor it to seconds for consistency
if targets:
for t in strax.to_str_tuple(targets):
try:
t0 = self.get_meta(run_id, t)['chunks'][0]['start']
return (int(t0) // int(1e9)) * int(1e9)
except strax.DataNotAvailable:
pass
warnings.warn(
"Could not estimate run start time from "
"run metadata: assuming it is 0",
UserWarning)
return 0
def make(self, run_id: ty.Union[str, tuple, list],
         targets, save=tuple(), max_workers=None,
         _skip_if_built=True,
         **kwargs) -> None:
    """Compute target for run_id. Returns nothing (None).
    {get_docs}
    """
    # Normalize to a tuple of run id strings (multi-run support)
    _run_ids = strax.to_str_tuple(run_id)
    if not len(_run_ids):
        raise ValueError("Cannot build empty list of runs")

    if len(_run_ids) > 1:
        # Several runs: fan out via multi_run. The per-run arrays are
        # discarded, since make() only materializes data in storage.
        return strax.multi_run(
            self.get_array, _run_ids, targets=targets,
            throw_away_result=True,
            save=save, max_workers=max_workers, **kwargs)

    # Single run: optionally skip work if the data is already stored
    if _skip_if_built and self.is_stored(run_id, targets):
        return

    # Exhaust the iterator purely for its side effect (saving chunks)
    for _chunk in self.get_iter(_run_ids[0], targets,
                                save=save, max_workers=max_workers,
                                **kwargs):
        pass
def __init__(self):
    """Validate depends_on and introspect compute()'s signature.

    Records which ordinary data-kind arguments compute() takes, and
    whether it additionally accepts the special chunk_i or start/end
    arguments.
    """
    if not hasattr(self, 'depends_on'):
        raise ValueError('depends_on not provided for '
                         f'{self.__class__.__name__}')
    self.depends_on = strax.to_str_tuple(self.depends_on)

    # Names of compute()'s parameters; iterating the signature's
    # parameter mapping yields the names in declaration order.
    signature_pars = list(inspect.signature(self.compute).parameters)

    # chunk_i is passed separately, not as a data argument
    if 'chunk_i' in signature_pars:
        self.compute_takes_chunk_i = True
        signature_pars.remove('chunk_i')

    # start/end must come as a pair; they too are passed separately
    if 'start' in signature_pars:
        if 'end' not in signature_pars:
            raise ValueError(f"Compute of {self} takes start, "
                             f"so it should also take end.")
        self.compute_takes_start_end = True
        signature_pars.remove('start')
        signature_pars.remove('end')

    self.compute_pars = signature_pars
def get_array(self, run_id: ty.Union[str, tuple, list],
              targets, save=tuple(), max_workers=None,
              **kwargs) -> np.ndarray:
    """Compute target for run_id and return as numpy array
    {get_docs}
    """
    run_ids = strax.to_str_tuple(run_id)
    if len(run_ids) > 1:
        # Several runs: multi_run re-invokes this method once per run
        # and collects the per-run arrays.
        partial_results = strax.multi_run(
            self.get_array, run_ids, targets=targets,
            save=save, max_workers=max_workers, **kwargs)
    else:
        # Single run: gather the data payload of every chunk
        partial_results = [
            chunk.data
            for chunk in self.get_iter(
                run_ids[0], targets,
                save=save, max_workers=max_workers, **kwargs)]
    return np.concatenate(partial_results)
dsets = dsets[mask]
if include_tags is not None:
dsets = dsets[_tags_match(dsets,
include_tags,
pattern_type,
ignore_underscore)]
if exclude_tags is not None:
dsets = dsets[True ^ _tags_match(dsets,
exclude_tags,
pattern_type,
ignore_underscore)]
have_available = strax.to_str_tuple(available)
for d in have_available:
if not d + '_available' in dsets.columns:
# Get extra availability info from the run db
self.runs[d + '_available'] = np.in1d(
self.runs.name.values,
self.list_available(d))
dsets = dsets[dsets[d + '_available']]
return dsets
{from_run: np.transpose([start, end])})
elif not 'run_id' in data:
raise ValueError(
"Must provide from_run or data with a run_id column "
"to define a superrun")
else:
df = pd.DataFrame(dict(starts=start, ends=end,
run_id=data['run_id']))
return self.define_run(
name,
{run_id: rs[['start', 'stop']].values.transpose()
for run_id, rs in df.groupby('fromrun')})
if isinstance(data, (list, tuple)):
# list of runids
data = strax.to_str_tuple(data)
return self.define_run(
name,
{run_id: 'all' for run_id in data})
if not isinstance(data, dict):
raise ValueError(f"Can't define run from {type(data)}")
# Find start and end time of the new run = earliest start time of other runs
run_md = dict(start=float('inf'), end=0, livetime=0)
for _subrunid in data:
doc = self.run_metadata(_subrunid, ['start', 'end'])
run_md['start'] = min(run_md['start'], doc['start'])
run_md['end'] = max(run_md['end'], doc['end'])
run_md['livetime'] += doc['end'] - doc['start']
# Superrun names must start with an underscore
def scan_runs(self: strax.Context,
check_available=tuple(),
store_fields=tuple()):
"""Update and return self.runs with runs currently available
in all storage frontends.
:param check_available: Check whether these data types are available
Availability of xxx is stored as a boolean in the xxx_available
column.
:param store_fields: Additional fields from run doc to include
as rows in the dataframe.
The context options scan_availability and store_run_fields list
data types and run fields, respectively, that will always be scanned.
"""
store_fields = tuple(set(
list(strax.to_str_tuple(store_fields))
+ ['name', 'number', 'tags', 'mode',
strax.RUN_DEFAULTS_KEY]
+ list(self.context_config['store_run_fields'])))
check_available = tuple(set(
list(strax.to_str_tuple(check_available))
+ list(self.context_config['check_available'])))
docs = None
for sf in self.storage:
_temp_docs = []
for doc in sf._scan_runs(store_fields=store_fields):
# If there is no number, make one from the name
if 'number' not in doc:
if 'name' not in doc:
raise ValueError(f"Invalid run doc {doc}, contains "
f"neither name nor number.")