import logging
import multiprocessing
from collections import OrderedDict

import pandas as pd

from activitysim.core import inject
from activitysim.core import tracing
from activitysim.core import config
from activitysim.core import pipeline
from activitysim.core import mp_tasks
from activitysim.core import chunk

# from activitysim import abm

logger = logging.getLogger('activitysim')
if __name__ == '__main__':

    inject.add_injectable('configs_dir', ['configs', '../example/configs'])

    config.handle_standard_args()

    mp_tasks.filter_warnings()
    tracing.config_logger()

    t0 = tracing.print_elapsed_time()

    coalesce_rules = config.setting('coalesce')

    mp_tasks.coalesce_pipelines(coalesce_rules['names'], coalesce_rules['slice'], use_prefix=False)

    checkpoints_df = pipeline.get_checkpoints()
    file_path = config.output_file_path('coalesce_checkpoints.csv')
    checkpoints_df.to_csv(file_path, index=True)
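# A minimal sketch of the 'coalesce' entry this script reads from settings.yaml.
# The 'names' and 'slice' keys come from the lookups above; the specific process
# names and sliced tables shown here are hypothetical.
#
# coalesce:
#   names:
#     - mp_households_0
#     - mp_households_1
#   slice:
#     tables:
#       - households
#       - persons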
def setup_injectables_and_logging(injectables, locutor=True):
    """
    Install the injectables passed by the parent process and configure logging
    in this sub process.

    Parameters
    ----------
    injectables : dict {<injectable_name>: <value>}
        dict of injectables passed by parent process
    locutor : bool
        is this sub process the designated spokesperson

    Returns
    -------
    None (the injectables are installed as a side effect)
    """
    for k, v in injectables.items():
        inject.add_injectable(k, v)

    inject.add_injectable("is_sub_task", True)
    inject.add_injectable("locutor", locutor)

    config.filter_warnings()

    process_name = multiprocessing.current_process().name
    inject.add_injectable("log_file_prefix", process_name)
    tracing.config_logger()
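# A hedged usage sketch: how a sub-process entry point might call the function
# above. The injectable names and values here are illustrative assumptions, not
# the actual keys activitysim forwards between processes.
def example_sub_process_entry():
    injectables = {
        'configs_dir': 'configs',
        'data_dir': 'data',
        'output_dir': 'output',
    }
    setup_injectables_and_logging(injectables, locutor=True)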
        df = df_full.take(prng.choice(len(df_full), size=households_sample_size, replace=False))
        households_sliced = True

        # if tracing and we missed trace_hh in sample, but it is in full store
        if trace_hh_id and trace_hh_id not in df.index and trace_hh_id in df_full.index:
            # replace first hh in sample with trace_hh
            logger.debug("replacing household %s with %s in household sample" %
                         (df.index[0], trace_hh_id))
            df_hh = df_full.loc[[trace_hh_id]]
            df = pd.concat([df_hh, df[1:]])
    else:
        df = df_full
    inject.add_injectable('households_sliced', households_sliced)

    logger.info("loaded households %s" % (df.shape,))

    df.index.name = 'household_id'

    # FIXME - pathological knowledge of name of chunk_id column used by chunked_choosers_by_chunk_id
    assert 'chunk_id' not in df.columns
    df['chunk_id'] = pd.Series(list(range(len(df))), df.index)

    # replace table function with dataframe
    inject.add_table('households', df)

    pipeline.get_rn_generator().add_channel(df, 'households')

    if trace_hh_id:
        tracing.register_traceable_table('households', df)
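# A self-contained sketch of the sampling-with-trace-household pattern above on
# toy data, assuming prng is a seeded numpy RandomState; the household ids and
# sample size are made up.
import numpy as np

df_full = pd.DataFrame({'income': [10, 20, 30, 40]},
                       index=pd.Index([100, 101, 102, 103], name='household_id'))
prng = np.random.RandomState(seed=42)
df = df_full.take(prng.choice(len(df_full), size=2, replace=False))

trace_hh_id = 103
if trace_hh_id not in df.index and trace_hh_id in df_full.index:
    # swap the trace household in for the first sampled household
    df = pd.concat([df_full.loc[[trace_hh_id]], df[1:]])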
def override_injectable(name, value):
    inject.add_injectable(name, value)
    injectables.append(name)
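# Presumably override_injectable is nested in a scope where `injectables` is a
# list of names to forward to sub-processes; a hypothetical sketch of the
# pattern:
injectables = []                    # names recorded for later forwarding
override_injectable('locutor', False)
assert injectables == ['locutor']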
def cache_spec(hhsize, spec):
    spec_name = cached_spec_name(hhsize)
    # cache as injectable
    inject.add_injectable(spec_name, spec)
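# A hedged sketch of the counterpart helpers cache_spec relies on, assuming
# cached_spec_name just builds a per-household-size injectable name (the exact
# name format is an assumption):
def cached_spec_name(hhsize):
    return 'cdap_spec_%s' % hhsize

def get_cached_spec(hhsize):
    # returns None if no spec has been cached for this household size
    return inject.get_injectable(cached_spec_name(hhsize), None)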
def run_simulation(queue, step_info, resume_after, shared_data_buffer):
    """
    Run this step's models as a subtask.

    Parameters
    ----------
    queue : multiprocessing.Queue
    step_info : dict
        step_info for current step from multiprocess_steps
    resume_after : str or None
    shared_data_buffer : dict
        dict of shared data (e.g. skims and shadow_pricing)
    """
    models = step_info['models']
    chunk_size = step_info['chunk_size']
    # step_label = step_info['name']
    num_processes = step_info['num_processes']

    inject.add_injectable('data_buffers', shared_data_buffer)
    inject.add_injectable("chunk_size", chunk_size)
    inject.add_injectable("num_processes", num_processes)

    if resume_after:
        logger.info('resume_after %s', resume_after)

        # if they specified a resume_after model, check to make sure it is checkpointed
        if resume_after != LAST_CHECKPOINT and \
                resume_after not in pipeline.get_checkpoints()[pipeline.CHECKPOINT_NAME].values:
            # if not checkpointed, then fall back to last checkpoint
            logger.info("resume_after checkpoint '%s' not in pipeline.", resume_after)
            resume_after = LAST_CHECKPOINT

    pipeline.open_pipeline(resume_after)
    last_checkpoint = pipeline.last_checkpoint()
    if last_checkpoint in models:
        logger.info("Resuming model run list after %s", last_checkpoint)
def drop_breadcrumb(step_name, crumb, value=True):
    """
    Record a crumb for this step in the breadcrumbs injectable and file.

    We want to know which sub-processes completed successfully, because if resume_after
    is LAST_CHECKPOINT we don't have to rerun the successful ones.

    Parameters
    ----------
    step_name : str
    crumb : str
    value : yaml-writable value
    """
    breadcrumbs = inject.get_injectable('breadcrumbs', OrderedDict())
    breadcrumbs.setdefault(step_name, {'name': step_name})[crumb] = value
    inject.add_injectable('breadcrumbs', breadcrumbs)

    write_breadcrumbs(breadcrumbs)
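# A hedged sketch of the write_breadcrumbs helper used above, assuming it dumps
# the breadcrumbs dict to a yaml file in the output directory (the file name is
# an assumption):
import yaml

def write_breadcrumbs(breadcrumbs):
    with open(config.output_file_path('breadcrumbs.yaml'), 'w') as f:
        yaml.dump(dict(breadcrumbs), f, default_flow_style=False)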