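# set_random_seed is referenced below; a minimal sketch of its definition,
# assuming numpy is imported as np (the zero seed value is illustrative):
def set_random_seed():
    np.random.seed(0)
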
# uncomment the line below to set the random seed so that run results are reproducible
# inject.add_injectable("set_random_seed", set_random_seed)
tracing.config_logger()
t0 = print_elapsed_time()
taz_skim_stack = inject.get_injectable('taz_skim_dict')
t0 = print_elapsed_time("load taz_skim_dict", t0)
tap_skim_stack = inject.get_injectable('tap_skim_dict')
t0 = print_elapsed_time("load tap_skim_dict", t0)
network_los = inject.get_injectable('network_los')
t0 = print_elapsed_time("load network_los", t0)
# test sizes for all implemented methods
VECTOR_TEST_SIZEs = (10000, 100000, 1000000, 5000000, 10000000, 20000000)
# VECTOR_TEST_SIZEs = [20000000, 40000000]
for size in VECTOR_TEST_SIZEs:
    logger.info("VECTOR_TEST_SIZE %s" % size)

    get_taz(size)
    t0 = print_elapsed_time_per_unit("get_taz", t0, size)

    get_tap(size)
    t0 = print_elapsed_time_per_unit("get_tap", t0, size)
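
# A plausible sketch of the print_elapsed_time_per_unit helper used above,
# assuming it reports total elapsed time plus time per unit and returns a
# fresh timestamp for the next measurement (the real helper is not shown here):
import time

def print_elapsed_time_per_unit_sketch(msg, t0, units):
    t1 = time.time()
    seconds = t1 - t0
    # report both total time and microseconds per unit so runs with
    # different VECTOR_TEST_SIZE values can be compared directly
    logger.info("Time to execute %s : %.3f s (%.2f us per unit)"
                % (msg, seconds, seconds * 1e6 / units))
    return t1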
def get_shadow_pricing_info():
    """
    return dict with info about dtype and shapes of desired and modeled size tables

    block shape is (num_zones, num_segments + 1)

    Returns
    -------
    shadow_pricing_info: dict
        dtype: <sp_dtype>,
        block_shapes: dict {<model_selector>: <block_shape>}
    """

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    shadow_pricing_models = shadow_settings['shadow_pricing_models']

    blocks = OrderedDict()
    for model_selector in shadow_pricing_models:

        sp_rows = len(land_use)
        sp_cols = len(size_terms[size_terms.model_selector == model_selector])

        # extra tally column for TALLY_CHECKIN and TALLY_CHECKOUT semaphores
        blocks[block_name(model_selector)] = (sp_rows, sp_cols + 1)

    sp_dtype = np.int64

    shadow_pricing_info = {
        'dtype': sp_dtype,
        'block_shapes': blocks,
    }

    return shadow_pricing_info
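
# Hedged usage sketch: one way the block_shapes returned above might drive
# shared-memory allocation for multiprocess shadow pricing. The function name
# and the RawArray approach are illustrative, not activitysim's exact code.
import ctypes
import multiprocessing

def buffers_for_shadow_pricing_sketch(shadow_pricing_info):
    buffers = {}
    for block_key, block_shape in shadow_pricing_info['block_shapes'].items():
        # one flat 64-bit shared array per model_selector block, sized to
        # hold the whole (num_zones, num_segments + 1) block
        buffers[block_key] = multiprocessing.RawArray(
            ctypes.c_int64, int(np.prod(block_shape)))
    return buffers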
def local_utilities():
    """
    Dict of useful modules and functions to provide as locals for use in eval of expressions

    Returns
    -------
    utility_dict : dict
        name, entity pairs of locals
    """

    utility_dict = {
        'pd': pd,
        'np': np,
        'constants': constants,
        'reindex': util.reindex,
        'reindex_i': reindex_i,
        'setting': config.setting,
        'skim_time_period_label': skim_time_period_label,
        'other_than': other_than,
        'skim_dict': inject.get_injectable('skim_dict', None)
    }

    return utility_dict
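
# Illustrative example (not from an actual spec file) of how these locals are
# typically consumed: an expression string is evaluated with utility_dict in
# scope, assuming the module-level names referenced above are all defined.
locals_d = local_utilities()
locals_d['df'] = pd.DataFrame({'income': [30000, 90000]})
flag = eval("np.where(df.income > 50000, 1, 0)", globals(), locals_d)  # -> [0, 1]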
# if they specified a resume_after model, check to make sure it is checkpointed
if resume_after != LAST_CHECKPOINT and \
        resume_after not in pipeline.get_checkpoints()[pipeline.CHECKPOINT_NAME].values:
    # if not checkpointed, then fall back to last checkpoint
    logger.info("resume_after checkpoint '%s' not in pipeline.", resume_after)
    resume_after = LAST_CHECKPOINT

pipeline.open_pipeline(resume_after)
last_checkpoint = pipeline.last_checkpoint()

if last_checkpoint in models:
    logger.info("Resuming model run list after %s", last_checkpoint)
    models = models[models.index(last_checkpoint) + 1:]

# preload any bulky injectables (e.g. skims) not in pipeline
inject.get_injectable('preload_injectables', None)

t0 = tracing.print_elapsed_time()
for model in models:
    t1 = tracing.print_elapsed_time()

    try:
        pipeline.run_model(model)
    except Exception as e:
        logger.warning("%s exception running %s model: %s",
                       type(e).__name__, model, str(e), exc_info=True)
        raise e

    queue.put({'model': model, 'time': time.time() - t1})

tracing.print_elapsed_time("run (%s models)" % len(models), t0)
def get_trace_target(df, slicer):
    """
    get target ids and column or index to identify target trace rows in df
    """

    # id or ids to slice by (e.g. hh_id or person_ids or tour_ids)
    target_ids = None
    # column name to slice on, or None to slice on index
    column = None

    if slicer is None:
        slicer = df.index.name

    if isinstance(df, pd.DataFrame):
        # always slice by household id if we can
        if 'household_id' in df.columns:
            slicer = 'household_id'
        if slicer in df.columns:
            column = slicer

    if column is None and df.index.name != slicer:
        raise RuntimeError("bad slicer '%s' for df with index '%s'" % (slicer, df.index.name))

    traceable_table_indexes = inject.get_injectable('traceable_table_indexes', {})
    traceable_table_ids = inject.get_injectable('traceable_table_ids', {})

    if df.empty:
        target_ids = None
    elif slicer in traceable_table_indexes:
        # maps 'person_id' to 'persons', etc
        table_name = traceable_table_indexes[slicer]
        target_ids = traceable_table_ids.get(table_name, [])
    elif slicer == 'TAZ':
        target_ids = inject.get_injectable('trace_od', [])

    return target_ids, column
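
# Hedged usage example with a made-up persons table: the slicer falls back
# from the person_id index to the household_id column, so rows are traced by
# household id rather than by index.
persons_df = pd.DataFrame(
    {'household_id': [1, 1, 2]},
    index=pd.Index([10, 11, 12], name='person_id'))
target_ids, column = get_trace_target(persons_df, slicer=None)
# column == 'household_id'; target_ids is resolved from the registered
# traceable_table_indexes/traceable_table_ids injectables (None if nothing
# has been registered yet)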
logger.info("load_checkpoint %s timestamp %s"
% (checkpoint_name, _PIPELINE.last_checkpoint['timestamp']))
tables = checkpointed_tables()
loaded_tables = {}
for table_name in tables:
# read dataframe from pipeline store
df = read_df(table_name, checkpoint_name=_PIPELINE.last_checkpoint[table_name])
logger.info("load_checkpoint table %s %s" % (table_name, df.shape))
# register it as an orca table
rewrap(table_name, df)
loaded_tables[table_name] = df
# register tables for tracing, in the order that tracing.register_traceable_table expects them
traceable_tables = inject.get_injectable('traceable_tables', [])
for table_name in traceable_tables:
    if table_name in loaded_tables:
        tracing.register_traceable_table(table_name, loaded_tables[table_name])
# add tables of known rng channels
rng_channels = inject.get_injectable('rng_channels', [])
if rng_channels:
    logger.debug("loading random channels %s" % rng_channels)
    for table_name in rng_channels:
        if table_name in loaded_tables:
            logger.debug("adding channel %s" % (table_name,))
            _PIPELINE.rng().add_channel(table_name, loaded_tables[table_name])
def override_setting(key, value):
    new_settings = inject.get_injectable('settings')
    new_settings[key] = value
    inject.add_injectable('settings', new_settings)
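
# Example: shrinking the household sample before a smoke-test run
# ('households_sample_size' is a standard activitysim setting; the value is arbitrary)
override_setting('households_sample_size', 100)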
tracing.config_logger()
log_settings(injectables)
t0 = tracing.print_elapsed_time()
# cleanup if not resuming
if not config.setting('resume_after', False):
    cleanup_output_files()
run_list = mp_tasks.get_run_list()
if run_list['multiprocess']:
    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = list(set(injectables) | set(['data_dir', 'configs_dir', 'output_dir']))
    injectables = {k: inject.get_injectable(k) for k in injectables}
else:
    injectables = None
run(run_list, injectables)
# pipeline will be closed if multiprocessing
# if you want access to tables, BE SURE TO OPEN WITH '_' or all tables will be reinitialized
# pipeline.open_pipeline('_')
t0 = tracing.print_elapsed_time("everything", t0)