Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment — the call that these trailing keyword arguments
# belong to is opened above this view.
locals_dict=locals_d,
trace_label=trace_label)
# Choose a time-of-day alternative (tdd) for each joint tour and update the
# shared person time-window timetable as a side effect.
tdd_choices, timetable = vectorize_joint_tour_scheduling(
joint_tours, joint_tour_participants,
persons_merged,
tdd_alts,
spec=model_spec,
constants=locals_d,
chunk_size=chunk_size,
trace_label=trace_label)
# persist the updated person time windows back to the pipeline
timetable.replace_table()
# merge the tdd choice columns into tours (in place) and checkpoint the table
assign_in_place(tours, tdd_choices)
pipeline.replace_table("tours", tours)
# updated df for tracing
joint_tours = tours[tours.tour_category == 'joint']
if trace_hh_id:
tracing.trace_df(joint_tours,
label="joint_tour_scheduling",
slicer='household_id')
def run(run_list, injectables=None):
    """Run the models named in run_list, multiprocess or single-process.

    Parameters
    ----------
    run_list : dict
        expects keys 'multiprocess', 'models', and 'resume_after'
    injectables : optional
        passed through to the multiprocess runner
    """
    if not run_list['multiprocess']:
        # single-process: run the whole pipeline in this process
        logger.info("run single process simulation")
        pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])
        pipeline.close_pipeline()
        # report chunk high-water marks for chunk_size tuning
        chunk.log_write_hwm()
        return

    logger.info("run multiprocess simulation")
    mp_tasks.run_multiprocess(run_list, injectables)
def replace_table(self):
    """Checkpoint the person time-window table back into the pipeline.

    NOTE: writing to the numpy person_windows array appears to write
    through to person_windows_df, so the pandas dataframe does not need
    to be refreshed before being written.
    """
    table_name = self.person_windows_table_name
    pipeline.replace_table(table_name, self.person_windows_df)
# NOTE(review): fragment — subtours, tours, persons_merged, model_spec, etc.
# are defined above this view.
# parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
# attach each parent tour's already-chosen time-of-day (tdd) from tours
parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)
# schedule the at-work subtours within their parent tours' time windows
tdd_choices = vectorize_subtour_scheduling(
parent_tours,
subtours,
persons_merged,
tdd_alts, model_spec,
constants=constants,
chunk_size=chunk_size,
trace_label=trace_label)
# merge the tdd choice columns into tours (in place) and checkpoint the table
assign_in_place(tours, tdd_choices)
pipeline.replace_table("tours", tours)
if trace_hh_id:
tracing.trace_df(tours[tours.tour_category == 'atwork'],
label="atwork_subtour_scheduling",
slicer='person_id',
index_label='tour_id',
columns=None)
if DUMP:
subtours = tours[tours.tour_category == 'atwork']
parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]
tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')
# NOTE(review): parent_tours is a boolean-mask slice of tours here, so this
# chained assignment may trigger SettingWithCopyWarning and not write
# through to tours — confirm intent (consider .copy() before mutating).
parent_tours['parent_tour_id'] = parent_tours.index
# NOTE(review): fragment — tail of an if/elif chain over rule['slice_by'];
# the earlier branches and the enclosing loop are above this view.
# slice a table with a recognized slicer_column
source_df = sliced_tables[rule['source']]
# keep only rows whose rule['column'] value appears in the (already sliced)
# source table's index
sliced_tables[table_name] = df[df[rule['column']].isin(source_df.index)]
elif rule['slice_by'] is None:
# don't slice mirrored tables
sliced_tables[table_name] = df
else:
raise RuntimeError("Unrecognized slice rule '%s' for table %s" %
(rule['slice_by'], table_name))
# - write table to pipeline
hdf5_key = pipeline.pipeline_table_key(table_name, checkpoint_name)
pipeline_store[hdf5_key] = sliced_tables[table_name]
# record the checkpoints dataframe itself in the pipeline store
logger.debug("writing checkpoints (%s) to %s in %s",
checkpoints_df.shape, pipeline.CHECKPOINT_TABLE_NAME, pipeline_path)
pipeline_store[pipeline.CHECKPOINT_TABLE_NAME] = checkpoints_df
# NOTE(review): fragment — persons_df, model_settings, households, and
# trace_label are defined above this view.
# checkpoint the annotated persons table
pipeline.replace_table("persons", persons_df)
if trace_hh_id:
tracing.trace_df(persons_df,
label=trace_label,
warn_if_empty=True)
# - annotate households table
if 'annotate_households' in model_settings:
households_df = households.to_frame()
# assign expression-file columns in place on the households dataframe
expressions.assign_columns(
df=households_df,
model_settings=model_settings.get('annotate_households'),
trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
pipeline.replace_table("households", households_df)
if trace_hh_id:
tracing.trace_df(households_df,
label=trace_label,
warn_if_empty=True)
return persons_df
def initialize_landuse():
    """Annotate the land_use table and create an empty accessibility table.

    Reads the model settings, applies any table annotations, then registers
    an accessibility dataframe (no columns, one row per land_use index
    entry) in the pipeline.
    """
    trace_label = 'initialize_landuse'

    settings = config.read_model_settings('initialize_landuse.yaml', mandatory=True)
    annotate_tables(settings, trace_label)

    # create accessibility: empty frame sharing the land_use index
    land_use = pipeline.get_table('land_use')
    accessibility = pd.DataFrame(index=land_use.index)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility)
# NOTE(review): fragment — df (the households dataframe), households_sliced,
# and trace_hh_id are defined above this view.
# persons table
inject.add_injectable('households_sliced', households_sliced)
logger.info("loaded households %s" % (df.shape,))
df.index.name = 'household_id'
# FIXME - pathological knowledge of name of chunk_id column used by chunked_choosers_by_chunk_id
assert 'chunk_id' not in df.columns
# sequential 0..n-1 chunk ids aligned with the households index
df['chunk_id'] = pd.Series(list(range(len(df))), df.index)
# replace table function with dataframe
inject.add_table('households', df)
# register households as a random-number channel for reproducible draws
pipeline.get_rn_generator().add_channel(df, 'households')
if trace_hh_id:
tracing.register_traceable_table('households', df)
tracing.trace_df(df, "raw.households", warn_if_empty=True)
return df
def run(run_list, injectables=None):
    """Dispatch the model run to the multiprocess or single-process path.

    Parameters
    ----------
    run_list : dict
        expects keys 'multiprocess', 'models', and 'resume_after'
    injectables : optional
        forwarded to mp_tasks.run_multiprocess
    """
    multiprocess = run_list['multiprocess']

    if multiprocess:
        logger.info("run multiprocess simulation")
        mp_tasks.run_multiprocess(run_list, injectables)
        return

    # single-process: run the pipeline here, then close and report chunk stats
    logger.info("run single process simulation")
    pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])
    pipeline.close_pipeline()
    chunk.log_write_hwm()
# NOTE(review): fragment — `children = \` (the lhs of this chained
# expression) is above this view; this selects up to 3 children per
# household, ordered by person type.
persons.loc[persons[_ptype_].isin(CHILD_PTYPES), [_hh_id_, _ptype_, _age_]]\
.sort_values(by=[_hh_id_, _ptype_], ascending=[True, True])\
.groupby(_hh_id_).head(3)
# tag the selected children
persons.loc[children.index, 'cdap_rank'] = RANK_CHILD
del children
# choose up to MAX_HHSIZE, preferring anyone already chosen
# others = \
#     persons[[_hh_id_, 'cdap_rank']]\
#         .sort_values(by=[_hh_id_, 'cdap_rank'], ascending=[True, True])\
#         .groupby(_hh_id_).head(MAX_HHSIZE)
# choose up to MAX_HHSIZE, choosing randomly
others = persons[[_hh_id_, 'cdap_rank']].copy()
# reproducible per-person random draws from the pipeline's rn generator
others['random_order'] = pipeline.get_rn_generator().random_for_df(persons)
others = \
others\
.sort_values(by=[_hh_id_, 'random_order'], ascending=[True, True])\
.groupby(_hh_id_).head(MAX_HHSIZE)
# tag the backfilled persons
persons.loc[others[others.cdap_rank == RANK_UNASSIGNED].index, 'cdap_rank'] \
= RANK_BACKFILL
del others
# assign person number in cdapPersonArray preference order
# i.e. convert cdap_rank from category to index in order of category rank within household
# groupby rank() is slow, so we compute rank artisanally
# save time by sorting only the columns we need (persons is big, and sort moves data)
p = persons[[_hh_id_, 'cdap_rank', _age_]]\
.sort_values(by=[_hh_id_, 'cdap_rank', _age_], ascending=[True, True, True])