Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): excerpt from the interior of a larger function. The enclosing
# def and the loop that the `break` below belongs to are outside this view,
# and the original indentation has been lost.
# hand the choices from this iteration to spc
spc.set_choices(choices_df)
# trace files are written only when `locutor` is truthy
if locutor:
spc.write_trace_files(iteration)
# leave the loop once shadow pricing is in use and check_fit accepts this iteration
# NOTE(review): this logs through the `logging` module directly, whereas other
# code in this file logs through `logger` - confirm that is intended
if spc.use_shadow_pricing and spc.check_fit(iteration):
logging.info("%s converged after iteration %s" % (trace_label, iteration,))
break
# - shadow price table
if locutor:
# table names come from model_settings; each table is added only if its key is present
if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
if 'MODELED_SIZE_TABLE' in model_settings:
inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)
# name of the persons column that receives the chosen destination
dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
tracing.print_summary(dest_choice_column_name, choices, value_counts=True)
persons_df = persons.to_frame()
# We only chose school locations for the subset of persons who go to school
# so we backfill the empty choices with -1 to code as no school location
NO_DEST_TAZ = -1
# align choices to the full persons index; rows without a choice get NO_DEST_TAZ
persons_df[dest_choice_column_name] = \
choices.reindex(persons_df.index).fillna(NO_DEST_TAZ).astype(int)
# - annotate persons table
if 'annotate_persons' in model_settings:
expressions.assign_columns(
df=persons_df,
def person_windows(persons, tdd_alts):
    """
    Create the person timetable windows table.

    Passes `persons` and `tdd_alts` to tt.create_timetable_windows, registers
    the result with inject under the name 'person_windows', and returns it.
    """
    windows_df = tt.create_timetable_windows(persons, tdd_alts)

    # register the result under its fixed table name before handing it back
    inject.add_table('person_windows', windows_df)

    return windows_df
def persons(households, trace_hh_id):
    """
    Load the raw persons table and register it under the name 'persons'.

    Parameters
    ----------
    households :
        passed through unchanged to read_raw_persons
    trace_hh_id :
        when truthy, the loaded table is registered as traceable and traced

    Returns
    -------
    df :
        the object returned by read_raw_persons, with its index named 'person_id'
    """
    df = read_raw_persons(households)

    # pass the shape as a logging argument so the message is only formatted
    # when a handler actually emits the record
    logger.info("loaded persons %s", df.shape)

    df.index.name = 'person_id'

    # replace table function with dataframe
    inject.add_table('persons', df)

    pipeline.get_rn_generator().add_channel(df, 'persons')

    # tracing is only set up when a trace household id was supplied
    if trace_hh_id:
        tracing.register_traceable_table('persons', df)
        tracing.trace_df(df, "raw.persons", warn_if_empty=True)

    return df
# NOTE(review): excerpt from the interior of a larger function; the enclosing
# def is outside this view and the original indentation has been lost.
# build the synthetic households table with merge_seed_data, using the
# 'households' entry of synthetic_tables_settings as options, then register it
synthetic_households = merge_seed_data(
expanded_household_ids,
households,
options=synthetic_tables_settings.get('households'),
trace_label='households')
inject.add_table('synthetic_households', synthetic_households)
# same call for persons, using the 'persons' entry of synthetic_tables_settings
synthetic_persons = merge_seed_data(
expanded_household_ids,
persons,
options=synthetic_tables_settings.get('persons'),
trace_label='persons')
inject.add_table('synthetic_persons', synthetic_persons)
# NOTE(review): excerpt from the interior of a larger function; the enclosing
# def is outside this view and the original indentation has been lost.
# build canonical weights table
# start from a copy of the seed_geography column so the columns added below
# are not written into incidence_df
seed_weights_df = incidence_df[[seed_geography]].copy()
seed_weights_df['preliminary_balanced_weight'] = weights
seed_weights_df['sample_weight'] = sample_weights
# copy household_id_col index to named column
seed_weights_df[setting('household_id_col')] = seed_weights_df.index
# this is just a convenience if there are no meta controls
# ('final' step arg defaults to False, so this column is only added on request)
if inject.get_step_arg('final', default=False):
seed_weights_df['balanced_weight'] = seed_weights_df['preliminary_balanced_weight']
# the 'repop' step arg (default False) is forwarded as add_table's replace flag
repop = inject.get_step_arg('repop', default=False)
inject.add_table(weight_table_name(seed_geography), seed_weights_df, replace=repop)
incidence_table
household_groups (if GROUP_BY_INCIDENCE_SIGNATURE setting is enabled)
modifies tables:
households
persons
"""
# NOTE(review): body excerpt of a function whose def line and docstring opening
# are outside this view; the original indentation has been lost.
seed_geography = setting('seed_geography')
# materialize both input tables as dataframes
households_df = households.to_frame()
persons_df = persons.to_frame()
# build the crosswalk table and register it as 'crosswalk'
crosswalk_df = build_crosswalk_table()
inject.add_table('crosswalk', crosswalk_df)
# read the control spec from configs_dir; 'controls.csv' is presumably the
# fallback when the 'control_file_name' setting is absent - TODO confirm
control_spec = read_control_spec(setting('control_file_name', 'controls.csv'), configs_dir)
inject.add_table('control_spec', control_spec)
# NOTE(review): this reads the `settings` mapping directly while the lines
# above go through setting(...) - confirm both are in scope here
geographies = settings['geographies']
# one control table per geography, registered under control_table_name(g)
for g in geographies:
controls = build_control_table(g, control_spec, crosswalk_df)
inject.add_table(control_table_name(g), controls)
# filter_households returns replacement households/persons frames, which are
# then written back to the pipeline under their original table names
households_df, persons_df = filter_households(households_df, persons_df, crosswalk_df)
pipeline.replace_table('households', households_df)
pipeline.replace_table('persons', persons_df)
# the incidence table is built from the filtered frames
incidence_table = \
build_incidence_table(control_spec, households_df, persons_df, crosswalk_df)
def land_use():
    """
    Load the land use table and register it under the name 'land_use'.

    Returns
    -------
    df :
        the object returned by read_input_table("land_use/taz_data"), with its
        index named 'TAZ'
    """
    df = read_input_table("land_use/taz_data")

    # pass the shape as a logging argument so the message is only formatted
    # when a handler actually emits the record
    logger.info("loaded land_use %s", df.shape)

    df.index.name = 'TAZ'

    # replace table function with dataframe
    inject.add_table('land_use', df)

    return df
else:
df = df_full
# NOTE(review): tail of a function whose def line is outside this view; the
# original indentation has been lost.
# publish households_sliced as an injectable
# NOTE(review): the original comment here read "persons table", which does not
# match the line below - confirm and correct upstream
inject.add_injectable('households_sliced', households_sliced)
logger.info("loaded households %s" % (df.shape,))
df.index.name = 'household_id'
# FIXME - pathological knowledge of name of chunk_id column used by chunked_choosers_by_chunk_id
assert 'chunk_id' not in df.columns
# give every row its own chunk_id: 0 .. len(df)-1, in row order
df['chunk_id'] = pd.Series(list(range(len(df))), df.index)
# replace table function with dataframe
inject.add_table('households', df)
pipeline.get_rn_generator().add_channel(df, 'households')
# tracing is only set up when a trace household id was supplied
if trace_hh_id:
tracing.register_traceable_table('households', df)
tracing.trace_df(df, "raw.households", warn_if_empty=True)
return df
def out_table(table_name, df):
    """
    Emit a summary table, either as a csv file or as an injected table.

    The table name is prefixed with "summary_". When the module-level AS_CSV
    flag is truthy the frame is written to "<name>.csv" in the 'output_dir'
    injectable; otherwise it is registered with inject.add_table.

    Parameters
    ----------
    table_name :
        base name of the table, without the "summary_" prefix
    df :
        the table to write or register
    """
    table_name = "summary_%s" % table_name

    if AS_CSV:
        file_name = "%s.csv" % table_name
        output_dir = inject.get_injectable('output_dir')
        file_path = os.path.join(output_dir, file_name)

        # logging args are interpolated lazily, only if the record is emitted
        logger.info("writing output file %s", file_path)

        # only write the index column when the index has a name
        write_index = df.index.name is not None
        df.to_csv(file_path, index=write_index)
    else:
        logger.info("saving summary table %s", table_name)

        # the 'repop' step arg (default False) is forwarded as add_table's replace flag
        repop = inject.get_step_arg('repop', default=False)
        inject.add_table(table_name, df, replace=repop)