Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
destination_sample,
skim_dict, skim_stack,
chunk_size, trace_hh_id):
"""
add logsum column to existing joint_tour_destination_sample table
logsum is calculated by computing the mode_choice model utilities for each
sampled (joint_tour, dest_taz) destination alternative in joint_tour_destination_sample,
and computing the logsum of all the utilities for each destination.
"""
trace_label = 'joint_tour_destination_logsums'
model_settings = config.read_model_settings('joint_tour_destination.yaml')
logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
joint_tours_merged = pd.merge(joint_tours, persons_merged,
left_on='person_id', right_index=True, how='left')
# - only include columns actually used in spec
joint_tours_merged = \
logsum.filter_chooser_columns(joint_tours_merged, logsum_settings, model_settings)
logsums_list = []
for tour_type, tour_type_id in iteritems(TOUR_TYPE_ID):
choosers = destination_sample[destination_sample['tour_type_id'] == tour_type_id]
if choosers.shape[0] == 0:
logger.info("%s skipping tour_type %s: no tours", trace_label, tour_type)
continue
if using shadow pricing, we scale size_table counts to sample population
(in which case, they have to be created while single-process)
Scaling is problematic as it breaks household result replicability across sample sizes
It also changes the magnitude of the size terms so if they are used as utilities in
expression files, their importance will diminish relative to other utilities as the sample
size decreases.
Scaling makes most sense for a full sample in conjunction with shadow pricing, where
shadow prices can be adjusted iteratively to bring modelled counts into line with desired
(size table) counts.
"""
use_shadow_pricing = bool(config.setting('use_shadow_pricing'))
shadow_settings = config.read_model_settings('shadow_pricing.yaml')
shadow_pricing_models = shadow_settings['shadow_pricing_models']
# probably ought not scale if not shadow_pricing (breaks partial sample replicability)
# but this allows compatibility with existing CTRAMP behavior...
scale_size_table = shadow_settings.get('SCALE_SIZE_TABLE', False)
if shadow_pricing_models is None:
logger.warning('shadow_pricing_models list not found in shadow_pricing settings')
return
# shadow_pricing_models is dict of {model_selector: model_name}
# since these are scaled to model size, they have to be created while single-process
for model_selector, model_name in iteritems(shadow_pricing_models):
model_settings = config.read_model_settings(model_name)
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    Predict the departure time and duration of each activity for at-work
    subtours.

    NOTE(review): this block appears truncated in this view -- only the
    settings loading and preprocessor setup are visible; the scheduling
    simulation itself is not shown below.
    """
    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    # injected pipeline tables are wrappers; materialize them as DataFrames
    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    # at-work subtours are the subset of tours flagged with this category
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours, register empty results and bail out early
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
skim_dict
joint_tour_destination_sample_spec
size_term_calculator
chunk_size
trace_hh_id
Returns
-------
choices : pandas.DataFrame
destination_sample df
"""
trace_label = 'joint_tour_destination_sample'
model_settings = config.read_model_settings('joint_tour_destination.yaml')
model_spec = simulate.read_model_spec(file_name='non_mandatory_tour_destination_sample.csv')
# choosers are tours - in a sense tours are choosing their destination
choosers = pd.merge(joint_tours, households_merged,
left_on='household_id', right_index=True, how='left')
# FIXME - MEMORY HACK - only include columns actually used in spec
chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
choosers = choosers[chooser_columns]
sample_size = model_settings["SAMPLE_SIZE"]
# specify name interaction_sample should give the alternative column (logsums needs to know it)
alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
# create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
# and a TAZ in the alternatives which get merged during interaction
    +-----------+--------------+----------------+------------+----------------+
    | 23750     | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-----------+--------------+----------------+------------+----------------+
    | ...       |              |                |            |                |
    +-----------+--------------+----------------+------------+----------------+
| 23751 | 12 | 0.408038878552 | 1 | 2.40612135416 |
+-----------+--------------+----------------+------------+----------------+
| 23751 | 14 | 0.972732479292 | 2 | 1.44009018355 |
+-----------+--------------+----------------+------------+----------------+
"""
trace_label = 'atwork_subtour_destination_logsums'
model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
# FIXME - MEMORY HACK - only include columns actually used in spec
persons_merged = logsum.filter_chooser_columns(persons_merged, logsum_settings, model_settings)
# merge persons into tours
choosers = pd.merge(destination_sample,
persons_merged,
left_on='person_id',
right_index=True,
how="left")
logger.info("Running %s with %s rows", trace_label, len(choosers))
tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
tracing.dump_df(DUMP, choosers, trace_label, 'choosers')
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, trace_label):
"""
compute logsums for tours using skims for alt_tdd out_period and in_period
"""
trace_label = tracing.extend_trace_label(trace_label, 'logsums')
logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
logger.info("%s compute_logsums for %d choosers%s alts" %
(trace_label, choosers.shape[0], alt_tdd.shape[0]))
# - setup skims
skim_dict = inject.get_injectable('skim_dict')
skim_stack = inject.get_injectable('skim_stack')
orig_col_name = 'TAZ'
dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(tour_purpose)
odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
skim_key='out_period')
dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
def annotate_table():
    """
    Pipeline step that annotates an existing pipeline table in place.

    The table to annotate and the annotation expressions are driven by a
    model settings yaml file named '<model_name>.yaml', where model_name
    is supplied as a step argument.  The settings file must provide 'DF',
    the name of the pipeline table to annotate.

    Raises
    ------
    RuntimeError
        If the 'model_name' step argument was not provided.
    """

    # model_settings name should have been provided as a step argument
    model_name = inject.get_step_arg('model_name')
    if model_name is None:
        # fail fast with a clear message rather than obscurely
        # attempting to read a settings file named 'None.yaml'
        raise RuntimeError("annotate_table: 'model_name' step argument not provided")

    trace_label = 'annotate_table.%s' % model_name

    model_settings = config.read_model_settings('%s.yaml' % model_name)

    df_name = model_settings['DF']
    df = inject.get_table(df_name).to_frame()

    # compute one column per expression and merge the results into df in place
    results = expressions.compute_columns(
        df,
        model_settings=model_settings,
        trace_label=trace_label)

    assign_in_place(df, results)

    pipeline.replace_table(df_name, df)
def workplace_location_settings(configs_dir):
    """Injectable returning the workplace_location model settings dict."""
    settings_file_name = 'workplace_location.yaml'
    return config.read_model_settings(configs_dir, settings_file_name)
model_settings : str or dict
name of yaml file in configs_dir to load dict from
locals_dict : dict
dict of locals (e.g. utility functions) to add to the execution environment
trace_label
Returns
-------
results: pandas.DataFrame
one column for each expression (except temps with ALL_CAP target names)
same index as df
"""
if isinstance(model_settings, str):
model_settings_name = model_settings
model_settings = config.read_model_settings('%s.yaml' % model_settings)
assert model_settings, "Found no model settings for %s" % model_settings_name
else:
model_settings_name = 'dict'
assert isinstance(model_settings, dict)
assert 'DF' in model_settings, \
"Expected to find 'DF' in %s" % model_settings_name
df_name = model_settings.get('DF')
helper_table_names = model_settings.get('TABLES', [])
expressions_spec_name = model_settings.get('SPEC', model_settings_name)
assert expressions_spec_name is not None, \
"Expected to find 'SPEC' in %s" % model_settings_name
trace_label = tracing.extend_trace_label(trace_label or '', expressions_spec_name)
def mandatory_tour_frequency(persons_merged,
chunk_size,
trace_hh_id):
"""
This model predicts the frequency of making mandatory trips (see the
alternatives above) - these trips include work and school in some combination.
"""
trace_label = 'mandatory_tour_frequency'
model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
alternatives = simulate.read_model_alts(
config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')
choosers = persons_merged.to_frame()
# filter based on results of CDAP
choosers = choosers[choosers.cdap_activity == 'M']
logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))
# - if no mandatory tours
if choosers.shape[0] == 0:
add_null_results(trace_label, model_settings)
return
# - preprocessor
preprocessor_settings = model_settings.get('preprocessor', None)