# uncomment the line below to set random seed so that run results are reproducible
# set_random_seed()
inject.add_injectable("set_random_seed", set_random_seed)
tracing.config_logger()
t0 = print_elapsed_time()
taz_skim_stack = inject.get_injectable('taz_skim_dict')
t0 = print_elapsed_time("load taz_skim_dict", t0)
tap_skim_stack = inject.get_injectable('tap_skim_dict')
t0 = print_elapsed_time("load tap_skim_dict", t0)
network_los = inject.get_injectable('network_los')
t0 = print_elapsed_time("load network_los", t0)
# test sizes for all implemented methods
VECTOR_TEST_SIZEs = (10000, 100000, 1000000, 5000000, 10000000, 20000000)
# VECTOR_TEST_SIZEs = [20000000, 40000000]
for size in VECTOR_TEST_SIZEs:
    logger.info("VECTOR_TEST_SIZE %s" % size)
    get_taz(size)
    t0 = print_elapsed_time_per_unit("get_taz", t0, size)
    get_tap(size)
    t0 = print_elapsed_time_per_unit("get_tap", t0, size)
spc.set_choices(choices_df)
if locutor:
    spc.write_trace_files(iteration)

if spc.use_shadow_pricing and spc.check_fit(iteration):
    logger.info("%s converged after iteration %s" % (trace_label, iteration))
    break
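
# A minimal sketch of the kind of test spc.check_fit performs (illustrative
# only, with a hypothetical name and tolerance; the real check is richer):
# declare convergence when the largest relative error between modeled and
# target destination sizes falls below a tolerance.
def _converged_sketch(modeled_size, target_size, rel_tolerance=0.05):
    rel_error = (modeled_size - target_size).abs() / target_size.clip(lower=1)
    return rel_error.max() <= rel_tolerance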
# - shadow price table
if locutor:
    if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
        inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
    if 'MODELED_SIZE_TABLE' in model_settings:
        inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)
dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
tracing.print_summary(dest_choice_column_name, choices, value_counts=True)
persons_df = persons.to_frame()
# We only chose school locations for the subset of persons who go to school
# so we backfill the empty choices with -1 to code as no school location
NO_DEST_TAZ = -1
persons_df[dest_choice_column_name] = \
    choices.reindex(persons_df.index).fillna(NO_DEST_TAZ).astype(int)
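
# Toy illustration of the reindex/fillna backfill above (data hypothetical):
# choices exist only for persons 2 and 4, everyone else gets -1.
import pandas as pd
_persons = pd.DataFrame(index=[1, 2, 3, 4])
_choices = pd.Series({2: 101, 4: 205})
_persons['school_taz'] = _choices.reindex(_persons.index).fillna(-1).astype(int)
# _persons['school_taz'] is now [-1, 101, -1, 205]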
# - annotate persons table
if 'annotate_persons' in model_settings:
    expressions.assign_columns(
        df=persons_df,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))
trace_rows = trace_ids = None
# interaction_utilities is a df with one utility column and one row per interaction_df row
interaction_utilities, trace_eval_results \
    = eval_interaction_utilities(spec, interaction_df, locals_d, trace_label, trace_rows)
chunk.log_df(trace_label, 'interaction_utilities', interaction_utilities)
del interaction_df
chunk.log_df(trace_label, 'interaction_df', None)
if have_trace_targets:
    tracing.trace_interaction_eval_results(trace_eval_results, trace_ids,
                                           tracing.extend_trace_label(trace_label, 'eval'))
    tracing.trace_df(interaction_utilities[trace_rows],
                     tracing.extend_trace_label(trace_label, 'interaction_utilities'),
                     slicer='NONE', transpose=False)
tracing.dump_df(DUMP, interaction_utilities, trace_label, 'interaction_utilities')
# reshape utilities (one utility column and one row per row in interaction_utilities)
# to a dataframe with one row per chooser and one column per alternative
utilities = pd.DataFrame(
    interaction_utilities.values.reshape(len(choosers), alternative_count),
    index=choosers.index)
chunk.log_df(trace_label, 'utilities', utilities)
del interaction_utilities
chunk.log_df(trace_label, 'interaction_utilities', None)
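
# Toy illustration of the reshape above (data hypothetical): six stacked
# interaction utilities for 2 choosers x 3 alternatives become one row per
# chooser with one column per alternative.
import numpy as np
_stacked = np.array([0.1, 0.5, 0.2, 0.9, 0.3, 0.4])  # one value per interaction row
_per_chooser = _stacked.reshape(2, 3)  # rows: choosers, columns: alternatives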
if have_trace_targets:
    tracing.trace_df(utilities, tracing.extend_trace_label(trace_label, 'utilities'),
                     column_labels=['alternative', 'utility'])
# reshape utilities (one utility column and one row per row in model_design)
# to a dataframe with one row per chooser and one column per alternative
utilities = pd.DataFrame(
    interaction_utilities.values.reshape(len(choosers), sample_size),
    index=choosers.index)
chunk.log_df(trace_label, 'utilities', utilities)
if have_trace_targets:
    tracing.trace_df(utilities, tracing.extend_trace_label(trace_label, 'utilities'),
                     column_labels=['alternative', 'utility'])
tracing.dump_df(DUMP, utilities, trace_label, 'utilities')
# convert to probabilities (utilities exponentiated and normalized to probs)
# probs is same shape as utilities, one row per chooser and one column per alternative
probs = logit.utils_to_probs(utilities, trace_label=trace_label, trace_choosers=choosers)
chunk.log_df(trace_label, 'probs', probs)
if have_trace_targets:
    tracing.trace_df(probs, tracing.extend_trace_label(trace_label, 'probs'),
                     column_labels=['alternative', 'probability'])
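
# In essence, utils_to_probs exponentiates utilities and normalizes each row
# to sum to one (a row-wise softmax). A minimal sketch (the real function
# also does overflow protection, tracing, and zero-probability checks):
import numpy as np

def _utils_to_probs_sketch(utilities_df):
    # subtract the row max before exponentiating for numerical stability
    exp_u = np.exp(utilities_df.sub(utilities_df.max(axis=1), axis=0))
    # normalize each row so the probabilities sum to one
    return exp_u.div(exp_u.sum(axis=1), axis=0)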
# make choices
# positions is a series with the chosen alternative represented as a column index in probs,
# i.e. an integer between zero and the number of alternatives in the alternative sample
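
# A minimal sketch of how positions can be drawn (illustrative, not
# logit.make_choices itself): compare one uniform draw per chooser against
# the row-wise cumulative probabilities (inverse-CDF sampling).
import numpy as np

def _make_choices_sketch(probs_df, rands):
    # rands: one uniform [0, 1) draw per chooser row
    cum_probs = probs_df.values.cumsum(axis=1)
    # position = count of cumulative probabilities the draw exceeds
    return (cum_probs < rands.reshape(-1, 1)).sum(axis=1)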
We expect, at least, columns [_hh_id_, _ptype_]

cdap_fixed_relative_proportions
    spec to compute/specify the relative proportions of each activity (M, N, H)
    that should be used to choose activities for additional household members
    not handled by CDAP.
locals_d : Dict
    dictionary of local variables that eval_variables adds to the environment
    for an evaluation of an expression that begins with @

Returns
-------
choices : pandas.Series
    list of alternatives chosen for all extra members, indexed by _persons_index_
"""
trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')
# extra household members have cdap_rank > MAX_HHSIZE
choosers = persons[persons['cdap_rank'] > MAX_HHSIZE]
if len(choosers.index) == 0:
    return pd.Series()
# eval the expression file
values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d)
# cdap_fixed_relative_proportions computes relative proportions by ptype, not utilities
proportions = values.dot(cdap_fixed_relative_proportions)
# convert relative proportions to probability
probs = proportions.div(proportions.sum(axis=1), axis=0)
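
# Worked toy example of the two steps above (numbers hypothetical): boolean
# expression values dotted with per-expression proportions, then each row
# normalized to sum to one.
import pandas as pd
_values = pd.DataFrame([[1, 0], [0, 1]], columns=['expr_a', 'expr_b'])
_spec = pd.DataFrame([[0.2, 0.5, 0.3],
                      [0.1, 0.1, 0.8]],
                     index=['expr_a', 'expr_b'], columns=['M', 'N', 'H'])
_proportions = _values.dot(_spec)
_probs = _proportions.div(_proportions.sum(axis=1), axis=0)  # rows sum to 1.0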
    trace_label=tracing.extend_trace_label(trace_label, 'od'))
# - dp_logsums
dp_skims = {
    'ORIGIN': model_settings['ALT_DEST'],
    'DESTINATION': model_settings['PRIMARY_DEST'],
    "odt_skims": skims['dpt_skims'],
    "od_skims": skims['dp_skims'],
}
destination_sample['dp_logsum'] = compute_ood_logsums(
    choosers,
    logsum_settings,
    dp_skims,
    locals_dict,
    chunk_size,
    trace_label=tracing.extend_trace_label(trace_label, 'dp'))
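
# The logsum computed here is, in essence, the log of the denominator of a
# multinomial logit over the mode alternatives: logsum = ln(sum_j exp(V_j)).
# A minimal sketch over a utilities DataFrame with one row per chooser and
# one column per mode (illustrative, not compute_ood_logsums itself):
import numpy as np

def _logsum_sketch(mode_utilities_df):
    return np.log(np.exp(mode_utilities_df).sum(axis=1))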
tours = tours_merged.to_frame()
tours = tours[tours.tour_category != 'subtour']
nest_spec = config.get_logit_model_settings(tour_mode_choice_settings)
constants = config.get_model_constants(tour_mode_choice_settings)
logger.info("Running tour_mode_choice_simulate with %d tours" % len(tours.index))
tracing.print_summary('tour_mode_choice_simulate tour_type',
                      tours.tour_type, value_counts=True)
if trace_hh_id:
    tracing.trace_df(tour_mode_choice_spec,
                     tracing.extend_trace_label(trace_label, 'spec'),
                     slicer='NONE', transpose=False)
# setup skim keys
odt_skim_stack_wrapper = skim_stack.wrap(left_key='TAZ', right_key='destination',
                                         skim_key="out_period")
dot_skim_stack_wrapper = skim_stack.wrap(left_key='destination', right_key='TAZ',
                                         skim_key="in_period")
od_skims = skim_dict.wrap('TAZ', 'destination')
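
# Conceptually, each period-keyed wrapper returns, for every chooser row, the
# skim value at (orig, dest) from the matrix for that row's time period. A
# generic sketch of the idea (not the ActivitySim SkimStack API;
# skims_by_period is a hypothetical dict of 2D numpy arrays keyed by period):
import numpy as np

def _skim_lookup_sketch(df, left_key, right_key, period_key, skims_by_period):
    out = np.empty(len(df))
    for i, (o, d, p) in enumerate(zip(df[left_key], df[right_key], df[period_key])):
        out[i] = skims_by_period[p][o, d]
    return out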
choices_list = []
for tour_type, segment in tours.groupby('tour_type'):
    # if tour_type != 'work':
    #     continue
# - school_location_sample
location_sample_df = \
    run_school_location_sample(
        persons_merged_df,
        skim_dict,
        dest_size_terms,
        model_settings,
        chunk_size,
        trace_hh_id,
        tracing.extend_trace_label(trace_label, 'sample'))
# - school_location_logsums
location_sample_df = \
    run_school_location_logsums(
        persons_merged_df,
        skim_dict, skim_stack,
        location_sample_df,
        model_settings,
        chunk_size,
        trace_hh_id,
        tracing.extend_trace_label(trace_label, 'logsums'))
# - school_location_simulate
choices = \
    run_school_location_simulate(
        persons_merged_df,
        location_sample_df,
        skim_dict,
        dest_size_terms,
        model_settings,
        chunk_size,
        trace_hh_id,
        tracing.extend_trace_label(trace_label, 'simulate'))
return choices
    (tour_type, len(choosers_segment), len(alternatives_segment)))
if len(choosers_segment.index) > 0:
    # want named index so tracing knows how to slice
    assert choosers_segment.index.name == 'tour_id'

    choices = interaction_sample(
        choosers_segment,
        alternatives_segment,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=model_spec[[tour_type]],
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, tour_type))

    choices['tour_type_id'] = tour_type_id
    choices_list.append(choices)
choices = pd.concat(choices_list)
# - NARROW
choices['tour_type_id'] = choices['tour_type_id'].astype(np.uint8)
if trace_hh_id:
    tracing.trace_df(choices,
                     label="joint_tour_destination_sample",
                     transpose=True)
return choices
custom_chooser : function(probs, choosers, spec, trace_label) returns choices, rands
    custom alternative to logit.make_choices
trace_label : str
    label to be used for trace log file entries and dump file names
    when household tracing is enabled. No tracing occurs if label is empty or None.
trace_choice_name : str
    column label to be used in the trace file csv dump of choices

Returns
-------
choices : pandas.Series
    Index will be that of `choosers`, values will match the columns
    of `spec`.
"""
trace_label = tracing.extend_trace_label(trace_label, 'eval_nl')
assert trace_label
have_trace_targets = tracing.has_trace_targets(choosers)
if have_trace_targets:
    tracing.trace_df(choosers, '%s.choosers' % trace_label)
raw_utilities = eval_utilities(spec, choosers, locals_d, trace_label, have_trace_targets)
chunk.log_df(trace_label, "raw_utilities", raw_utilities)
if have_trace_targets:
    tracing.trace_df(raw_utilities, '%s.raw_utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
# exponentiated utilities of leaves and nests
nested_exp_utilities = compute_nested_exp_utilities(raw_utilities, nest_spec)
chunk.log_df(trace_label, "nested_exp_utilities", nested_exp_utilities)