tracing.trace_df(interaction_utilities[trace_rows],
                 tracing.extend_trace_label(trace_label, 'interaction_utilities'),
                 slicer='NONE', transpose=False)
tracing.dump_df(DUMP, interaction_utilities, trace_label, 'interaction_utilities')
# reshape utilities (one utility column and one row per row in interaction_utilities)
# to a dataframe with one row per chooser and one column per alternative
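# (the reshape below assumes interaction_utilities is dense: exactly
# len(choosers) * alternative_count rows, ordered chooser-major, i.e. all
# alternatives for the first chooser, then all alternatives for the second, etc.;
# e.g. 3 choosers x 4 alternatives -> a (12, 1) frame reshaped to a (3, 4) array)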
utilities = pd.DataFrame(
    interaction_utilities.values.reshape(len(choosers), alternative_count),
    index=choosers.index)
chunk.log_df(trace_label, 'utilities', utilities)
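# release the large interaction_utilities frame now that it has been reshaped;
# logging None afterwards presumably tells the chunk tracker that the dataframe
# has been freed so its memory no longer counts against the chunk budget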
del interaction_utilities
chunk.log_df(trace_label, 'interaction_utilities', None)
if have_trace_targets:
    tracing.trace_df(utilities, tracing.extend_trace_label(trace_label, 'utilities'),
                     column_labels=['alternative', 'utility'])

tracing.dump_df(DUMP, utilities, trace_label, 'utilities')
# convert to probabilities (utilities exponentiated and normalized to probs)
# probs is the same shape as utilities: one row per chooser and one column per alternative
probs = logit.utils_to_probs(utilities, allow_zero_probs=allow_zero_probs,
                             trace_label=trace_label, trace_choosers=choosers)
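# utils_to_probs is understood to apply a row-wise softmax (exponentiate and
# normalize, per the comment above), i.e. for a single chooser row u:
#   probs = np.exp(u) / np.exp(u).sum()
# e.g. u = [1.0, 2.0] -> probs ~= [0.269, 0.731]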
chunk.log_df(trace_label, 'probs', probs)
del utilities
chunk.log_df(trace_label, 'utilities', None)
Returns
-------
choices: pd.Series
    depart time period choices, one per trip (except for trips with zero probs)
"""
depart_alt_base = model_settings.get('DEPART_ALT_BASE')
probs_join_cols = ['primary_purpose', 'outbound', 'tour_hour', 'trip_num']
probs_cols = [c for c in probs_spec.columns if c not in probs_join_cols]
# left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
choosers = pd.merge(trips.reset_index(), probs_spec, on=probs_join_cols,
                    how='left').set_index('trip_id')
chunk.log_df(trace_label, "choosers", choosers)
if trace_hh_id and tracing.has_trace_targets(trips):
    tracing.trace_df(choosers, '%s.choosers' % trace_label)
# choosers should now match trips row for row
assert choosers.index.is_unique
assert len(choosers.index) == len(trips.index)
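# these asserts verify that the merge did not fan out: if any trip had matched
# more than one probs_spec row, the index would no longer be unique and the
# chooser and trip row counts would differ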
# zero out probs outside earliest-latest window
chooser_probs = clip_probs(trips, choosers[probs_cols], model_settings)
chunk.log_df(trace_label, "chooser_probs", chooser_probs)
if first_trip_in_leg:
    # probs should sum to 1 unless all zero
    chooser_probs = chooser_probs.div(chooser_probs.sum(axis=1), axis=0).fillna(0)
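# in the normalization above, rows whose probs sum to zero (everything clipped
# away) divide to NaN, and fillna(0) turns them back into all-zero rows; these
# are the "trips with zero probs" noted in the docstring above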
Index will be that of `choosers`, values will be logsum across spec column values
"""
# FIXME - untested and not currently used by any models...
trace_label = tracing.extend_trace_label(trace_label, 'eval_mnl_logsums')
have_trace_targets = trace_label and tracing.has_trace_targets(choosers)
logger.debug("running eval_mnl_logsums")
# trace choosers
if have_trace_targets:
    tracing.trace_df(choosers, '%s.choosers' % trace_label)
utilities = eval_utilities(spec, choosers, locals_d, trace_label)
chunk.log_df(trace_label, "utilities", utilities)
if have_trace_targets:
    tracing.trace_df(utilities, '%s.raw_utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
# - logsums
# logsum is log of exponentiated utilities summed across columns of each chooser row
logsums = np.log(np.exp(utilities.values).sum(axis=1))
logsums = pd.Series(logsums, index=choosers.index)
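# note: np.exp can overflow for very large utilities; a numerically equivalent,
# overflow-safe form subtracts the per-row max m before exponentiating:
#   log(sum(exp(u))) == m + log(sum(exp(u - m)))
# (scipy.special.logsumexp computes exactly this)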
chunk.log_df(trace_label, "logsums", logsums)
# trace logsums
if have_trace_targets:
    tracing.trace_df(logsums, '%s.logsums' % trace_label,
                     column_labels=['alternative', 'logsum'])
trace_rows = trace_ids = None
if skims is not None:
    set_skim_wrapper_targets(interaction_df, skims)
# evaluate expressions from the spec, multiply by coefficients and sum
# spec is a df with one row per spec expression and one col with the utility coefficient
# column names of choosers match spec index values
# utilities has a utility value for each element in the cross product of choosers and alternatives
# interaction_utilities is a df with one utility column and one row per row in interaction_df
interaction_utilities, trace_eval_results \
    = eval_interaction_utilities(spec, interaction_df, locals_d, trace_label, trace_rows)
chunk.log_df(trace_label, 'interaction_utilities', interaction_utilities)
del interaction_df
chunk.log_df(trace_label, 'interaction_df', None)
if have_trace_targets:
    tracing.trace_interaction_eval_results(trace_eval_results, trace_ids,
                                           tracing.extend_trace_label(trace_label, 'eval'))
    tracing.trace_df(interaction_utilities,
                     tracing.extend_trace_label(trace_label, 'interaction_utilities'),
                     transpose=False)

# reshape utilities (one utility column and one row per row in interaction_utilities)
# to a dataframe with one row per chooser and one column per alternative
# interaction_utilities is sparse because duplicate sampled alternatives were dropped
# so we need to pad with dummy utilities so low that they are never chosen
# number of samples per chooser
sample_counts = interaction_utilities.groupby(interaction_utilities.index).size().values
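# sample_counts[i] is the number of distinct sampled alternatives that survived
# de-duplication for chooser i; e.g. if the nominal sample size is 5 and a
# chooser's sample collapsed to 3 unique alternatives, 2 rows of dummy (very low)
# utilities are needed before reshaping to a fixed-width chooser x alternative array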
if have_trace_targets:
    tracing.trace_df(choosers, '%s.choosers' % trace_label)
raw_utilities = eval_utilities(spec, choosers, locals_d, trace_label, have_trace_targets)
chunk.log_df(trace_label, "raw_utilities", raw_utilities)
if have_trace_targets:
    tracing.trace_df(raw_utilities, '%s.raw_utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
# exponentiated utilities of leaves and nests
nested_exp_utilities = compute_nested_exp_utilities(raw_utilities, nest_spec)
chunk.log_df(trace_label, "nested_exp_utilities", nested_exp_utilities)
del raw_utilities
chunk.log_df(trace_label, 'raw_utilities', None)
if have_trace_targets:
    tracing.trace_df(nested_exp_utilities, '%s.nested_exp_utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
# probabilities of alternatives relative to siblings sharing the same nest
nested_probabilities = \
    compute_nested_probabilities(nested_exp_utilities, nest_spec, trace_label=trace_label)
chunk.log_df(trace_label, "nested_probabilities", nested_probabilities)
del nested_exp_utilities
chunk.log_df(trace_label, 'nested_exp_utilities', None)
if have_trace_targets:
    tracing.trace_df(nested_probabilities, '%s.nested_probabilities' % trace_label,
                     column_labels=['alternative', 'probability'])
Returns
-------
choices : pandas.Series
    Index will be that of `choosers`, values will match the columns
    of `spec`.
"""
trace_label = tracing.extend_trace_label(trace_label, 'eval_mnl')
have_trace_targets = trace_label and tracing.has_trace_targets(choosers)
if have_trace_targets:
    tracing.trace_df(choosers, '%s.choosers' % trace_label)
utilities = eval_utilities(spec, choosers, locals_d, trace_label)
chunk.log_df(trace_label, "utilities", utilities)
if have_trace_targets:
    tracing.trace_df(utilities, '%s.utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
probs = logit.utils_to_probs(utilities, trace_label=trace_label, trace_choosers=choosers)
chunk.log_df(trace_label, "probs", probs)
del utilities
chunk.log_df(trace_label, 'utilities', None)
if have_trace_targets:
    # report these now in case make_choices throws error on bad_choices
    tracing.trace_df(probs, '%s.probs' % trace_label,
                     column_labels=['alternative', 'probability'])
# if no timetable window_id_col specified, then add index as an explicit column
# (this is not strictly necessary but its presence makes code simpler in several places)
if window_id_col is None:
    window_id_col = tours.index.name
    tours[window_id_col] = tours.index
# timetable can't handle multiple tours per window_id
assert not tours[window_id_col].duplicated().any()
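# since the timetable (per the comment above) can't handle two tours sharing a
# window_id, the assert guarantees the window_id -> tour mapping is one-to-one
# for this batch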
# - build interaction dataset filtered to include only available tdd alts
# dataframe columns start, end, duration, person_id, tdd
# indexed (not uniquely) on tour_id
choice_column = 'tdd'
alt_tdd = tdd_interaction_dataset(tours, alts, timetable, choice_column, window_id_col,
                                  tour_trace_label)
chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)
# - add logsums
if logsum_tour_purpose:
    logsums = \
        compute_logsums(alt_tdd, tours, logsum_tour_purpose, model_settings, tour_trace_label)
else:
    logsums = 0

alt_tdd['mode_choice_logsum'] = logsums
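# when logsums are not computed, the scalar 0 assigned above broadcasts to every
# row of alt_tdd, so the mode_choice_logsum column always exists (presumably so
# downstream spec expressions can reference it either way)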
# - merge in previous tour columns
# adds start_previous and end_previous, joins on index
tours = \
    tours.join(get_previous_tour_by_tourid(tours[tour_owner_id_col], previous_tour, alts))
chunk.log_df(tour_trace_label, "tours", tours)
# - make choices
locals_d = {
    'tt': timetable
}
constants = config.get_model_constants(model_settings)
if constants is not None:
    locals_d.update(constants)

choices = interaction_sample_simulate(
    tours,
    alt_tdd,
    spec,
    choice_column=choice_column,
    locals_d=locals_d,
    chunk_size=0)

chunk.log_df(trace_label, 'choices_df', choices_df)
# set index after groupby so we can trace on it
choices_df.set_index(choosers.index.name, inplace=True)
tracing.dump_df(DUMP, choices_df, trace_label, 'choices_df')
if have_trace_targets:
    tracing.trace_df(choices_df,
                     tracing.extend_trace_label(trace_label, 'sampled_alternatives'),
                     transpose=False,
                     column_labels=['sample_alt', 'alternative'])
# don't need this after tracing
del choices_df['rand']
chunk.log_df(trace_label, 'choices_df', choices_df)
# - NARROW
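# downcast to save memory: prob to float32 and pick_count to uint32;
# 4294967295 is np.iinfo(np.uint32).max, so the assert guards the uint32
# downcast against overflow (the empty-frame check avoids a spurious failure,
# since max() of an empty column is NaN)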
choices_df['prob'] = choices_df['prob'].astype(np.float32)
assert (choices_df['pick_count'].max() < 4294967295) or (choices_df.empty)
choices_df['pick_count'] = choices_df['pick_count'].astype(np.uint32)
return choices_df