Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): interior fragment of a nested-logit (NL) evaluation routine.
# The enclosing def — and names such as chunk, tracing, nest_spec, spec,
# trace_label and have_trace_targets — are defined outside this chunk.
# Indentation appears to have been stripped during extraction, so control-flow
# nesting under the `if` statements is implied, not shown — confirm against
# the original file before editing.
chunk.log_df(trace_label, 'raw_utilities', None)
if have_trace_targets:
tracing.trace_df(nested_exp_utilities, '%s.nested_exp_utilities' % trace_label,
column_labels=['alternative', 'utility'])
# probabilities of alternatives relative to siblings sharing the same nest
nested_probabilities = \
compute_nested_probabilities(nested_exp_utilities, nest_spec, trace_label=trace_label)
chunk.log_df(trace_label, "nested_probabilities", nested_probabilities)
# release the exponentiated utilities now that sibling-relative probs exist,
# and tell the chunk memory tracker the frame is gone
del nested_exp_utilities
chunk.log_df(trace_label, 'nested_exp_utilities', None)
if have_trace_targets:
tracing.trace_df(nested_probabilities, '%s.nested_probabilities' % trace_label,
column_labels=['alternative', 'probability'])
# global (flattened) leaf probabilities based on relative nest coefficients (in spec order)
base_probabilities = compute_base_probabilities(nested_probabilities, nest_spec, spec)
chunk.log_df(trace_label, "base_probabilities", base_probabilities)
# nested (per-nest) probabilities no longer needed once flattened to leaf level
del nested_probabilities
chunk.log_df(trace_label, 'nested_probabilities', None)
if have_trace_targets:
tracing.trace_df(base_probabilities, '%s.base_probabilities' % trace_label,
column_labels=['alternative', 'probability'])
# note base_probabilities could all be zero since we allowed all probs for nests to be zero
# check here to print a clear message but make_choices will raise error if probs don't sum to 1
# tolerance used by the (unseen) follow-on check for rows whose probabilities
# do not sum close enough to 1
BAD_PROB_THRESHOLD = 0.001
# NOTE(review): fragment from the tail of a model step that annotates the
# persons and households pipeline tables and returns the persons frame.
# persons_df, households, model_settings, trace_label and trace_hh_id are
# bound in the enclosing (unseen) scope; stripped indentation hides which
# lines sit inside the `if` bodies.
tracing.trace_df(persons_df,
label=trace_label,
warn_if_empty=True)
# - annotate households table
if 'annotate_households' in model_settings:
# households is presumably an orca/pipeline table wrapper — to_frame()
# materializes it as a DataFrame (TODO confirm against caller)
households_df = households.to_frame()
expressions.assign_columns(
df=households_df,
model_settings=model_settings.get('annotate_households'),
trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
# write the annotated households back into the pipeline store
pipeline.replace_table("households", households_df)
if trace_hh_id:
tracing.trace_df(households_df,
label=trace_label,
warn_if_empty=True)
return persons_df
# NOTE(review): interior fragment of an alternative-sampling routine that
# collapses duplicate sampled alternatives into a single row with a
# pick_count. Assumes pick_count currently holds a forward cumcount from
# pick_group (computed above this fragment) — TODO confirm; choosers,
# pick_group, DUMP and have_trace_targets come from the enclosing scope.
# flag every duplicate pick after the first (cumcount > 0)
choices_df['pick_dup'] = choices_df['pick_count'] > 0
# add reverse cumcount to get total pick_count (conveniently faster than groupby.count + merge)
choices_df['pick_count'] += pick_group.cumcount(ascending=False) + 1
# drop the duplicates
choices_df = choices_df[~choices_df['pick_dup']]
del choices_df['pick_dup']
chunk.log_df(trace_label, 'choices_df', choices_df)
# set index after groupby so we can trace on it
choices_df.set_index(choosers.index.name, inplace=True)
tracing.dump_df(DUMP, choices_df, trace_label, 'choices_df')
if have_trace_targets:
tracing.trace_df(choices_df,
tracing.extend_trace_label(trace_label, 'sampled_alternatives'),
transpose=False,
column_labels=['sample_alt', 'alternative'])
# don't need this after tracing
del choices_df['rand']
chunk.log_df(trace_label, 'choices_df', choices_df)
# - NARROW
# downcast to shrink memory; pick_count must fit in uint32 (max 2**32 - 1)
choices_df['prob'] = choices_df['prob'].astype(np.float32)
assert (choices_df['pick_count'].max() < 4294967295) or (choices_df.empty)
choices_df['pick_count'] = choices_df['pick_count'].astype(np.uint32)
return choices_df
# NOTE(review): interior fragment of a household activity-pattern (CDAP-style)
# chooser; probs, utils, choosers, hhsize, trace_hh_id and trace_label come
# from the enclosing scope. Stripped indentation hides the nesting of the
# trace_df calls under the two `if`s.
# select an activity pattern alternative for each household based on probability
# result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)
# convert choice expressed as index into alternative name from util column label
choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)
if trace_hh_id:
if hhsize > 1:
tracing.trace_df(choosers, '%s.hhsize%d_choosers' % (trace_label, hhsize),
column_labels=['expression', 'person'])
tracing.trace_df(utils, '%s.hhsize%d_utils' % (trace_label, hhsize),
column_labels=['expression', 'household'])
tracing.trace_df(probs, '%s.hhsize%d_probs' % (trace_label, hhsize),
column_labels=['expression', 'household'])
tracing.trace_df(choices, '%s.hhsize%d_activity_choices' % (trace_label, hhsize),
column_labels=['expression', 'household'])
tracing.trace_df(rands, '%s.hhsize%d_rands' % (trace_label, hhsize),
columns=[None, 'rand'])
return choices
# NOTE(review): interior fragment that traces per-chooser expression values.
# expression_values, choosers, spec, utilities and trace_label are bound in
# the enclosing (unseen) scope.
# boolean mask of choosers selected for tracing
trace_targets = tracing.trace_targets(choosers)
# positional offsets of the traced choosers (columns to slice out)
offsets = np.nonzero(trace_targets)[0]
# get array of expression_values
# expression_values.shape = (len(spec), len(choosers))
# data.shape = (len(spec), len(offsets))
data = expression_values[:, offsets]
# columns is chooser index as str
column_labels = choosers.index[trace_targets].astype(str)
# index is utility expressions
index = spec.index
trace_df = pd.DataFrame(data=data, columns=column_labels, index=index)
tracing.trace_df(trace_df, '%s.expression_values' % trace_label,
slicer=None, transpose=False,
index_label='expression')
return utilities
# NOTE(review): fragment of a destination-choice model step that backfills
# persons with no choice and annotates the persons/households tables.
# The fragment is TRUNCATED mid-call at its final trace_df — the closing
# arguments/parenthesis live outside this chunk.
# so we backfill the empty choices with -1 to code as no school location
# sentinel TAZ meaning "no destination chosen"
NO_DEST_TAZ = -1
persons_df[dest_choice_column_name] = \
choices.reindex(persons_df.index).fillna(NO_DEST_TAZ).astype(int)
# - annotate persons table
if 'annotate_persons' in model_settings:
expressions.assign_columns(
df=persons_df,
model_settings=model_settings.get('annotate_persons'),
trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))
# persist annotated persons back to the pipeline store
pipeline.replace_table("persons", persons_df)
if trace_hh_id:
tracing.trace_df(persons_df,
label=trace_label,
warn_if_empty=True)
# - annotate households table
if 'annotate_households' in model_settings:
households_df = households.to_frame()
expressions.assign_columns(
df=households_df,
model_settings=model_settings.get('annotate_households'),
trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
pipeline.replace_table("households", households_df)
if trace_hh_id:
tracing.trace_df(households_df,
label=trace_label,
# NOTE(review): interior fragment of a trip scheduling/departure chooser.
# chooser_probs, trips, choosers, depart_alt_base, report_failed_trips,
# trace_hh_id and trace_label come from the enclosing (unseen) scope.
# probs should sum to 1 with residual probs resulting in choice of 'fail'
# clip guards against tiny floating-point overshoot above 1
chooser_probs['fail'] = 1 - chooser_probs.sum(axis=1).clip(0, 1)
if trace_hh_id and tracing.has_trace_targets(trips):
tracing.trace_df(chooser_probs, '%s.chooser_probs' % trace_label)
choices, rands = logit.make_choices(
chooser_probs,
trace_label=trace_label, trace_choosers=choosers)
chunk.log_df(trace_label, "choices", choices)
chunk.log_df(trace_label, "rands", rands)
if trace_hh_id and tracing.has_trace_targets(trips):
tracing.trace_df(choices, '%s.choices' % trace_label, columns=[None, 'depart'])
tracing.trace_df(rands, '%s.rands' % trace_label, columns=[None, 'rand'])
# convert alt choice index to depart time (setting failed choices to -1)
# choices holds positional column indices, so a pick of the 'fail' column
# equals its positional get_loc
failed = (choices == chooser_probs.columns.get_loc('fail'))
choices = (choices + depart_alt_base).where(~failed, -1)
chunk.log_df(trace_label, "failed", failed)
# report failed trips while we have the best diagnostic info
if report_failed_trips and failed.any():
report_bad_choices(
bad_row_map=failed,
df=choosers,
filename='failed_choosers',
trace_label=trace_label,
trace_choosers=None)
choices : pandas.Series
Index will be that of `choosers`, values will match the columns
of `spec`.
"""
# NOTE(review): the lines above are the tail of a docstring whose opening
# triple-quote (and the enclosing def signature) are outside this chunk;
# this fragment is the body of an MNL evaluator, also truncated below.
trace_label = tracing.extend_trace_label(trace_label, 'eval_mnl')
# only trace when a label exists AND some choosers are trace targets
have_trace_targets = trace_label and tracing.has_trace_targets(choosers)
if have_trace_targets:
tracing.trace_df(choosers, '%s.choosers' % trace_label)
utilities = eval_utilities(spec, choosers, locals_d, trace_label)
chunk.log_df(trace_label, "utilities", utilities)
if have_trace_targets:
tracing.trace_df(utilities, '%s.utilities' % trace_label,
column_labels=['alternative', 'utility'])
probs = logit.utils_to_probs(utilities, trace_label=trace_label, trace_choosers=choosers)
chunk.log_df(trace_label, "probs", probs)
# utilities no longer needed once converted to probabilities
del utilities
chunk.log_df(trace_label, 'utilities', None)
if have_trace_targets:
# report these now in case make_choices throws error on bad_choices
tracing.trace_df(probs, '%s.probs' % trace_label,
column_labels=['alternative', 'probability'])
# custom_chooser lets callers substitute their own choice mechanism
if custom_chooser:
choices, rands = custom_chooser(probs=probs, choosers=choosers, spec=spec,
trace_label=trace_label)
# NOTE(review): fragment from the tail of a trip-mode-choice model step that
# attaches chosen modes to the trips table and persists it. trips,
# trips_merged, choices, trace_hh_id and trace_label come from the
# enclosing (unseen) scope.
trips_df = trips.to_frame()
trips_df['trip_mode'] = choices
tracing.print_summary('tour_modes',
trips_merged.tour_mode, value_counts=True)
tracing.print_summary('trip_mode_choice choices',
choices, value_counts=True)
# every trip must have received a mode (choices covers the full index)
assert not trips_df.trip_mode.isnull().any()
pipeline.replace_table("trips", trips_df)
if trace_hh_id:
tracing.trace_df(trips_df,
label=tracing.extend_trace_label(trace_label, 'trip_mode'),
slicer='trip_id',
index_label='trip_id',
warn_if_empty=True)
# NOTE(review): interior fragment of an interaction-sample-simulate routine
# validating the chooser/alternative index alignment before the merge;
# choosers, alternatives, spec, skims and trace_label come from the
# enclosing (unseen) scope.
# merge of alternatives, choosers on index requires increasing index
assert choosers.index.is_monotonic_increasing
assert alternatives.index.is_monotonic_increasing
# assert choosers.index.equals(alternatives.index[~alternatives.index.duplicated(keep='first')])
# this is the more general check (not requiring is_monotonic_increasing)
# np.roll(-1) pairs each index value with its successor; True where a run of
# repeated index values ends, so alternatives.index[last_repeat] is one entry
# per chooser
last_repeat = alternatives.index != np.roll(alternatives.index, -1)
assert (choosers.shape[0] == 1) or choosers.index.equals(alternatives.index[last_repeat])
have_trace_targets = trace_label and tracing.has_trace_targets(choosers)
if have_trace_targets:
tracing.trace_df(choosers, tracing.extend_trace_label(trace_label, 'choosers'))
tracing.trace_df(alternatives, tracing.extend_trace_label(trace_label, 'alternatives'),
transpose=False)
if len(spec.columns) > 1:
raise RuntimeError('spec must have only one column')
# if using skims, copy index into the dataframe, so it will be
# available as the "destination" for the skims dereference below
if skims is not None:
alternatives[alternatives.index.name] = alternatives.index
# in vanilla interaction_simulate interaction_df is cross join of choosers and alternatives
# interaction_df = logit.interaction_dataset(choosers, alternatives, sample_size)
# here, alternatives is sparsely repeated once for each (non-dup) sample
# we expect alternatives to have same index of choosers (but with duplicate index values)
# so we just need to left join alternatives with choosers
assert alternatives.index.name == choosers.index.name