persons : pandas.DataFrame
    Table of persons data indexed on _persons_index_.
    We expect, at least, columns [_hh_id_, _ptype_]
cdap_fixed_relative_proportions : pandas.DataFrame
    spec to compute/specify the relative proportions of each activity (M, N, H)
    that should be used to choose activities for additional household members
    not handled by CDAP
locals_d : Dict
    dictionary of local variables that eval_variables adds to the environment
    for an evaluation of an expression that begins with @
Returns
-------
choices : pandas.Series
    list of alternatives chosen for all extra members, indexed by _persons_index_
"""
trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')
# extra household members have cdap_ran > MAX_HHSIZE
choosers = persons[persons['cdap_rank'] > MAX_HHSIZE]
if len(choosers.index) == 0:
    return pd.Series()
# eval the expression file
values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d)
# cdap_fixed_relative_proportions computes relative proportions by ptype, not utilities
proportions = values.dot(cdap_fixed_relative_proportions)
# convert relative proportions to probability
probs = proportions.div(proportions.sum(axis=1), axis=0)
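# A small self-contained sketch of the normalization above (illustrative
# values; M, N, H are the CDAP activity alternatives):
#
#   proportions = pd.DataFrame({'M': [0.2, 1.0], 'N': [0.5, 1.0], 'H': [0.3, 2.0]})
#   probs = proportions.div(proportions.sum(axis=1), axis=0)
#   assert (probs.sum(axis=1).round(6) == 1.0).all()  # each row now sums to 1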
import os
import time
import logging

import numpy as np
import pandas as pd

# core activitysim imports used below (logger config, tracing, injectables)
from activitysim.core import inject
from activitysim.core import tracing

import extensions
# you will want to configure this with the location of the canonical datasets;
# alternate locations are left here commented out
# DATA_REPO = "C:/projects/sandag-asim/toRSG/output/"
# DATA_REPO = "E:/activitysim/project/output/"
DATA_REPO = "/Users/jeff.doyle/work/activitysim-data/sandag_zone/output/"
COMPARE_RESULTS = False
tracing.config_logger()
logger = logging.getLogger('activitysim')
@inject.injectable(override=True)
def output_dir():
    if not os.path.exists('output'):
        os.makedirs('output')  # make directory if needed
    return 'output'


@inject.injectable(override=True)
def data_dir():
    return os.path.join(DATA_REPO)
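# Since these injectables are declared with override=True, they replace the
# package defaults; a quick sanity check (hypothetical usage sketch):
assert inject.get_injectable('output_dir') == 'output'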
if not trace_od_rows.any():
    logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
else:
    # add OD columns to trace results
    df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

    # dump the trace results table (with _temp variables) to aid debugging
    tracing.trace_df(df,
                     label='accessibility',
                     index_label='skim_offset',
                     slicer='NONE',
                     warn_if_empty=True)

if trace_assigned_locals:
    tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
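# For context: a plausible construction of trace_od_rows upstream of this
# block (an assumption inferred from the names used here, not necessarily the
# actual code) is a boolean mask over the origin-destination dataframe:
#
#   trace_orig, trace_dest = trace_od
#   trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)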
def eval_utilities(spec, choosers, locals_d=None, trace_label=None, have_trace_targets=False):

    # fixme - restore tracing and _check_for_variability

    t0 = tracing.print_elapsed_time()

    # if False:  # fixme SLOWER
    #     expression_values = eval_variables(spec.index, choosers, locals_d)
    #     # chunk.log_df(trace_label, 'expression_values', expression_values)
    #
    #     # if trace_label and tracing.has_trace_targets(choosers):
    #     #     tracing.trace_df(expression_values, '%s.expression_values' % trace_label,
    #     #                      column_labels=['expression', None])
    #
    #     # if config.setting('check_for_variability'):
    #     #     _check_for_variability(expression_values, trace_label)
    #
    #     utilities = compute_utilities(expression_values, spec)
    #
    #     # chunk.log_df(trace_label, 'expression_values', None)
    #
    #     t0 = tracing.print_elapsed_time(" eval_utilities SLOWER", t0)
    #
    #     return utilities
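# A minimal sketch (an assumption, not ActivitySim's actual fast path) of
# what eval_utilities computes: evaluate each spec expression over the
# choosers, then a single dot product with the spec's coefficient columns
# yields one utility column per alternative (ignoring '@' python expressions
# and chunking for brevity):
#
#   def sketch_eval_utilities(spec, choosers):
#       values = np.stack([choosers.eval(expr) for expr in spec.index], axis=1)
#       return pd.DataFrame(values @ spec.values,
#                           index=choosers.index, columns=spec.columns)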
# i.e. convert cdap_rank from category to index in order of category rank within household
# groupby rank() is slow, so we compute rank artisanally
# save time by sorting only the columns we need (persons is big, and sort moves data)
p = persons[[_hh_id_, 'cdap_rank', _age_]]\
.sort_values(by=[_hh_id_, 'cdap_rank', _age_], ascending=[True, True, True])
rank = p.groupby(_hh_id_).size().map(range)
rank = [item+1 for sublist in rank for item in sublist]
p['cdap_rank'] = rank
persons['cdap_rank'] = p['cdap_rank'] # assignment aligns on index values
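# Toy illustration of the flatten trick above: after sorting by
# (hh_id, cdap_rank, age), each household's rows are contiguous, so
# concatenating range(household_size) per household and adding 1 yields the
# within-household rank without a slow groupby().rank():
#
#   hh_id  cdap_rank  age   ->  rank
#   1      1          45        1
#   1      2          40        2
#   2      1          30        1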
# if DUMP:
#     tracing.trace_df(persons, '%s.DUMP.cdap_person_array' % trace_label,
#                      transpose=False, slicer='NONE')

if trace_hh_id:
    tracing.trace_df(persons, '%s.cdap_rank' % trace_label)

return persons['cdap_rank']
Parameters
----------
bad_row_map : array of bool
    flags the offending rows in df
df : pandas.DataFrame
    utils or probs dataframe
trace_choosers : pandas.DataFrame
    the choosers df (for interaction_simulate), passed to facilitate reporting
    hh_id; we can't deduce hh_id from the interaction_dataset, which is indexed
    on index values from the alternatives df
"""
df = df[bad_row_map]
if trace_choosers is None:
    hh_ids = tracing.hh_id_for_chooser(df.index, df)
else:
    hh_ids = tracing.hh_id_for_chooser(df.index, trace_choosers)
df['household_id'] = hh_ids
filename = "%s.%s" % (trace_label, filename)
logger.info("dumping %s" % filename)
tracing.write_csv(df, file_name=filename, transpose=False)
# log the indexes of the first MAX_PRINT offending rows
# (MAX_PRINT = 0 disables this; raise it to print example failures)
MAX_PRINT = 0
for idx in df.index[:MAX_PRINT].values:
    row_msg = "%s : failed %s = %s (hh_id = %s)" % \
        (trace_label, df.index.name, idx, df.household_id.loc[idx])
    logger.warning(row_msg)
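# For context, a caller might build bad_row_map along these lines (an
# illustrative assumption, not the actual caller code): flag choosers whose
# probabilities contain NaNs or do not sum to 1.
#
#   bad_row_map = probs.isnull().any(axis=1) | ~np.isclose(probs.sum(axis=1), 1.0)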
assign_in_place(tours, tdd_choices)
pipeline.replace_table("tours", tours)
if trace_hh_id:
    tracing.trace_df(tours[tours.tour_category == 'atwork'],
                     label="atwork_subtour_scheduling",
                     slicer='person_id',
                     index_label='tour_id',
                     columns=None)
if DUMP:
    subtours = tours[tours.tour_category == 'atwork']
    parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

    tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
    tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

    parent_tours['parent_tour_id'] = parent_tours.index
    subtours = pd.concat([parent_tours, subtours])

    tracing.dump_df(DUMP,
                    tt.tour_map(parent_tours, subtours, tdd_alts,
                                persons_id_col='parent_tour_id'),
                    trace_label, 'tour_map')
# add stop_frequency choices to tours table
assign_in_place(tours, choices.to_frame('stop_frequency'))
if 'primary_purpose' not in tours.columns:
    assign_in_place(tours, tours_merged[['primary_purpose']])
pipeline.replace_table("tours", tours)
# create trips table
trips = process_trips(tours, stop_frequency_alts)
trips = pipeline.extend_table("trips", trips)
tracing.register_traceable_table('trips', trips)
pipeline.get_rn_generator().add_channel(trips, 'trips')
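# For intuition (illustrative, not the actual process_trips logic): a
# stop_frequency alternative is named like '2out_1in', meaning 2 outbound and
# 1 inbound intermediate stops; each direction yields stops + 1 trips, so:
#
#   import re
#   def n_trips(alt):  # '2out_1in' -> (2 + 1) + (1 + 1) == 5
#       out_stops, in_stops = map(int, re.findall(r'\d+', alt))
#       return (out_stops + 1) + (in_stops + 1)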
if trace_hh_id:
    tracing.trace_df(tours,
                     label="stop_frequency.tours",
                     slicer='person_id',
                     columns=None)

    tracing.trace_df(trips,
                     label="stop_frequency.trips",
                     slicer='person_id',
                     columns=None)

    tracing.trace_df(annotations,
                     label="stop_frequency.annotations",
                     columns=None)

    tracing.trace_df(tours_merged,
                     label="stop_frequency.tours_merged",
                     slicer='person_id',
                     columns=None)
Returns
-------
destination_sample : pandas.DataFrame
    choices_df from interaction_sample with (up to) sample_size alts for each
    chooser row; index (non-unique) is trip_id from trips (duplicated for each
    alt) and columns are:

    dest_taz : int
        alt identifier (dest_taz) from alternatives[]
    prob : float
        the probability of the chosen alternative
    pick_count : int
        number of duplicate picks for chooser, alt
"""
trace_label = tracing.extend_trace_label(trace_label, 'trip_destination_sample')
spec = get_spec_for_purpose(model_settings, 'DESTINATION_SAMPLE_SPEC', primary_purpose)
sample_size = model_settings["SAMPLE_SIZE"]
alt_dest_col_name = model_settings["ALT_DEST"]
logger.info("Running %s with %d trips", trace_label, trips.shape[0])
locals_dict = config.get_model_constants(model_settings).copy()
locals_dict.update({
'size_terms': size_term_matrix
})
locals_dict.update(skims)
destination_sample = interaction_sample(
    choosers=trips,
def run_trip_purpose_and_destination(
        trips_df,
        tours_merged_df,
        chunk_size,
        trace_hh_id,
        trace_label):

    assert not trips_df.empty

    choices = run_trip_purpose(
        trips_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=tracing.extend_trace_label(trace_label, 'purpose')
    )

    trips_df['purpose'] = choices

    trips_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size, trace_hh_id,
        trace_label=tracing.extend_trace_label(trace_label, 'destination'))

    return trips_df
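# Hypothetical usage sketch (argument values are placeholders): purpose is
# assigned before destination because the destination model's sample spec is
# selected per primary purpose (see get_spec_for_purpose above).
#
#   trips_df = run_trip_purpose_and_destination(
#       trips_df, tours_merged_df,
#       chunk_size=0, trace_hh_id=None,
#       trace_label='trip_purpose_and_destination')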