# calculate single-person (individual) utilities, before household interaction terms
individual_vars = eval_variables(cdap_indiv_spec.index, persons, locals_d)
indiv_utils = individual_vars.dot(cdap_indiv_spec)
# add columns from persons to facilitate building household interactions
useful_columns = [_hh_id_, _ptype_, _cdap_rank_, _hh_size_]
indiv_utils[useful_columns] = persons[useful_columns]
if DUMP:
tracing.trace_df(indiv_utils,
'%s.DUMP.indiv_utils' % trace_label,
transpose=False,
slicer='NONE')
if trace_hh_id:
tracing.trace_df(individual_vars, '%s.individual_vars' % trace_label,
column_labels=['expression', 'person'],
warn_if_empty=False)
tracing.trace_df(indiv_utils, '%s.indiv_utils' % trace_label,
column_labels=['activity', 'person'],
warn_if_empty=False)
return indiv_utils
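# A minimal, self-contained sketch (toy spec and persons; the column names,
# expressions and coefficients are made up) of how evaluating the spec's
# expressions against the persons table and dotting the result with the
# per-activity coefficient columns yields one utility per person per activity.
import pandas as pd

spec = pd.DataFrame({'M': [1.0, -0.5], 'N': [0.2, 0.0], 'H': [0.0, 0.8]},
                    index=['ptype == 1', 'age > 65'])
persons = pd.DataFrame({'ptype': [1, 2], 'age': [30, 70]}, index=[101, 102])

# evaluate each expression in the spec index against persons
# (a stand-in for what eval_variables does)
variables = pd.DataFrame({expr: persons.eval(expr) for expr in spec.index},
                         index=persons.index).astype(float)

utilities = variables.dot(spec)   # rows: persons, columns: activities M/N/H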
right_index=True
)
# resulting dataframe has columns _hh_id_, _cdap_rank_, hh_choices, indexed on _persons_index_
indiv_activity["cdap_activity"] = ''
# for each cdap_rank (1..MAX_HHSIZE)
for i in range(MAX_HHSIZE):
pnum_i = (indiv_activity[_cdap_rank_] == i+1)
indiv_activity.loc[pnum_i, ["cdap_activity"]] = indiv_activity[pnum_i]['hh_choices'].str[i]
cdap_indiv_activity_choices = indiv_activity['cdap_activity']
if DUMP:
tracing.trace_df(cdap_indiv_activity_choices,
'%s.DUMP.cdap_indiv_activity_choices' % trace_label,
transpose=False,
slicer='NONE')
return cdap_indiv_activity_choices
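# A minimal, self-contained sketch (toy ids and pattern strings) of the
# unpacking step above: the i-th letter of a household's activity-pattern
# string is assigned to the member whose cdap_rank is i+1, via .str[i].
import pandas as pd

indiv = pd.DataFrame({
    'hh_id':      [1, 1, 2],
    'cdap_rank':  [1, 2, 1],
    'hh_choices': ['MN', 'MN', 'H'],   # household pattern merged onto each member
}, index=[101, 102, 201])

indiv['cdap_activity'] = ''
for i in range(indiv['cdap_rank'].max()):
    is_rank_i = indiv['cdap_rank'] == i + 1
    indiv.loc[is_rank_i, 'cdap_activity'] = indiv.loc[is_rank_i, 'hh_choices'].str[i]
# person 101 -> 'M', person 102 -> 'N', person 201 -> 'H'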
if DUMP:
    tracing.trace_df(choosers,
                     '%s.DUMP.hhsize%d_choosers' % (trace_label, hhsize),
                     transpose=False,
                     slicer='NONE')
tracing.trace_df(vars,
'%s.DUMP.hhsize%d_vars' % (trace_label, hhsize),
transpose=False,
slicer='NONE')
if trace_hh_id:
tracing.trace_df(choosers,
'%s.hhsize%d_choosers' % (trace_label, hhsize),
column_labels=['expression', 'person'],
warn_if_empty=False)
tracing.trace_df(vars,
'%s.hhsize%d_vars' % (trace_label, hhsize),
column_labels=['expression', 'person'],
warn_if_empty=False)
return utils
if DUMP:
    tracing.trace_df(choices,
                     '%s.DUMP.extra_choices' % trace_label,
                     transpose=False,
                     slicer='NONE')
if trace_hh_id:
if USE_FIXED_PROPORTIONS:
tracing.trace_df(proportions, '%s.extra_hh_member_choices_proportions' % trace_label,
column_labels=['expression', 'person'],
warn_if_empty=False)
else:
tracing.trace_df(utils, '%s.extra_hh_member_choices_utils' % trace_label,
column_labels=['expression', 'person'],
warn_if_empty=False)
tracing.trace_df(probs, '%s.extra_hh_member_choices_probs' % trace_label,
column_labels=['expression', 'person'],
warn_if_empty=False)
tracing.trace_df(choices, '%s.extra_hh_member_choices_choices' % trace_label,
column_labels=['expression', 'person'],
warn_if_empty=False)
return choices
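# A minimal, self-contained sketch (toy ids and proportions; not the
# activitysim implementation) of the fixed-proportions branch above:
# normalise each extra member's activity proportions into probabilities,
# then draw one activity per person from those probabilities.
import numpy as np
import pandas as pd

proportions = pd.DataFrame({'M': [0.0, 2.0], 'N': [1.0, 1.0], 'H': [3.0, 1.0]},
                           index=[301, 302])

probs = proportions.div(proportions.sum(axis=1), axis=0)   # each row sums to 1

rng = np.random.default_rng(seed=0)
draws = rng.random(len(probs))
# first column whose cumulative probability exceeds the person's uniform draw
idx = (probs.cumsum(axis=1).values > draws[:, None]).argmax(axis=1)
choices = pd.Series(probs.columns[idx].values, index=probs.index)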
else:
utils = hh_utilities(
indiv_utils, cdap_interaction_coefficients, hhsize=hhsize,
trace_hh_id=trace_hh_id, trace_label=trace_label)
probs = nl.utils_to_probs(utils, trace_label=trace_label)
# select an activity pattern alternative for each household based on probability
# result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
idx_choices = nl.make_choices(probs, trace_label=trace_label)
# convert choice expressed as index into alternative name from util column label
choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)
if DUMP:
tracing.trace_df(utils,
'%s.DUMP.hhsize%d_utils' % (trace_label, hhsize),
transpose=False,
slicer='NONE')
tracing.trace_df(probs,
'%s.DUMP.hhsize%d_probs' % (trace_label, hhsize),
transpose=False,
slicer='NONE')
tracing.trace_df(choices,
'%s.DUMP.hhsize%d_activity_choices' % (trace_label, hhsize),
transpose=False,
slicer='NONE')
if trace_hh_id:
    tracing.trace_df(utils, '%s.hhsize%d_utils' % (trace_label, hhsize),
                     column_labels=['expression', 'household'],
                     warn_if_empty=False)
    tracing.trace_df(probs, '%s.hhsize%d_probs' % (trace_label, hhsize),
                     column_labels=['expression', 'household'],
                     warn_if_empty=False)
    tracing.trace_df(choices, '%s.hhsize%d_activity_choices' % (trace_label, hhsize),
                     column_labels=['expression', 'household'],
                     warn_if_empty=False)
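# A minimal, self-contained sketch (toy household id and pattern alternatives;
# a plain multinomial-logit softmax, not necessarily the exact behaviour of
# nl.utils_to_probs / nl.make_choices) of how household utilities become
# probabilities and how a positional choice maps back to the alternative name
# via the utility column label, as done above.
import numpy as np
import pandas as pd

utils = pd.DataFrame({'MM': [1.0], 'MN': [0.5], 'NN': [-0.2]}, index=[17])

exp_u = np.exp(utils.sub(utils.max(axis=1), axis=0))   # numerically stable exp
probs = exp_u.div(exp_u.sum(axis=1), axis=0)           # rows sum to 1

# stand-in for a Monte Carlo draw: take the highest-probability column index
idx_choices = pd.Series(probs.values.argmax(axis=1), index=probs.index)
choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)   # -> 'MM'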
.rank(method='first', na_option='top')\
.astype(int)
# FIXME - possible workaround if the above is too big/slow
# stackoverflow.com/questions/26720916/faster-way-to-rank-rows-in-subgroups-in-pandas-dataframe
# Working with a big DataFrame (13 million lines), the rank method with groupby
# maxed out my 8GB of RAM and it took a really long time. I found a workaround
# that is less greedy in memory, which I put here just in case:
# df.sort_values('value')
# tmp = df.groupby('group').size()
# rank = tmp.map(range)
# rank = [item for sublist in rank for item in sublist]
# df['rank'] = rank
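# A minimal, self-contained sketch (toy 'group'/'value' columns) of the
# workaround quoted above: sort so each group's rows are contiguous, then build
# the within-group ranks from the group sizes instead of calling groupby().rank().
import pandas as pd

df = pd.DataFrame({'group': ['b', 'a', 'a', 'b', 'b'],
                   'value': [5, 3, 1, 2, 4]})

# sort by group, then by value within group, so the flattened ranks line up with rows
df = df.sort_values(['group', 'value'])

sizes = df.groupby('group', sort=True).size()           # rows per group, in sorted group order
df['rank'] = [i + 1 for n in sizes for i in range(n)]   # 1-based, like rank(method='first')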
if DUMP:
tracing.trace_df(cdap_persons,
'%s.DUMP.cdap_person_array' % trace_label,
transpose=False,
slicer='NONE')
if trace_hh_id:
tracing.trace_df(cdap_persons, '%s.cdap_rank' % trace_label,
warn_if_empty=True)
return cdap_persons[_cdap_rank_]
# concat all the resulting Series
hh_activity_choices = pd.concat(hh_choices_list)
cdap_person_choices \
= unpack_cdap_indiv_activity_choices(indiv_utils, hh_activity_choices,
trace_hh_id, trace_label)
extra_person_choices \
= extra_hh_member_choices(indiv_utils, cdap_fixed_relative_proportions, locals_d,
trace_hh_id, trace_label)
person_choices = pd.concat([cdap_person_choices, extra_person_choices])
if DUMP:
tracing.trace_df(hh_activity_choices,
'%s.DUMP.hh_activity_choices' % trace_label,
transpose=False,
slicer='NONE')
tracing.trace_df(person_choices,
'%s.DUMP.person_choices' % trace_label,
transpose=False,
slicer='NONE')
if trace_hh_id:
tracing.trace_df(person_choices, '%s.person_choices' % trace_label,
columns='choice')
return person_choices