# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# --- Hyperparameters for the algorithm runs below ---
# NOTE(review): names suggest standard RL settings; exact semantics are not
# visible in this chunk — confirm against the consuming class (RunAllAlgorithms).
exploring_start = False  # do not force exploring starts (MC control)
this_first_visit_mc = True  # first-visit (vs every-visit) Monte Carlo
num_samples = 30
this_softmax = True  # softmax action selection flag
this_epsilon = 0.05  # exploration parameter
this_epsilon_half_life = 30  # half-life controlling epsilon decay
this_learning_rate = 0.1
this_learning_rate_decay = 1e6
this_lambd = 0.8  # presumably the eligibility-trace lambda — confirm
this_num_episodes = 3000
this_batch_size = 10
this_max_steps = 1000  # cap on steps per episode
this_tdl_fa_offline = True  # offline updates for TD(lambda) with func. approx.
# Identity features over the state (ic.lead_time + 1 of them); the
# state-action features reuse each state feature on x[0] (f bound as a default
# argument to avoid Python's late-binding closure pitfall) plus the raw action x[1].
state_ffs = FuncApproxBase.get_identity_feature_funcs(ic.lead_time + 1)
sa_ffs = [(lambda x, f=f: f(x[0])) for f in state_ffs] + [lambda x: x[1]]
# Function-approximation spec: two ReLU hidden layers ([2, 4] neurons) with an
# identity (linear) output layer.
this_fa_spec = FuncApproxSpec(
state_feature_funcs=state_ffs,
sa_feature_funcs=sa_ffs,
dnn_spec=DNNSpec(
neurons=[2, 4],
hidden_activation=DNNSpec.relu,
hidden_activation_deriv=DNNSpec.relu_deriv,
output_activation=DNNSpec.identity,
output_activation_deriv=DNNSpec.identity_deriv
)
)
# NOTE(review): truncated snippet splice — this call's argument list is cut
# off (no closing paren) before the unrelated `def actor_spec` that follows.
# Restore the remaining kwargs from the original source before use.
raa = RunAllAlgorithms(
mdp_refined=mdp_ref_obj,
tolerance=this_tolerance,
exploring_start=exploring_start,
first_visit_mc=this_first_visit_mc,
def actor_spec(self) -> Tuple[FuncApproxSpec, FuncApproxSpec]:
    """Build the pair of function-approximation specs for the actor.

    Returns:
        (mean_spec, variance_spec): the mean spec uses a single
        (1 + r)^t-style feature of the state with dnn_spec=None; the
        variance spec is featureless with a pos_log_squish output layer.
    """
    def growth_feature(s) -> float:
        # compound growth factor driven by the first state component
        return (1. + self.r) ** float(s[0])

    mean_spec = FuncApproxSpec(
        state_feature_funcs=[growth_feature],
        # bind the feature as a default arg (no late-binding surprises)
        sa_feature_funcs=[lambda x, gf=growth_feature: gf(x[0])],
        dnn_spec=None
    )
    variance_spec = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=DNNSpec(
            neurons=[],  # no hidden layer — only the output transform
            hidden_activation=DNNSpec.log_squish,
            hidden_activation_deriv=DNNSpec.log_squish_deriv,
            output_activation=DNNSpec.pos_log_squish,
            output_activation_deriv=DNNSpec.pos_log_squish_deriv
        )
    )
    return mean_spec, variance_spec
# NOTE(review): the three lambdas below are the tail of a list literal
# (one-hot indicator features for states 1..3, presumably assigned to
# `state_ff`) whose opening line was lost in a snippet splice.
lambda s: 1. if s == 1 else 0.,
lambda s: 1. if s == 2 else 0.,
lambda s: 1. if s == 3 else 0.
]
# Value-function spec: one ReLU hidden layer ([2] neurons), identity output.
vf_fa_spec_val = FuncApproxSpec(
state_feature_funcs=state_ff,
# f bound as a default argument to avoid the late-binding closure pitfall
sa_feature_funcs=[(lambda x, f=f: f(x[0])) for f in state_ff],
dnn_spec=DNNSpec(
neurons=[2],
hidden_activation=DNNSpec.relu,
hidden_activation_deriv=DNNSpec.relu_deriv,
output_activation=DNNSpec.identity,
output_activation_deriv=DNNSpec.identity_deriv
)
)
# Policy spec (a one-element list): ReLU hidden layer, sigmoid output.
pol_fa_spec_val = [FuncApproxSpec(
state_feature_funcs=state_ff,
sa_feature_funcs=[(lambda x, f=f: f(x[0])) for f in state_ff],
dnn_spec=DNNSpec(
neurons=[3],
hidden_activation=DNNSpec.relu,
hidden_activation_deriv=DNNSpec.relu_deriv,
output_activation=DNNSpec.sigmoid,
output_activation_deriv=DNNSpec.sigmoid_deriv
)
)]
# Score function for a two-action policy parameterized by p[0]:
# 1/p[0] for action (10,), 1/(p[0] - 1) (= -1/(1 - p[0])) otherwise —
# NOTE(review): presumably d/dp of log-probability; confirm against the caller.
# noinspection PyPep8
this_score_func = lambda a, p: [1. / p[0] if a == (10,) else 1. / (p[0] - 1.)]
# Draw n actions: (10,) with probability p[0], (-10,) otherwise (Bernoulli draws).
# noinspection PyPep8
sa_gen_func = lambda p, n: [((10,) if x == 1 else (-10,)) for x in binomial(1, p[0], n)]
# NOTE(review): corrupted splice — the argument list jumps from
# `mdp_rep_for_adp_pg` straight into DNNSpec-style kwargs and carries an
# unbalanced extra closing paren; the middle of the call was lost.
# Restore from the original source before use.
adp_pg_obj = ADPPolicyGradient(
mdp_rep_for_adp_pg=mdp_rep_obj,
output_activation=DNNSpec.sigmoid,
output_activation_deriv=DNNSpec.sigmoid_deriv
)
)
# Actor "nu" spec: no features; neurons=[] with a pos_log_squish output layer
# (name suggests a positive-valued squash — confirm in DNNSpec).
actor_nu = FuncApproxSpec(
state_feature_funcs=[],
sa_feature_funcs=[],
dnn_spec=DNNSpec(
neurons=[],
hidden_activation=DNNSpec.log_squish,
hidden_activation_deriv=DNNSpec.log_squish_deriv,
output_activation=DNNSpec.pos_log_squish,
output_activation_deriv=DNNSpec.pos_log_squish_deriv
)
)
# Actor mean spec: featureless, no DNN (dnn_spec=None).
actor_mean = FuncApproxSpec(
state_feature_funcs=[],
sa_feature_funcs=[],
dnn_spec=None
)
# Actor variance spec: same featureless pos_log_squish shape as actor_nu.
actor_variance = FuncApproxSpec(
state_feature_funcs=[],
sa_feature_funcs=[],
dnn_spec=DNNSpec(
neurons=[],
hidden_activation=DNNSpec.log_squish,
hidden_activation_deriv=DNNSpec.log_squish_deriv,
output_activation=DNNSpec.pos_log_squish,
output_activation_deriv=DNNSpec.pos_log_squish_deriv
)
)
# NOTE(review): corrupted splice — unlike every other FuncApproxSpec call in
# this file, this one is passed portfolio-model kwargs (riskless_returns,
# cons_util_func, ...), so it was evidently fused from a different
# constructor. Restore from the original source before use.
critic = FuncApproxSpec(
riskless_returns=riskfree_returns,
returns_gen_funcs=returns_genf,
cons_util_func=util_func,
beq_util_func=beq_util,
discount_rate=rho
)
# --- Settings for the ADP policy-gradient run below ---
reinforce_val = True  # passed as `reinforce=` to get_adp_pg_obj below
num_state_samples_val = 500
num_next_state_samples_val = 30
num_action_samples_val = 50
num_batches_val = 3000
actor_lambda_val = 0.99  # presumably trace-decay lambda for the actor — confirm
critic_lambda_val = 0.99  # presumably trace-decay lambda for the critic — confirm
# Actor "mu" spec: featureless; neurons=[] with a sigmoid output layer.
actor_mu = FuncApproxSpec(
state_feature_funcs=[],
sa_feature_funcs=[],
dnn_spec=DNNSpec(
neurons=[],
hidden_activation=DNNSpec.log_squish,
hidden_activation_deriv=DNNSpec.log_squish_deriv,
output_activation=DNNSpec.sigmoid,
output_activation_deriv=DNNSpec.sigmoid_deriv
)
)
# NOTE(review): truncated — this DNNSpec stops after hidden_activation and the
# call is never closed before the unrelated `def state_ff` below; the rest of
# the snippet was lost in the splice.
actor_nu = FuncApproxSpec(
state_feature_funcs=[],
sa_feature_funcs=[],
dnn_spec=DNNSpec(
neurons=[],
hidden_activation=DNNSpec.log_squish,
# noinspection PyShadowingNames
# NOTE(review): interior of an enclosing method whose header is not visible
# here (it reads self, time_steps, tnu, gam from an outer scope), and the
# paste has lost its indentation. The trailing `return FuncApproxSpec(...)`
# belongs to that enclosing method, not to state_ff.
def state_ff(
state: Tuple[int, float],
tnu=tnu,
gam=gam
) -> float:
# state[0]: step index, scaled to calendar time t; state[1]: presumably wealth
t = float(state[0]) * self.expiry / time_steps
tte = self.expiry - t  # time to expiry
if tnu == 0:
ret = tte + self.epsilon  # epsilon keeps the value nonzero at expiry
else:
ret = (1. + (tnu * self.epsilon - 1.) * np.exp(-tnu * tte)) / tnu
# power utility w^gam / gam, with log(w) as the gam == 0 limiting case
mult = state[1] ** gam / gam if gam != 0 else np.log(state[1])
return ret ** self.gamma * mult / np.exp(self.rho * t)
return FuncApproxSpec(
state_feature_funcs=[state_ff],
sa_feature_funcs=[lambda x, state_ff=state_ff: state_ff(x[0])],
dnn_spec=None
)
def get_actor_mean_spec() -> FuncApproxSpec:
    """Build the actor-mean function-approximation spec.

    Featureless: empty state and state-action feature lists, and no DNN
    (dnn_spec=None).
    """
    mean_spec = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=None
    )
    return mean_spec
# NOTE(review): orphaned fragment — these kwargs and closing paren are the
# tail of another FuncApproxSpec call whose opening line was lost in the
# splice; broken as-is.
state_feature_funcs=[],
sa_feature_funcs=[],
dnn_spec=None
)
# Actor variance spec: featureless; neurons=[] with a pos_log_squish output
# layer (name suggests a positive-valued squash — confirm in DNNSpec).
actor_variance = FuncApproxSpec(
state_feature_funcs=[],
sa_feature_funcs=[],
dnn_spec=DNNSpec(
neurons=[],
hidden_activation=DNNSpec.log_squish,
hidden_activation_deriv=DNNSpec.log_squish_deriv,
output_activation=DNNSpec.pos_log_squish,
output_activation_deriv=DNNSpec.pos_log_squish_deriv
)
)
# Critic spec: featureless, no DNN (dnn_spec=None).
critic = FuncApproxSpec(
state_feature_funcs=[],
sa_feature_funcs=[],
dnn_spec=None
)
# NOTE(review): truncated call — the argument list is cut off after
# `actor_mean_spec` (no closing paren) before the unrelated method below;
# the remaining kwargs were lost in the splice.
adp_pg_obj = portfolio_optimization.get_adp_pg_obj(
reinforce=reinforce_val,
num_state_samples=num_state_samples_val,
num_next_state_samples=num_next_state_samples_val,
num_action_samples=num_action_samples_val,
num_batches=num_batches_val,
actor_lambda=actor_lambda_val,
critic_lambda=critic_lambda_val,
actor_mu_spec=actor_mu,
actor_nu_spec=actor_nu,
actor_mean_spec=actor_mean,
def get_actor_mu_spec(self, time_steps: int) -> FuncApproxSpec:
# NOTE(review): the paste lost this method's indentation, and the final
# DNNSpec(...) / return FuncApproxSpec(...) is cut off at the end of the
# visible text (closing parens missing).
tnu = self.get_nu()
# noinspection PyShadowingNames
def state_ff(state: Tuple[int, float], tnu=tnu) -> float:
# state[0] / time_steps = fraction of horizon elapsed -> time to expiry
tte = self.expiry * (1. - float(state[0]) / time_steps)
if tnu == 0:
ret = 1. / (tte + self.epsilon)  # epsilon keeps the value finite at expiry
else:
ret = tnu / (1. + (tnu * self.epsilon - 1.) * np.exp(-tnu * tte))
return ret
return FuncApproxSpec(
state_feature_funcs=[state_ff],
sa_feature_funcs=[lambda x, state_ff=state_ff: state_ff(x[0])],
dnn_spec=DNNSpec(
neurons=[],
hidden_activation=DNNSpec.log_squish,
hidden_activation_deriv=DNNSpec.log_squish_deriv,
output_activation=DNNSpec.sigmoid,
output_activation_deriv=DNNSpec.sigmoid_deriv
)