How to use the algorithms.func_approx_spec.FuncApproxSpec function in algorithms

To help you get started, we've selected a few FuncApproxSpec examples, based on popular ways the function is used in public projects.

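All of the examples below call FuncApproxSpec with the same three arguments: state_feature_funcs (features of the state), sa_feature_funcs (features of a (state, action) pair), and dnn_spec, where None appears to select a plain linear approximator and a DNNSpec selects a neural network. A minimal sketch of that shape, assuming a tuple-of-numbers state (import path as in the module name algorithms.func_approx_spec):

from algorithms.func_approx_spec import FuncApproxSpec

# Identity features over a 3-dimensional state.
state_ffs = [lambda s, i=i: float(s[i]) for i in range(3)]

linear_spec = FuncApproxSpec(
    state_feature_funcs=state_ffs,
    # Lift each state feature to a (state, action) feature, then add the action itself.
    sa_feature_funcs=[(lambda x, f=f: f(x[0])) for f in state_ffs]
                     + [lambda x: x[1]],
    dnn_spec=None  # None => linear function approximation, as in several examples below
)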

github coverdrive / MDP-DP-RL / src / examples / run_all_algorithms.py
    exploring_start = False
    this_first_visit_mc = True
    num_samples = 30
    this_softmax = True
    this_epsilon = 0.05
    this_epsilon_half_life = 30
    this_learning_rate = 0.1
    this_learning_rate_decay = 1e6
    this_lambd = 0.8
    this_num_episodes = 3000
    this_batch_size = 10
    this_max_steps = 1000
    this_tdl_fa_offline = True
    state_ffs = FuncApproxBase.get_identity_feature_funcs(ic.lead_time + 1)
    sa_ffs = [(lambda x, f=f: f(x[0])) for f in state_ffs] + [lambda x: x[1]]
    this_fa_spec = FuncApproxSpec(
        state_feature_funcs=state_ffs,
        sa_feature_funcs=sa_ffs,
        dnn_spec=DNNSpec(
            neurons=[2, 4],
            hidden_activation=DNNSpec.relu,
            hidden_activation_deriv=DNNSpec.relu_deriv,
            output_activation=DNNSpec.identity,
            output_activation_deriv=DNNSpec.identity_deriv
        )
    )

    raa = RunAllAlgorithms(
        mdp_refined=mdp_ref_obj,
        tolerance=this_tolerance,
        exploring_start=exploring_start,
        first_visit_mc=this_first_visit_mc,
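
The f=f default argument in the sa_ffs comprehension above is deliberate: it binds each feature function at definition time. Without it, Python's late binding would make every lambda call whichever function f last referred to. A standalone illustration of the difference:

feature_funcs = [lambda s: s, lambda s: s * s]

wrong = [lambda x: f(x[0]) for f in feature_funcs]       # all share the final f
right = [lambda x, f=f: f(x[0]) for f in feature_funcs]  # f frozen per iteration

print([g((3, None)) for g in wrong])   # [9, 9] -- both call s * s
print([g((3, None)) for g in right])   # [3, 9] -- one result per feature
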
github coverdrive / MDP-DP-RL / src / examples / port_opt / single_asset_cara.py
def actor_spec(self) -> Tuple[FuncApproxSpec, FuncApproxSpec]:
        ff = lambda s: (1. + self.r) ** float(s[0])
        mean = FuncApproxSpec(
            state_feature_funcs=[ff],
            sa_feature_funcs=[lambda x, ff=ff: ff(x[0])],
            dnn_spec=None
        )
        variance = FuncApproxSpec(
            state_feature_funcs=[],
            sa_feature_funcs=[],
            dnn_spec=DNNSpec(
                neurons=[],
                hidden_activation=DNNSpec.log_squish,
                hidden_activation_deriv=DNNSpec.log_squish_deriv,
                output_activation=DNNSpec.pos_log_squish,
                output_activation_deriv=DNNSpec.pos_log_squish_deriv
            )
        )
        return mean, variance
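
The variance spec shows a pattern that recurs throughout these examples: empty feature lists plus DNNSpec(neurons=[]) yield a state-independent, bias-only approximator, and the pos_log_squish output activation keeps its value positive, as a variance head requires. A sketch of that construction on its own (the DNNSpec import path below is a guess, not taken from the repository):

from algorithms.func_approx_spec import FuncApproxSpec
from algorithms.dnn_spec import DNNSpec  # hypothetical import path

# A single learned positive scalar: no features, no hidden layers.
constant_positive = FuncApproxSpec(
    state_feature_funcs=[],
    sa_feature_funcs=[],
    dnn_spec=DNNSpec(
        neurons=[],  # empty list => no hidden layers
        hidden_activation=DNNSpec.log_squish,
        hidden_activation_deriv=DNNSpec.log_squish_deriv,
        output_activation=DNNSpec.pos_log_squish,  # constrains the output to be positive
        output_activation_deriv=DNNSpec.pos_log_squish_deriv
    )
)
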
github coverdrive / MDP-DP-RL / src / algorithms / adp / adp_pg.py
    state_ff = [
        lambda s: 1. if s == 1 else 0.,
        lambda s: 1. if s == 2 else 0.,
        lambda s: 1. if s == 3 else 0.
    ]
    vf_fa_spec_val = FuncApproxSpec(
        state_feature_funcs=state_ff,
        sa_feature_funcs=[(lambda x, f=f: f(x[0])) for f in state_ff],
        dnn_spec=DNNSpec(
            neurons=[2],
            hidden_activation=DNNSpec.relu,
            hidden_activation_deriv=DNNSpec.relu_deriv,
            output_activation=DNNSpec.identity,
            output_activation_deriv=DNNSpec.identity_deriv
        )
    )
    pol_fa_spec_val = [FuncApproxSpec(
        state_feature_funcs=state_ff,
        sa_feature_funcs=[(lambda x, f=f: f(x[0])) for f in state_ff],
        dnn_spec=DNNSpec(
            neurons=[3],
            hidden_activation=DNNSpec.relu,
            hidden_activation_deriv=DNNSpec.relu_deriv,
            output_activation=DNNSpec.sigmoid,
            output_activation_deriv=DNNSpec.sigmoid_deriv
        )
    )]
    # noinspection PyPep8
    this_score_func = lambda a, p: [1. / p[0] if a == (10,) else 1. / (p[0] - 1.)]
    # noinspection PyPep8
    sa_gen_func = lambda p, n: [((10,) if x == 1 else (-10,)) for x in binomial(1, p[0], n)]
    adp_pg_obj = ADPPolicyGradient(
        mdp_rep_for_adp_pg=mdp_rep_obj,
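
The state_ff list above one-hot encodes a three-state space, and the sigmoid-output policy spec parameterizes a Bernoulli choice between the actions (10,) and (-10,); this_score_func is the corresponding score, i.e. the gradient of the log-probability with respect to p. A generic one-hot helper in the same style, again using default-argument binding:

def one_hot_feature_funcs(n):
    # One indicator per state label 1..n; i=i avoids late binding.
    return [lambda s, i=i: 1. if s == i else 0. for i in range(1, n + 1)]

state_ff = one_hot_feature_funcs(3)
print([f(2) for f in state_ff])   # [0.0, 1.0, 0.0]
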
github coverdrive / MDP-DP-RL / src / examples / port_opt / port_opt.py
    actor_mu = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=DNNSpec(
            neurons=[],
            hidden_activation=DNNSpec.log_squish,
            hidden_activation_deriv=DNNSpec.log_squish_deriv,
            output_activation=DNNSpec.sigmoid,
            output_activation_deriv=DNNSpec.sigmoid_deriv
        )
    )
    actor_nu = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=DNNSpec(
            neurons=[],
            hidden_activation=DNNSpec.log_squish,
            hidden_activation_deriv=DNNSpec.log_squish_deriv,
            output_activation=DNNSpec.pos_log_squish,
            output_activation_deriv=DNNSpec.pos_log_squish_deriv
        )
    )
    actor_mean = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=None
    )
    actor_variance = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=DNNSpec(
            neurons=[],
            hidden_activation=DNNSpec.log_squish,
            hidden_activation_deriv=DNNSpec.log_squish_deriv,
            output_activation=DNNSpec.pos_log_squish,
            output_activation_deriv=DNNSpec.pos_log_squish_deriv
        )
    )
    critic = FuncApproxSpec(
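
This excerpt declares a Gaussian policy head by head: actor_mean gets an unconstrained linear approximator (dnn_spec=None), actor_nu and actor_variance reuse the constant positive pattern shown earlier, and the critic is declared as its own FuncApproxSpec.
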
github coverdrive / MDP-DP-RL / src / examples / port_opt / port_opt.py
        riskless_returns=riskfree_returns,
        returns_gen_funcs=returns_genf,
        cons_util_func=util_func,
        beq_util_func=beq_util,
        discount_rate=rho
    )

    reinforce_val = True
    num_state_samples_val = 500
    num_next_state_samples_val = 30
    num_action_samples_val = 50
    num_batches_val = 3000
    actor_lambda_val = 0.99
    critic_lambda_val = 0.99

    actor_mu = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=DNNSpec(
            neurons=[],
            hidden_activation=DNNSpec.log_squish,
            hidden_activation_deriv=DNNSpec.log_squish_deriv,
            output_activation=DNNSpec.sigmoid,
            output_activation_deriv=DNNSpec.sigmoid_deriv
        )
    )
    actor_nu = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=DNNSpec(
            neurons=[],
            hidden_activation=DNNSpec.log_squish,
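
Note the split in output activations: actor_mu ends in sigmoid, presumably so that the learned mean stays in (0, 1), while actor_nu (like the variance specs above) ends in pos_log_squish to stay positive. The *_val constants are the hyperparameters fed to get_adp_pg_obj in the later excerpt from this file.
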
github coverdrive / MDP-DP-RL / src / examples / port_opt / merton_portfolio.py
        # noinspection PyShadowingNames
        def state_ff(
            state: Tuple[int, float],
            tnu=tnu,
            gam=gam
        ) -> float:
            t = float(state[0]) * self.expiry / time_steps
            tte = self.expiry - t
            if tnu == 0:
                ret = tte + self.epsilon
            else:
                ret = (1. + (tnu * self.epsilon - 1.) * np.exp(-tnu * tte)) / tnu
            mult = state[1] ** gam / gam if gam != 0 else np.log(state[1])
            return ret ** self.gamma * mult / np.exp(self.rho * t)

        return FuncApproxSpec(
            state_feature_funcs=[state_ff],
            sa_feature_funcs=[lambda x, state_ff=state_ff: state_ff(x[0])],
            dnn_spec=None
        )
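
The single feature here appears to bake in the closed-form shape of the Merton value function (a time-to-expiry factor raised to gamma, times CRRA utility of wealth, discounted at rate rho), so with dnn_spec=None the critic only needs to learn a scalar weight w in V(s) ≈ w · state_ff(s). The wrapping pattern generalizes to any scalar feature:

def linear_value_spec(feature):
    # One hand-crafted feature, no network: the critic learns one weight on it.
    return FuncApproxSpec(
        state_feature_funcs=[feature],
        sa_feature_funcs=[lambda x, feature=feature: feature(x[0])],
        dnn_spec=None
    )
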
github coverdrive / MDP-DP-RL / src / examples / port_opt / merton_portfolio.py
def get_actor_mean_spec() -> FuncApproxSpec:
        return FuncApproxSpec(
            state_feature_funcs=[],
            sa_feature_funcs=[],
            dnn_spec=None
        )
github coverdrive / MDP-DP-RL / src / examples / port_opt / port_opt.py
    actor_mean = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=None
    )
    actor_variance = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=DNNSpec(
            neurons=[],
            hidden_activation=DNNSpec.log_squish,
            hidden_activation_deriv=DNNSpec.log_squish_deriv,
            output_activation=DNNSpec.pos_log_squish,
            output_activation_deriv=DNNSpec.pos_log_squish_deriv
        )
    )
    critic = FuncApproxSpec(
        state_feature_funcs=[],
        sa_feature_funcs=[],
        dnn_spec=None
    )

    adp_pg_obj = portfolio_optimization.get_adp_pg_obj(
        reinforce=reinforce_val,
        num_state_samples=num_state_samples_val,
        num_next_state_samples=num_next_state_samples_val,
        num_action_samples=num_action_samples_val,
        num_batches=num_batches_val,
        actor_lambda=actor_lambda_val,
        critic_lambda=critic_lambda_val,
        actor_mu_spec=actor_mu,
        actor_nu_spec=actor_nu,
        actor_mean_spec=actor_mean,
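
This is where the specs built above land: get_adp_pg_obj takes one FuncApproxSpec per actor head plus the critic, alongside the sampling and batching hyperparameters (num_state_samples, num_batches, and the actor/critic lambda values) defined earlier in the file.
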
github coverdrive / MDP-DP-RL / src / examples / port_opt / merton_portfolio.py
def get_actor_mu_spec(self, time_steps: int) -> FuncApproxSpec:
        tnu = self.get_nu()

        # noinspection PyShadowingNames
        def state_ff(state: Tuple[int, float], tnu=tnu) -> float:
            tte = self.expiry * (1. - float(state[0]) / time_steps)
            if tnu == 0:
                ret = 1. / (tte + self.epsilon)
            else:
                ret = tnu / (1. + (tnu * self.epsilon - 1.) * np.exp(-tnu * tte))
            return ret

        return FuncApproxSpec(
            state_feature_funcs=[state_ff],
            sa_feature_funcs=[lambda x, state_ff=state_ff: state_ff(x[0])],
            dnn_spec=DNNSpec(
                neurons=[],
                hidden_activation=DNNSpec.log_squish,
                hidden_activation_deriv=DNNSpec.log_squish_deriv,
                output_activation=DNNSpec.sigmoid,
                output_activation_deriv=DNNSpec.sigmoid_deriv
            )
        )
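
The mu head pairs a hand-crafted time feature with a sigmoid output so the policy mean stays in (0, 1). The feature can be exercised standalone; the constants below are illustrative stand-ins for the object's expiry, epsilon, and time_steps values:

import numpy as np

expiry, epsilon, time_steps, tnu = 1.0, 1e-6, 10, 0.5  # illustrative values

def state_ff(state, tnu=tnu):
    # Time-to-expiry transform, mirroring get_actor_mu_spec above
    tte = expiry * (1. - float(state[0]) / time_steps)
    if tnu == 0:
        return 1. / (tte + epsilon)
    return tnu / (1. + (tnu * epsilon - 1.) * np.exp(-tnu * tte))

print(state_ff((0, 100.0)))   # at t = 0: full time to expiry
print(state_ff((9, 100.0)))   # one step before expiry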