How to use algorithms.common.helper_functions in algorithms

To help you get started, we've selected a few examples that show how algorithms.common.helper_functions is used in popular public projects. In the snippets below it appears as common_utils.

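Most of the examples below call common_utils.soft_update(network, target_network, tau) right after an optimizer step. As a point of reference, here is a minimal sketch of what a soft (Polyak) target update of that shape typically does; it is an illustration of the idea, not the repository's exact implementation:

import torch.nn as nn

def soft_update(local: nn.Module, target: nn.Module, tau: float) -> None:
    """Blend the online network into the target: target = tau * local + (1 - tau) * target."""
    for target_param, local_param in zip(target.parameters(), local.parameters()):
        target_param.data.copy_(
            tau * local_param.data + (1.0 - tau) * target_param.data
        )

With a small tau the target network trails the online network slowly, which stabilizes the bootstrapped targets used in the actor-critic updates shown in the snippets below.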

medipixel/rl_algorithms: algorithms/fd/sac_agent.py (view on GitHub)
                std_reg = self.hyper_params["W_STD_REG"] * std.pow(2).mean()
                pre_activation_reg = self.hyper_params["W_PRE_ACTIVATION_REG"] * (
                    pre_tanh_value.pow(2).sum(dim=-1).mean()
                )
                actor_reg = mean_reg + std_reg + pre_activation_reg

                # actor loss + regularization
                actor_loss += actor_reg

            # train actor
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step()

            # update target networks
            common_utils.soft_update(self.vf, self.vf_target, self.hyper_params["TAU"])

            # update priorities
            new_priorities = vf_loss_element_wise
            new_priorities += self.hyper_params[
                "LAMBDA3"
            ] * actor_loss_element_wise.pow(2)
            new_priorities += self.hyper_params["PER_EPS"]
            new_priorities = new_priorities.data.cpu().numpy().squeeze()
            new_priorities += eps_d
            self.memory.update_priorities(indices, new_priorities)

            # increase beta
            fraction = min(float(self.i_episode) / self.args.episode_num, 1.0)
            self.beta = self.beta + fraction * (1.0 - self.beta)
        else:
            actor_loss = torch.zeros(1)

medipixel/rl_algorithms: algorithms/sac/agent.py (view on GitHub)
                std_reg = self.hyper_params["W_STD_REG"] * std.pow(2).mean()
                pre_activation_reg = self.hyper_params["W_PRE_ACTIVATION_REG"] * (
                    pre_tanh_value.pow(2).sum(dim=-1).mean()
                )
                actor_reg = mean_reg + std_reg + pre_activation_reg

                # actor loss + regularization
                actor_loss += actor_reg

            # train actor
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step()

            # update target networks
            common_utils.soft_update(self.vf, self.vf_target, self.hyper_params["TAU"])
        else:
            actor_loss = torch.zeros(1)

        return (
            actor_loss.item(),
            qf_1_loss.item(),
            qf_2_loss.item(),
            vf_loss.item(),
            alpha_loss.item(),
        )

medipixel/rl_algorithms: algorithms/per/ddpg_agent.py (view on GitHub)
        nn.utils.clip_grad_norm_(self.critic.parameters(), gradient_clip_cr)
        self.critic_optimizer.step()

        # train actor
        gradient_clip_ac = self.hyper_params["GRADIENT_CLIP_AC"]
        actions = self.actor(states)
        actor_loss_element_wise = -self.critic(torch.cat((states, actions), dim=-1))
        actor_loss = torch.mean(actor_loss_element_wise * weights)
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        nn.utils.clip_grad_norm_(self.actor.parameters(), gradient_clip_ac)
        self.actor_optimizer.step()

        # update target networks
        tau = self.hyper_params["TAU"]
        common_utils.soft_update(self.actor, self.actor_target, tau)
        common_utils.soft_update(self.critic, self.critic_target, tau)

        # update priorities in PER
        new_priorities = critic_loss_element_wise
        new_priorities = (
            new_priorities.data.cpu().numpy() + self.hyper_params["PER_EPS"]
        )
        self.memory.update_priorities(indices, new_priorities)

        # increase beta
        fraction = min(float(self.i_episode) / self.args.episode_num, 1.0)
        self.beta = self.beta + fraction * (1.0 - self.beta)

        return actor_loss.item(), critic_loss.item()

medipixel/rl_algorithms: run_reacher_v2.py (view on GitHub)
def main():
    """Main."""
    # env initialization
    env = gym.make("Reacher-v2")
    env_utils.set_env(env, args)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    # set a random seed
    common_utils.set_random_seed(args.seed, env)

    # run
    module_path = "examples.reacher_v2." + args.algo
    example = importlib.import_module(module_path)
    example.run(env, args, state_dim, action_dim)
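
Both runner scripts on this page call common_utils.set_random_seed(args.seed, env) before training starts. A helper of this shape usually just seeds every source of randomness in play; the sketch below is an assumption about its behaviour (note the pre-0.26 Gym env.seed API, matching the gym.make calls in these scripts), not the repository's exact code:

import random

import numpy as np
import torch

def set_random_seed(seed: int, env) -> None:
    """Seed the environment and every library-level RNG the examples rely on."""
    env.seed(seed)           # Gym environment (old seeding API)
    random.seed(seed)        # Python's built-in RNG
    np.random.seed(seed)     # NumPy
    torch.manual_seed(seed)  # PyTorch, on all devices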

medipixel/rl_algorithms: algorithms/bc/ddpg_agent.py (view on GitHub)
            bc_loss = (
                torch.mul(pred_actions, qf_mask) - torch.mul(demo_actions, qf_mask)
            ).pow(2).sum() / n_qf_mask

        # train actor: pg loss + BC loss
        actor_loss = self.lambda1 * policy_loss + self.lambda2 * bc_loss

        gradient_clip_ac = self.hyper_params["GRADIENT_CLIP_AC"]
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        nn.utils.clip_grad_norm_(self.actor.parameters(), gradient_clip_ac)
        self.actor_optimizer.step()

        # update target networks
        tau = self.hyper_params["TAU"]
        common_utils.soft_update(self.actor, self.actor_target, tau)
        common_utils.soft_update(self.critic, self.critic_target, tau)

        return actor_loss.item(), critic_loss.item(), n_qf_mask

medipixel/rl_algorithms: run_lunarlander_continuous_v2.py (view on GitHub)
def main():
    """Main."""
    # env initialization
    env = gym.make("LunarLanderContinuous-v2")
    env_utils.set_env(env, args)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    # set a random seed
    common_utils.set_random_seed(args.seed, env)

    # run
    module_path = "examples.lunarlander_continuous_v2." + args.algo
    example = importlib.import_module(module_path)
    example.run(env, args, state_dim, action_dim)

medipixel/rl_algorithms: algorithms/fd/dqn_agent.py (view on GitHub)
        # q_value regularization
        q_regular = torch.norm(q_values, 2).mean() * self.hyper_params["W_Q_REG"]

        # total loss
        loss = dq_loss + supervised_loss + q_regular

        # train dqn
        self.dqn_optimizer.zero_grad()
        loss.backward()
        clip_grad_norm_(self.dqn.parameters(), self.hyper_params["GRADIENT_CLIP"])
        self.dqn_optimizer.step()

        # update target networks
        tau = self.hyper_params["TAU"]
        common_utils.soft_update(self.dqn, self.dqn_target, tau)

        # update priorities in PER
        loss_for_prior = dq_loss_element_wise.detach().cpu().numpy().squeeze()
        new_priorities = loss_for_prior + self.hyper_params["PER_EPS"]
        new_priorities += eps_d
        self.memory.update_priorities(indices, new_priorities)

        # increase beta
        fraction = min(float(self.i_episode) / self.args.episode_num, 1.0)
        self.beta = self.beta + fraction * (1.0 - self.beta)

        if self.hyper_params["USE_NOISY_NET"]:
            self.dqn.reset_noise()
            self.dqn_target.reset_noise()

        return (

medipixel/rl_algorithms: algorithms/fd/ddpg_agent.py (view on GitHub)
    def _initialize(self):
        """Initialize non-common things."""
        self.use_n_step = self.hyper_params["N_STEP"] > 1

        if not self.args.test:
            # load demo replay memory
            with open(self.args.demo_path, "rb") as f:
                demos = pickle.load(f)

            if self.use_n_step:
                demos, demos_n_step = common_utils.get_n_step_info_from_demo(
                    demos, self.hyper_params["N_STEP"], self.hyper_params["GAMMA"]
                )

                # replay memory for multi-steps
                self.memory_n = ReplayBuffer(
                    buffer_size=self.hyper_params["BUFFER_SIZE"],
                    n_step=self.hyper_params["N_STEP"],
                    gamma=self.hyper_params["GAMMA"],
                    demo=demos_n_step,
                )

            # replay memory for a single step
            self.beta = self.hyper_params["PER_BETA"]
            self.memory = PrioritizedReplayBuffer(
                self.hyper_params["BUFFER_SIZE"],
                self.hyper_params["BATCH_SIZE"],
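
The _initialize snippet above feeds the loaded demonstrations through common_utils.get_n_step_info_from_demo, which splits them into 1-step transitions and n-step transitions for the two replay buffers. As a rough sketch of the underlying idea, assuming each demo transition is a (state, action, reward, next_state, done) tuple, consecutive transitions can be folded into a single n-step transition like this (fold_n_step is a hypothetical name; the real helper's signature and return values may differ):

from collections import deque

def fold_n_step(window: deque, gamma: float):
    """Collapse a window of consecutive 1-step transitions into one n-step transition."""
    state, action = window[0][0], window[0][1]
    _, _, reward, next_state, done = window[-1]
    # Walk backwards through the earlier transitions, discounting as we go
    # and truncating at the earliest terminal step.
    for _, _, r, n_s, d in reversed(list(window)[:-1]):
        reward = r + gamma * reward * (1 - d)
        if d:
            next_state, done = n_s, d
    return state, action, reward, next_state, done

Presumably the repository's helper applies something like this over the whole demo list and returns both the original 1-step transitions and the folded n-step ones, which is what the demos, demos_n_step pair unpacked above suggests.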