How to use the algorithms.common.abstract.agent.AbstractAgent class in algorithms

To help you get started, we’ve selected a few examples from the algorithms package, based on popular ways AbstractAgent is used in public projects.
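All of the excerpts below follow the same pattern: import AbstractAgent, subclass it, and forward the environment and parsed arguments to AbstractAgent.__init__. The minimal sketch below illustrates that pattern under a few assumptions; MyAgent, select_action, and the assumption that AbstractAgent exposes the environment as self.env are illustrative placeholders, not part of the library.

import argparse

import gym
import numpy as np

from algorithms.common.abstract.agent import AbstractAgent


class MyAgent(AbstractAgent):
    """Minimal agent skeleton built on AbstractAgent (illustrative sketch)."""

    def __init__(self, env: gym.Env, args: argparse.Namespace, hyper_params: dict):
        """Store env and args via the base class, then add agent-specific state."""
        AbstractAgent.__init__(self, env, args)  # same call used by every agent below
        self.hyper_params = hyper_params
        self.episode_step = 0

    def select_action(self, state: np.ndarray) -> np.ndarray:
        # placeholder policy; assumes AbstractAgent stores the env as self.env
        return self.env.action_space.sample()

Any abstract methods the base class declares (training loop, checkpointing, and so on) would also need to be implemented; the repository's own agents below show what that looks like in practice.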

github medipixel/rl_algorithms/algorithms/a2c/agent.py (view on GitHub)
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
    ):
        """Initialization.

        Args:
            env (gym.Env): openAI Gym environment
            args (argparse.Namespace): arguments including hyperparameters and training settings
            hyper_params (dict): hyper-parameters
            models (tuple): models including actor and critic
            optims (tuple): optimizers for actor and critic

        """
        AbstractAgent.__init__(self, env, args)

        self.actor, self.critic = models
        self.actor_optimizer, self.critic_optimizer = optims
        self.hyper_params = hyper_params
        self.log_prob = torch.zeros((1,))
        self.predicted_value = torch.zeros((1,))
        self.transition: list = list()
        self.episode_step = 0

        if args.load_from is not None and os.path.exists(args.load_from):
            self.load_params(args.load_from)
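A hedged sketch of how this A2C agent might be constructed: the positional arguments (env, args, hyper_params) and the models/optims tuples mirror the signature above, but the networks, optimizer settings, and hyper-parameter keys are illustrative placeholders, not the repository's actual configuration.

import argparse

import gym
import torch.nn as nn
import torch.optim as optim

from algorithms.a2c.agent import Agent

env = gym.make("CartPole-v1")  # any gym.Env; CartPole is just an example

# stand-in actor/critic networks; the repository builds its own model classes
actor = nn.Sequential(nn.Linear(4, 64), nn.ReLU(), nn.Linear(64, 2))
critic = nn.Sequential(nn.Linear(4, 64), nn.ReLU(), nn.Linear(64, 1))

args = argparse.Namespace(load_from=None)  # minimal; the repo's CLI defines more flags
hyper_params = {"GAMMA": 0.99}  # illustrative key only

agent = Agent(
    env,
    args,
    hyper_params,
    models=(actor, critic),
    optims=(optim.Adam(actor.parameters()), optim.Adam(critic.parameters())),
)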
github medipixel/rl_algorithms/algorithms/per/sac_agent.py (view on GitHub)
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
        target_entropy: float,
    ):
        """Initialization.

        Args:
            env (gym.Env): openAI Gym environment
            args (argparse.Namespace): arguments including hyperparameters and training settings
            hyper_params (dict): hyper-parameters
            models (tuple): models including actor and critic
            optims (tuple): optimizers for actor and critic
            target_entropy (float): target entropy for the inequality constraint

        """
        AbstractAgent.__init__(self, env, args)

        self.actor, self.vf, self.vf_target, self.qf_1, self.qf_2 = models
        self.actor_optimizer, self.vf_optimizer = optims[0:2]
        self.qf_1_optimizer, self.qf_2_optimizer = optims[2:4]
        self.hyper_params = hyper_params
        self.curr_state = np.zeros((1,))
        self.total_step = 0
        self.episode_step = 0

        # automatic entropy tuning
        if hyper_params["AUTO_ENTROPY_TUNING"]:
            self.target_entropy = target_entropy
            self.log_alpha = torch.zeros(1, requires_grad=True, device=device)
            self.alpha_optimizer = optim.Adam(
                [self.log_alpha], lr=hyper_params["LR_ENTROPY"]
            )
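When AUTO_ENTROPY_TUNING is enabled, log_alpha and alpha_optimizer are later used to adapt the entropy temperature. The sketch below shows the standard SAC temperature update they are typically plugged into; it is the textbook form of the loss, not necessarily the repository's exact code.

import torch


def update_temperature(log_alpha: torch.Tensor,
                       alpha_optimizer: torch.optim.Optimizer,
                       log_prob: torch.Tensor,
                       target_entropy: float) -> torch.Tensor:
    """Standard SAC temperature update (illustrative sketch)."""
    # push alpha up when the policy's entropy (-log_prob) falls below target_entropy
    alpha_loss = -(log_alpha * (log_prob + target_entropy).detach()).mean()

    alpha_optimizer.zero_grad()
    alpha_loss.backward()
    alpha_optimizer.step()

    return log_alpha.exp()  # the temperature used to weight the entropy bonus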
github medipixel/rl_algorithms/algorithms/dpg/agent.py (view on GitHub)
import argparse
import os
from typing import Tuple

import gym
import numpy as np
import torch
import torch.nn.functional as F
import wandb

from algorithms.common.abstract.agent import AbstractAgent

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class Agent(AbstractAgent):
    """ActorCritic interacting with environment.

    Attributes:
        actor (nn.Module): actor model to select actions
        critic (nn.Module): critic model to predict values
        hyper_params (dict): hyper-parameters
        actor_optimizer (Optimizer): actor optimizer for training
        critic_optimizer (Optimizer): critic optimizer for training

    """

    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
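The excerpt above stops mid-signature, but the class docstring already lists the moving parts: an actor, a critic, and one optimizer for each. For context, these pieces are typically combined in the deterministic policy-gradient update; the sketch below is the textbook form of that update, not the repository's code, and every name in it is a placeholder.

import torch.nn.functional as F


def dpg_update(actor, critic, actor_optimizer, critic_optimizer,
               state, action, reward, next_state, done, gamma=0.99):
    """Textbook deterministic policy-gradient step (illustrative sketch)."""
    # critic: regress Q(s, a) toward the one-step bootstrapped target
    target = reward + gamma * (1 - done) * critic(next_state, actor(next_state)).detach()
    critic_loss = F.mse_loss(critic(state, action), target)
    critic_optimizer.zero_grad()
    critic_loss.backward()
    critic_optimizer.step()

    # actor: maximize the critic's value of the actor's own action
    actor_loss = -critic(state, actor(state)).mean()
    actor_optimizer.zero_grad()
    actor_loss.backward()
    actor_optimizer.step()

    return actor_loss.item(), critic_loss.item()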
github medipixel/rl_algorithms/algorithms/per/td3_agent.py (view on GitHub)
from typing import Tuple

import gym
import numpy as np
import torch
import wandb

import algorithms.common.helper_functions as common_utils
from algorithms.common.abstract.agent import AbstractAgent
from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBuffer
from algorithms.common.noise import GaussianNoise

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class Agent(AbstractAgent):
    """ActorCritic interacting with environment.

    Attributes:
        memory (PrioritizedReplayBuffer): replay memory
        noise (GaussianNoise): random noise for exploration
        actor (nn.Module): actor model to select actions
        critic_1 (nn.Module): critic model to predict state values
        critic_2 (nn.Module): critic model to predict state values
        critic_target1 (nn.Module): target critic model to predict state values
        critic_target2 (nn.Module): target critic model to predict state values
        actor_target (nn.Module): target actor model to select actions
        critic_optimizer1 (Optimizer): optimizer for training critic_1
        critic_optimizer2 (Optimizer): optimizer for training critic_2
        actor_optimizer (Optimizer): optimizer for training actor
        hyper_params (dict): hyper-parameters
        curr_state (np.ndarray): temporary storage of the current state
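The twin critics and their target copies listed above are the machinery of TD3's clipped double-Q learning. The sketch below shows the standard bootstrap target (target policy smoothing plus the minimum over the two target critics); it is the textbook form, not the repository's exact code, and all names are placeholders.

import torch


def td3_target(actor_target, critic_target1, critic_target2,
               reward, next_state, done,
               gamma=0.99, noise_std=0.2, noise_clip=0.5):
    """Standard TD3 bootstrap target (illustrative sketch)."""
    with torch.no_grad():
        # target policy smoothing: perturb the target action with clipped noise
        # (clamping to the action bounds is omitted here for brevity)
        next_action = actor_target(next_state)
        noise = (torch.randn_like(next_action) * noise_std).clamp(-noise_clip, noise_clip)
        next_action = next_action + noise
        # clipped double-Q: bootstrap from the smaller of the two target critics
        q1 = critic_target1(next_state, next_action)
        q2 = critic_target2(next_state, next_action)
        return reward + gamma * (1 - done) * torch.min(q1, q2)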
github medipixel/rl_algorithms/algorithms/a2c/agent.py (view on GitHub)
import os
from typing import Tuple

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from algorithms.common.abstract.agent import AbstractAgent
import wandb

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class Agent(AbstractAgent):
    """1-Step Advantage Actor-Critic interacting with environment.

    Attributes:
        actor (nn.Module): policy model to select actions
        critic (nn.Module): critic model to evaluate states
        hyper_params (dict): hyper-parameters
        actor_optimizer (Optimizer): optimizer for actor
        critic_optimizer (Optimizer): optimizer for critic
        optimizer (Optimizer): optimizer for training
        episode_step (int): step number of the current episode
        transition (list): recent transition information

    """

    def __init__(
        self,