# SAC-style agent: continuation snippet. The `def` line and the `self`, `env`,
# and `args` parameters are reconstructed from the docstring below; the
# module-level imports (`argparse`, `gym`, `numpy as np`, `torch`,
# `torch.optim as optim`) and `device` are assumed as in the snippets further down.
    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
        target_entropy: float,
    ):
        """Initialization.

        Args:
            env (gym.Env): OpenAI Gym environment
            args (argparse.Namespace): arguments including hyperparameters and training settings
            hyper_params (dict): hyper-parameters
            models (tuple): models including actor and critic
            optims (tuple): optimizers for actor and critic
            target_entropy (float): target entropy for the inequality constraint

        """
        AbstractAgent.__init__(self, env, args)

        self.actor, self.vf, self.vf_target, self.qf_1, self.qf_2 = models
        self.actor_optimizer, self.vf_optimizer = optims[0:2]
        self.qf_1_optimizer, self.qf_2_optimizer = optims[2:4]
        self.hyper_params = hyper_params
        self.curr_state = np.zeros((1,))
        self.total_step = 0
        self.episode_step = 0

        # automatic entropy tuning: learn log(alpha) so the policy's entropy
        # tracks the target entropy constraint
        if hyper_params["AUTO_ENTROPY_TUNING"]:
            self.target_entropy = target_entropy
            self.log_alpha = torch.zeros(1, requires_grad=True, device=device)
            self.alpha_optimizer = optim.Adam(
                [self.log_alpha], lr=hyper_params["LR_ENTROPY"]
            )
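
    # A hedged sketch (not in the source snippet): how the `log_alpha`,
    # `alpha_optimizer`, and `target_entropy` set up above are typically used
    # in SAC's automatic temperature update. The method name and the
    # `log_prob` argument (actor log-probabilities for sampled actions) are
    # illustrative assumptions.
    def _update_temperature(self, log_prob: torch.Tensor) -> torch.Tensor:
        # J(alpha) = E[-alpha * (log pi(a|s) + target_entropy)]
        alpha_loss = (
            -self.log_alpha * (log_prob + self.target_entropy).detach()
        ).mean()

        self.alpha_optimizer.zero_grad()
        alpha_loss.backward()
        self.alpha_optimizer.step()

        # exp(log_alpha) is the entropy temperature used in actor/critic losses
        return self.log_alpha.exp()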
import argparse
import os
from typing import Tuple

import gym
import numpy as np
import torch
import torch.nn.functional as F
import wandb

from algorithms.common.abstract.agent import AbstractAgent

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class Agent(AbstractAgent):
    """ActorCritic interacting with environment.

    Attributes:
        actor (nn.Module): actor model to select actions
        critic (nn.Module): critic model to predict values
        hyper_params (dict): hyper-parameters
        actor_optimizer (Optimizer): actor optimizer for training
        critic_optimizer (Optimizer): critic optimizer for training

    """

    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
    ):
        # NOTE: the source snippet ends mid-signature; the trailing parameters
        # and this minimal body are reconstructed from the Attributes above.
        AbstractAgent.__init__(self, env, args)

        self.actor, self.critic = models
        self.actor_optimizer, self.critic_optimizer = optims
        self.hyper_params = hyper_params
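
# A hedged usage sketch: the constructor above expects networks and optimizers
# packed as tuples. The MLP placeholders and the hyper-parameter key below are
# illustrative assumptions, not the repo's actual model classes or config.
def build_agent(env: gym.Env, args: argparse.Namespace) -> Agent:
    import torch.nn as nn
    import torch.optim as optim

    state_dim = env.observation_space.shape[0]
    n_actions = env.action_space.shape[0]

    actor = nn.Sequential(nn.Linear(state_dim, 64), nn.ReLU(), nn.Linear(64, n_actions))
    critic = nn.Sequential(nn.Linear(state_dim, 64), nn.ReLU(), nn.Linear(64, 1))

    actor_optim = optim.Adam(actor.parameters(), lr=1e-4)
    critic_optim = optim.Adam(critic.parameters(), lr=1e-3)

    hyper_params = {"GAMMA": 0.99}  # assumed key, for illustration only
    return Agent(env, args, hyper_params, (actor, critic), (actor_optim, critic_optim))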
from typing import Tuple

import gym
import numpy as np
import torch
import wandb

import algorithms.common.helper_functions as common_utils
from algorithms.common.abstract.agent import AbstractAgent
from algorithms.common.buffer.priortized_replay_buffer import PrioritizedReplayBuffer
from algorithms.common.noise import GaussianNoise

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class Agent(AbstractAgent):
    """ActorCritic interacting with environment.

    Attributes:
        memory (PrioritizedReplayBuffer): replay memory
        noise (GaussianNoise): random noise for exploration
        actor (nn.Module): actor model to select actions
        critic_1 (nn.Module): critic model to predict state values
        critic_2 (nn.Module): critic model to predict state values
        critic_target1 (nn.Module): target critic model to predict state values
        critic_target2 (nn.Module): target critic model to predict state values
        actor_target (nn.Module): target actor model to select actions
        critic_optimizer1 (Optimizer): optimizer for training critic_1
        critic_optimizer2 (Optimizer): optimizer for training critic_2
        actor_optimizer (Optimizer): optimizer for training actor
        hyper_params (dict): hyper-parameters
        curr_state (np.ndarray): temporary storage of the current state

    """
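
# A hedged sketch (not from the source): with the twin critics, target
# networks, and Gaussian noise described above, the TD3-style bootstrap target
# takes the minimum of the two target critics over a noise-smoothed target
# action. The call conventions (state-action concatenation, action range
# [-1, 1]) and the function name are illustrative assumptions.
def clipped_double_q_target(
    agent: Agent,
    next_states: torch.Tensor,
    rewards: torch.Tensor,
    dones: torch.Tensor,
    gamma: float = 0.99,
    noise_std: float = 0.2,
    noise_clip: float = 0.5,
) -> torch.Tensor:
    with torch.no_grad():
        # target policy smoothing: add clipped Gaussian noise to the target action
        next_actions = agent.actor_target(next_states)
        noise = torch.randn_like(next_actions).mul(noise_std).clamp(-noise_clip, noise_clip)
        next_actions = (next_actions + noise).clamp(-1.0, 1.0)

        # clipped double-Q: take the smaller of the two target critic estimates
        sa = torch.cat((next_states, next_actions), dim=-1)
        next_q = torch.min(agent.critic_target1(sa), agent.critic_target2(sa))

        return rewards + gamma * next_q * (1 - dones)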
import argparse
import os
from typing import Tuple

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import wandb

from algorithms.common.abstract.agent import AbstractAgent

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class Agent(AbstractAgent):
    """1-Step Advantage Actor-Critic interacting with environment.

    Attributes:
        actor (nn.Module): policy model to select actions
        critic (nn.Module): critic model to evaluate states
        hyper_params (dict): hyper-parameters
        actor_optimizer (Optimizer): optimizer for actor
        critic_optimizer (Optimizer): optimizer for critic
        episode_step (int): step number of the current episode
        transition (list): recent transition information

    """

    def __init__(
        self,
        env: gym.Env,
        args: argparse.Namespace,
        hyper_params: dict,
        models: tuple,
        optims: tuple,
    ):
        """Initialization.

        Args:
            env (gym.Env): OpenAI Gym environment
            args (argparse.Namespace): arguments including hyperparameters and training settings
            hyper_params (dict): hyper-parameters
            models (tuple): models including actor and critic
            optims (tuple): optimizers for actor and critic

        """
        AbstractAgent.__init__(self, env, args)

        self.actor, self.critic = models
        self.actor_optimizer, self.critic_optimizer = optims
        self.hyper_params = hyper_params
        self.log_prob = torch.zeros((1,))
        self.predicted_value = torch.zeros((1,))
        self.transition: list = list()
        self.episode_step = 0

        # resume training from a saved checkpoint if one is given
        if args.load_from is not None and os.path.exists(args.load_from):
            self.load_params(args.load_from)
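
    # A hedged sketch (illustrative, not the repo's exact method) of the
    # 1-step advantage actor-critic update that the `log_prob` and
    # `predicted_value` attributes above support. The method name and the
    # "GAMMA" hyper-parameter key are assumptions.
    def _one_step_update(self, reward: float, next_value: torch.Tensor, done: bool):
        gamma = self.hyper_params["GAMMA"]

        # 1-step bootstrapped target: r + gamma * V(s'), no bootstrap at terminal
        q_target = reward + gamma * next_value.detach() * (1 - int(done))
        advantage = (q_target - self.predicted_value).detach()

        # critic: regress V(s) toward the fixed bootstrapped target
        critic_loss = F.mse_loss(self.predicted_value, q_target)
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # actor: policy gradient weighted by the 1-step advantage
        actor_loss = (-self.log_prob * advantage).mean()
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()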