How to use the lagom.envs.flatdim function in lagom

To help you get started, we’ve selected a few lagom examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.

github zuoxingdong / lagom / examples / reinforcement_learning / vpg / logs / default / source_files / agent.py View on Github external
def __init__(self, config, env, device, **kwargs):
        """Construct the fully-connected feature extractor.

        Hidden-layer sizes come from ``config['nn.sizes']``. Each linear
        layer is orthogonally initialized (relu gain, zero bias) and paired
        with a ``LayerNorm`` of the same width. The module is moved to
        ``device`` at the end.
        """
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        hidden_sizes = config['nn.sizes']
        input_dim = flatdim(env.observation_space)
        self.feature_layers = make_fc(input_dim, hidden_sizes)
        for fc in self.feature_layers:
            ortho_init(fc, nonlinearity='relu', constant_bias=0.0)
        norms = [nn.LayerNorm(width) for width in hidden_sizes]
        self.layer_norms = nn.ModuleList(norms)

        self.to(self.device)
github zuoxingdong / lagom / examples / reinforcement_learning / ppo / logs / compare_tanh_and_relu_plus_layernorm / relu+layernorm / source_files / agent.py View on Github external
def __init__(self, config, env, device, **kwargs):
        """Build the actor-critic agent.

        A shared MLP feature network feeds an action head chosen by the
        action-space type (categorical for ``Discrete``, diagonal Gaussian
        for ``Box``) and a scalar value head. Also sets up the Adam
        optimizer and, optionally, a linear learning-rate scheduler.
        """
        super().__init__(config, env, device, **kwargs)

        # Last hidden size of the MLP is the feature dimension seen by the heads.
        out_dim = config['nn.sizes'][-1]
        self.feature_network = MLP(config, env, device, **kwargs)

        action_space = env.action_space
        if isinstance(action_space, Discrete):
            self.action_head = CategoricalHead(out_dim, action_space.n, device, **kwargs)
        elif isinstance(action_space, Box):
            self.action_head = DiagGaussianHead(out_dim,
                                                flatdim(action_space),
                                                device,
                                                config['agent.std0'],
                                                config['agent.std_style'],
                                                config['agent.std_range'],
                                                config['agent.beta'],
                                                **kwargs)

        self.V_head = nn.Linear(out_dim, 1).to(device)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)

        self.total_timestep = 0

        self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
        if config['agent.use_lr_scheduler']:
            self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.timestep'], min_lr=1e-8)
github zuoxingdong / lagom / examples / reinforcement_learning / ppo / agent.py View on Github external
def __init__(self, config, env, device, **kwargs):
        """Set up the MLP feature network for the PPO agent.

        The stack of linear layers is sized by ``config['nn.sizes']`` with
        the flattened observation as input; each layer gets orthogonal
        (relu) initialization and a matching ``LayerNorm``.
        """
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        sizes = config['nn.sizes']
        self.feature_layers = make_fc(flatdim(env.observation_space), sizes)
        for linear in self.feature_layers:
            ortho_init(linear, nonlinearity='relu', constant_bias=0.0)
        self.layer_norms = nn.ModuleList([nn.LayerNorm(n) for n in sizes])

        self.to(self.device)
github zuoxingdong / lagom / baselines / vpg / logs / default / source_files / agent.py View on Github external
def __init__(self, config, env, device, **kwargs):
        """Assemble the VPG actor-critic: shared MLP, action head, value head.

        The action head depends on the action space: ``CategoricalHead``
        for ``Discrete``, ``DiagGaussianHead`` (with initial std from
        ``config['agent.std0']``) for ``Box``. An Adam optimizer and an
        optional linear LR scheduler are created over all parameters.
        """
        super().__init__(config, env, device, **kwargs)

        dim = config['nn.sizes'][-1]  # width of the final hidden layer
        self.feature_network = MLP(config, env, device, **kwargs)

        space = env.action_space
        if isinstance(space, Discrete):
            self.action_head = CategoricalHead(dim, space.n, device, **kwargs)
        elif isinstance(space, Box):
            self.action_head = DiagGaussianHead(dim, flatdim(space), device, config['agent.std0'], **kwargs)

        self.V_head = nn.Linear(dim, 1).to(device)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)

        self.total_timestep = 0

        self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
        if config['agent.use_lr_scheduler']:
            self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.timestep'], min_lr=1e-8)
github zuoxingdong / lagom / baselines / ppo / logs / default / source_files / agent.py View on Github external
def __init__(self, config, env, device, **kwargs):
        """Create the tanh-initialized feature network plus a value head.

        Linear layers are sized by ``config['nn.sizes']`` over the flattened
        observation; the scalar value head reads the last hidden layer and
        is orthogonally initialized with unit weight scale.
        """
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        sizes = config['nn.sizes']
        self.feature_layers = make_fc(flatdim(env.observation_space), sizes)
        for linear in self.feature_layers:
            ortho_init(linear, nonlinearity='tanh', constant_bias=0.0)

        last_size = sizes[-1]
        self.V_head = nn.Linear(last_size, 1).to(device)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)

        self.to(self.device)
github zuoxingdong / lagom / baselines / sac / logs / old_default / source_files / agent.py View on Github external
def __init__(self, config, env, device, **kwargs):
        """Build twin Q-networks for SAC-style critics.

        Both critics take the concatenated (observation, action) vector,
        pass it through two 256-unit hidden layers, and output a scalar
        Q-value.
        """
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        # Input is flattened observation concatenated with flattened action.
        in_dim = flatdim(env.observation_space) + flatdim(env.action_space)
        hidden = [256, 256]

        # Q1
        self.first_feature_layers = make_fc(in_dim, hidden)
        self.first_Q_head = nn.Linear(256, 1)

        # Q2
        self.second_feature_layers = make_fc(in_dim, hidden)
        self.second_Q_head = nn.Linear(256, 1)

        self.to(self.device)
github zuoxingdong / lagom / baselines / td3 / logs / default / source_files / replay_buffer.py View on Github external
def __init__(self, env, capacity, device):
        """Pre-allocate a fixed-size replay buffer as flat float32 arrays.

        ``capacity`` rows are reserved for observations, actions, rewards,
        next observations, and masks. ``size`` tracks how many entries are
        valid; ``pointer`` is the next write index (ring-buffer style).
        """
        self.env = env
        self.capacity = capacity
        self.device = device

        obs_dim = flatdim(env.observation_space)
        act_dim = flatdim(env.action_space)
        self.observations = np.zeros([capacity, obs_dim], dtype=np.float32)
        self.actions = np.zeros([capacity, act_dim], dtype=np.float32)
        self.rewards = np.zeros(capacity, dtype=np.float32)
        self.next_observations = np.zeros([capacity, obs_dim], dtype=np.float32)
        self.masks = np.zeros(capacity, dtype=np.float32)

        self.size = 0
        self.pointer = 0