Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_timestep_env(env_id):
    """Check that ``TimeStepEnv`` reports the same transitions as the raw env.

    Two environments are created from ``env_id``: one used directly and one
    wrapped in ``TimeStepEnv``. Both are seeded with 0 and driven with the
    same actions; after every step the wrapper's ``observation``, ``reward``
    and ``done`` must match what the raw environment returned.

    Args:
        env_id: environment id passed to ``gym.make``.
    """
    env = gym.make(env_id)
    wrapped_env = TimeStepEnv(gym.make(env_id))
    # Same seed for both copies so their trajectories can be compared.
    env.seed(0)
    wrapped_env.seed(0)

    obs = env.reset()
    timestep = wrapped_env.reset()
    assert timestep.first()
    assert np.allclose(timestep.observation, obs)

    for _ in range(env.spec.max_episode_steps):
        # One sampled action is fed to both environments.
        action = env.action_space.sample()
        obs, reward, done, _info = env.step(action)
        timestep = wrapped_env.step(action)
        assert np.allclose(timestep.observation, obs)
        assert timestep.reward == reward
        assert timestep.done == done
        if done:
            # Stepping a finished episode without reset() is undefined in
            # Gym, so stop comparing once the episode ends.
            break
# Build a seeded environment from `config` and return it wrapped in TimeStepEnv.
#
# Parameters:
#   config: indexed with 'env.id', 'env.clip_action', 'env.normalize_obs',
#           'env.normalize_reward' and (for NormalizeReward) 'agent.gamma'.
#   seed:   passed to env.seed() and to the observation/action space seed().
#   mode:   must be 'train' or 'eval' (enforced with an assert).
#
# Wrappers appear in this order: ClipAction, RecordEpisodeStatistics,
# NormalizeObservation, NormalizeReward, TimeStepEnv.
#
# NOTE(review): indentation was lost in this copy of the code, so it is not
# visible whether the two normalization checks below are nested inside the
# `mode == 'train'` branch or apply to both modes - confirm against the
# original file before relying on either reading.
def make_env(config, seed, mode):
assert mode in ['train', 'eval']
env = gym.make(config['env.id'])
# Seed the environment and both of its spaces with the same value.
env.seed(seed)
env.observation_space.seed(seed)
env.action_space.seed(seed)
# Action clipping is only applied when enabled in config and the action space is a Box.
if config['env.clip_action'] and isinstance(env.action_space, gym.spaces.Box):
env = gym.wrappers.ClipAction(env)
# Episode statistics are recorded in 'train' mode.
if mode == 'train':
env = RecordEpisodeStatistics(env, deque_size=100)
# Optional observation normalization, constructed with clip=5.
if config['env.normalize_obs']:
env = NormalizeObservation(env, clip=5.)
# Optional reward normalization, constructed with clip=10 and the agent's gamma.
if config['env.normalize_reward']:
env = NormalizeReward(env, clip=10., gamma=config['agent.gamma'])
# TimeStepEnv is always the outermost wrapper.
env = TimeStepEnv(env)
return env
def make_env(config, seed, mode):
    """Create a seeded environment and return it wrapped in ``TimeStepEnv``.

    Args:
        config: mapping read for the keys 'env.id' and 'env.clip_action'.
        seed: seed given to the environment and to its observation and
            action spaces.
        mode: 'train' or 'eval'; it is only validated here, both values
            produce the same wrapper stack.

    Returns:
        The environment wrapped in ``TimeStepEnv``.

    Raises:
        AssertionError: if ``mode`` is neither 'train' nor 'eval'.
    """
    assert mode in ('train', 'eval')

    base_env = gym.make(config['env.id'])
    base_env.seed(seed)
    base_env.observation_space.seed(seed)
    base_env.action_space.seed(seed)

    # Clip actions only when requested and the action space is a Box.
    # TODO: use tanh to squash policy output when RescaleAction wrapper merged in gym
    if config['env.clip_action']:
        if isinstance(base_env.action_space, gym.spaces.Box):
            base_env = gym.wrappers.ClipAction(base_env)

    return TimeStepEnv(base_env)
def make_env(config, seed, mode):
    """Create a seeded, action-normalized environment wrapped in ``TimeStepEnv``.

    Args:
        config: mapping read for the key 'env.id'.
        seed: seed given to the environment and to its observation and
            action spaces.
        mode: 'train' or 'eval'; in 'eval' mode the environment is also
            wrapped in ``RecordEpisodeStatistics`` (deque_size=100).

    Returns:
        The environment wrapped in ``TimeStepEnv``.

    Raises:
        AssertionError: if ``mode`` is neither 'train' nor 'eval'.
    """
    assert mode in ('train', 'eval')

    raw_env = gym.make(config['env.id'])
    raw_env.seed(seed)
    raw_env.observation_space.seed(seed)
    raw_env.action_space.seed(seed)

    # TODO: use gym new wrapper RescaleAction when it's merged
    wrapped = NormalizeAction(raw_env)
    if mode == 'eval':
        wrapped = RecordEpisodeStatistics(wrapped, deque_size=100)

    return TimeStepEnv(wrapped)
def make_env(config, seed, mode):
    """Build the environment named in ``config`` and wrap it in ``TimeStepEnv``.

    Args:
        config: mapping read for the keys 'env.id' and 'env.clip_action'.
        seed: seed applied to the environment, its observation space and
            its action space.
        mode: must be 'train' or 'eval'; it does not otherwise affect the
            result.

    Returns:
        The environment wrapped in ``TimeStepEnv``.

    Raises:
        AssertionError: if ``mode`` is neither 'train' nor 'eval'.
    """
    assert mode in ('train', 'eval')

    env = gym.make(config['env.id'])
    env.seed(seed)
    env.observation_space.seed(seed)
    env.action_space.seed(seed)

    clip_requested = config['env.clip_action']
    if clip_requested and isinstance(env.action_space, gym.spaces.Box):
        # TODO: use tanh to squash policy output when RescaleAction wrapper merged in gym
        env = gym.wrappers.ClipAction(env)

    return TimeStepEnv(env)
def make_env(config, seed, mode):
    """Create a seeded environment and return it wrapped in ``TimeStepEnv``.

    Args:
        config: mapping read for the key 'env.id'.
        seed: seed given to the environment and to its observation and
            action spaces.
        mode: 'train' or 'eval'; in 'eval' mode the environment is first
            wrapped in ``RecordEpisodeStatistics`` (deque_size=100).

    Returns:
        The environment wrapped in ``TimeStepEnv``.

    Raises:
        AssertionError: if ``mode`` is neither 'train' nor 'eval'.
    """
    assert mode in ('train', 'eval')

    created = gym.make(config['env.id'])
    created.seed(seed)
    created.observation_space.seed(seed)
    created.action_space.seed(seed)

    if mode == 'eval':
        created = RecordEpisodeStatistics(created, deque_size=100)

    return TimeStepEnv(created)
def make_env(config, seed, mode):
    """Return a seeded ``TimeStepEnv`` for the environment id in ``config``.

    Args:
        config: mapping read for the keys 'env.id' and 'env.clip_action'.
        seed: seed passed to the environment and to both of its spaces.
        mode: validated to be 'train' or 'eval'; the same wrappers are
            applied for either value.

    Returns:
        The environment wrapped in ``TimeStepEnv``.

    Raises:
        AssertionError: if ``mode`` is neither 'train' nor 'eval'.
    """
    assert mode in ('train', 'eval')

    inner = gym.make(config['env.id'])
    inner.seed(seed)
    inner.observation_space.seed(seed)
    inner.action_space.seed(seed)

    # Action clipping applies only when enabled and the space is a Box.
    # TODO: use tanh to squash policy output when RescaleAction wrapper merged in gym
    wants_clipping = config['env.clip_action'] and isinstance(inner.action_space, gym.spaces.Box)
    if wants_clipping:
        inner = gym.wrappers.ClipAction(inner)

    outer = TimeStepEnv(inner)
    return outer