How to use the gym.Wrapper class in gym

To help you get started, we’ve selected a few gym.Wrapper examples, based on popular ways the class is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github HumanCompatibleAI / adversarial-policies / src / aprl / envs / View on Github external
:param action_n (list): actions per agent.
           :return a tuple containing:
                obs_n (tuple): observations per agent.
                reward_n (tuple): reward per agent.
                done (bool): episode over.
                info (dict): auxiliary diagnostic info."""
        raise NotImplementedError

    def reset(self):
        """Reset the environment to its initial state.

        Abstract hook: this base implementation always raises, so subclasses
        must provide their own version.

        :return: observation (list): one entry per agent.
        :raises NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError

class MultiWrapper(Wrapper, MultiAgentEnv):
    """Wrapper for multi-agent environments.

    Both parent classes are initialised explicitly: ``Wrapper`` receives the
    wrapped environment, and ``MultiAgentEnv`` receives the ``num_agents``
    value looked up on ``env`` through ``getattr_unwrapped``.
    """

    def __init__(self, env):
        # Initialise the wrapper side first, then look up the agent count.
        Wrapper.__init__(self, env)
        agent_count = getattr_unwrapped(env, "num_agents")
        MultiAgentEnv.__init__(self, agent_count)

class FakeSingleSpaces(gym.Env):
    """Placeholder ``gym.Env`` exposing a single agent's spaces.

    The observation and action spaces of agent ``agent_id`` are copied out of a
    multi-agent environment. This is handy for functions that build policy or
    reward networks from an environment. It is not meant to be run: per the
    original authors, calling reset, step or other methods throws an error.
    """

    def __init__(self, env, agent_id=0):
        """Pick out the spaces belonging to ``agent_id``.

        :param env: environment whose ``observation_space`` and ``action_space``
            each expose an indexable ``spaces`` attribute.
        :param agent_id: index into those ``spaces`` collections (default 0).
        """
        per_agent_obs = env.observation_space.spaces
        self.observation_space = per_agent_obs[agent_id]
        per_agent_act = env.action_space.spaces
        self.action_space = per_agent_act[agent_id]

github vwxyzjn / cleanrl / cleanrl / View on Github external
assert len(env.unwrapped.get_action_meanings()) >= 3

    def reset(self, **kwargs):
        """Reset the wrapped environment, then take actions 1 and 2.

        If either of those steps ends the episode, the environment is reset
        again before continuing.

        :param kwargs: forwarded unchanged to every ``self.env.reset`` call.
        :return: the observation produced by the final ``step(2)`` call.
        """
        self.env.reset(**kwargs)
        obs, _, done, _ = self.env.step(1)
        if done:
            self.env.reset(**kwargs)
        obs, _, done, _ = self.env.step(2)
        if done:
            self.env.reset(**kwargs)
        return obs

    def step(self, ac):
        """Pass ``ac`` straight to the wrapped environment's ``step``.

        :param ac: action to forward.
        :return: whatever ``self.env.step(ac)`` returns, unchanged.
        """
        outcome = self.env.step(ac)
        return outcome

class EpisodicLifeEnv(gym.Wrapper):
    def __init__(self, env):
        """Make end-of-life == end-of-episode, but only reset on true game over.

        Done by DeepMind for the DQN and co. since it helps value estimation.

        :param env: environment to wrap; handed to ``gym.Wrapper.__init__``.
        """
        gym.Wrapper.__init__(self, env)
        # Life count used by step() to detect a lost life; starts at 0.
        self.lives = 0
        # step() overwrites this with the wrapped env's `done` flag.
        self.was_real_done = True

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.was_real_done = done
        # check current lives, make loss of life terminal,
        # then update lives to handle bonus lives
        lives = self.env.unwrapped.ale.lives()
        if lives < self.lives and lives > 0:
            # for Qbert sometimes we stay in lives == 0 condition for a few frames
github PaddlePaddle / PARL / examples / DQN / View on Github external
def FireResetEnv(env):
    """Wrap ``env`` in ``_FireResetEnv`` when its action set includes 'FIRE'.

    :param env: an environment, optionally already wrapped in ``gym.Wrapper``.
    :return: ``_FireResetEnv(env)`` if 'FIRE' is among the base environment's
        action meanings, otherwise ``env`` itself.
    """
    # Action meanings are read from the innermost env when `env` is a wrapper,
    # and from `env` itself otherwise.
    if isinstance(env, gym.Wrapper):
        baseenv = env.unwrapped
    else:
        baseenv = env
    if 'FIRE' in baseenv.get_action_meanings():
        return _FireResetEnv(env)
    return env
github MillionIntegrals / vel / vel / openai / baselines / common / View on Github external
def __init__(self, env):
        """Wrap an environment that stays fixed until FIRE is pressed.

        Checks that the unwrapped environment lists 'FIRE' as action 1 and
        offers at least three actions.
        """
        gym.Wrapper.__init__(self, env)
        assert 'FIRE' == env.unwrapped.get_action_meanings()[1]
        assert 3 <= len(env.unwrapped.get_action_meanings())
github SurrealAI / surreal / surreal / env / View on Github external
def __init__(self, env, skip=4):
        """Return only every `skip`-th frame.

        :param env: environment to wrap; its ``observation_space.shape`` sizes
            the internal buffer.
        :param skip: stored as ``self._skip`` (default 4).
        """
        gym.Wrapper.__init__(self, env)
        # Two-slot uint8 buffer holding the most recent raw observations
        # (for max pooling across time steps).
        buffer_shape = (2,) + env.observation_space.shape
        self._obs_buffer = np.zeros(buffer_shape, dtype='uint8')
        self._skip = skip
github vwxyzjn / cleanrl / cleanrl / View on Github external
assert len(env.unwrapped.get_action_meanings()) >= 3

    def reset(self, **kwargs):
        """Reset the wrapped environment, then take actions 1 and 2.

        If either of those steps ends the episode, the environment is reset
        again before continuing.

        :param kwargs: forwarded unchanged to every ``self.env.reset`` call.
        :return: the observation produced by the final ``step(2)`` call.
        """
        self.env.reset(**kwargs)
        obs, _, done, _ = self.env.step(1)
        if done:
            self.env.reset(**kwargs)
        obs, _, done, _ = self.env.step(2)
        if done:
            self.env.reset(**kwargs)
        return obs

    def step(self, ac):
        """Pass ``ac`` straight to the wrapped environment's ``step``.

        :param ac: action to forward.
        :return: whatever ``self.env.step(ac)`` returns, unchanged.
        """
        outcome = self.env.step(ac)
        return outcome

class EpisodicLifeEnv(gym.Wrapper):
    def __init__(self, env):
        """Make end-of-life == end-of-episode, but only reset on true game over.

        Done by DeepMind for the DQN and co. since it helps value estimation.

        :param env: environment to wrap; handed to ``gym.Wrapper.__init__``.
        """
        gym.Wrapper.__init__(self, env)
        # Life count used by step() to detect a lost life; starts at 0.
        self.lives = 0
        # step() overwrites this with the wrapped env's `done` flag.
        self.was_real_done = True

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.was_real_done = done
        # check current lives, make loss of life terminal,
        # then update lives to handle bonus lives
        lives = self.env.unwrapped.ale.lives()
        if lives < self.lives and lives > 0:
            # for Qbert sometimes we stay in lives == 0 condition for a few frames
github openai / sonic-on-ray / sonic_on_ray / View on Github external
return self._out

    def __array__(self, dtype=None):
        """Return the result of ``self._force()``, optionally cast to ``dtype``.

        :param dtype: when not None, the forced value is converted with
            ``astype(dtype)``; otherwise it is returned as-is.
        """
        forced = self._force()
        return forced if dtype is None else forced.astype(dtype)

    def __len__(self):
        """Return the length of the value produced by ``self._force()``."""
        forced = self._force()
        return len(forced)

    def __getitem__(self, i):
        """Index into the value produced by ``self._force()``.

        :param i: index or key applied to the forced value.
        """
        forced = self._force()
        return forced[i]

class FrameStack(gym.Wrapper):
    def __init__(self, env, k):
        """Stack the k last frames.

        Returns a lazy array, which is much more memory efficient.

        See Also
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(shp[0], shp[1], shp[2] * k),
github learnables / cherry / cherry / envs / View on Github external
#!/usr/bin/env python3

import gym

from .utils import get_space_dimension, is_vectorized, is_discrete

class Wrapper(gym.Wrapper):

    """
    This class allows to chain Environment Wrappers while still being able to
    access the properties of wrapped wrappers.

    Example:

        env = gym.make('MyEnv-v0')
        env = envs.Logger(env)
        env = envs.Runner(env)
        env.log('asdf', 23)  # Uses log() method from envs.Logger.
    """

    def is_vectorized(self):
        """Return the result of the module-level ``is_vectorized`` helper for this wrapper."""
        # Inside the method body the bare name resolves to the imported helper,
        # not to this method.
        return is_vectorized(self)
github YuhangSong / DEHRL / deep_rl / component / View on Github external
if self.override_num_noops is not None:
            noops = self.override_num_noops
            noops = self.unwrapped.np_random.randint(1, self.noop_max + 1) #pylint: disable=E1101
        assert noops > 0
        obs = None
        for _ in range(noops):
            obs, _, done, _ = self.env.step(self.noop_action)
            if done:
                obs = self.env.reset(**kwargs)
        return obs

    def step(self, ac):
        """Pass ``ac`` straight to the wrapped environment's ``step``.

        :param ac: action to forward.
        :return: whatever ``self.env.step(ac)`` returns, unchanged.
        """
        outcome = self.env.step(ac)
        return outcome

class FireResetEnv(gym.Wrapper):
    """Wrapper that takes actions 1 (FIRE) and 2 right after every reset."""

    def __init__(self, env):
        """Take action on reset for environments that are fixed until firing.

        :param env: environment to wrap; its unwrapped form must list 'FIRE'
            as action 1 and offer at least three actions.
        """
        gym.Wrapper.__init__(self, env)
        assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
        assert len(env.unwrapped.get_action_meanings()) >= 3

    def reset(self, **kwargs):
        """Reset the wrapped environment, then take actions 1 and 2.

        If either of those steps ends the episode, the environment is reset
        again before continuing.

        :param kwargs: forwarded unchanged to every ``self.env.reset`` call.
        :return: the observation produced by the final ``step(2)`` call.
        """
        self.env.reset(**kwargs)
        obs, _, done, _ = self.env.step(1)
        if done:
            self.env.reset(**kwargs)
        obs, _, done, _ = self.env.step(2)
        if done:
            self.env.reset(**kwargs)
        return obs