How to use the gym.utils module in gym

To help you get started, we've selected a few gym.utils examples, based on popular ways it is used in public projects.

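Most of the examples below rely on two parts of gym.utils: EzPickle, which makes an environment picklable by remembering the arguments passed to its constructor, and seeding.np_random, which returns a seeded NumPy random-number generator. The toy environment below is a minimal, made-up sketch showing how the two are typically wired together; it is not taken from any of the projects listed here.

import gym
from gym import spaces, utils
from gym.utils import seeding


class CoinFlipEnv(gym.Env, utils.EzPickle):
    """Toy environment used only to illustrate gym.utils."""

    def __init__(self, bias=0.5):
        utils.EzPickle.__init__(self, bias=bias)  # record constructor args for pickling
        self.bias = bias
        self.action_space = spaces.Discrete(2)       # guess heads or tails
        self.observation_space = spaces.Discrete(2)  # the coin's face
        self.seed()

    def seed(self, seed=None):
        # seeding.np_random returns a seeded RandomState plus the seed actually used
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        return 0

    def step(self, action):
        obs = int(self.np_random.uniform() < self.bias)
        reward = float(action == obs)
        return obs, reward, True, {}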

github fabiopardo / qmap / qmap/envs/custom_montezuma.py (view on GitHub)
def __init__(self, screen_ratio=4, coords_ratio=4, use_color=True, use_rc_frame=True, stack=3, frame_skip=4, action_repeat=4):
    utils.EzPickle.__init__(self, 'montezuma_revenge', 'image')
    self.env = gym.make('MontezumaRevengeNoFrameskip-v4').unwrapped
    self.ale = self.env.ale
    self.ale.setFloat('repeat_action_probability'.encode('utf-8'), 0)  # deterministic
    self.max_lives = self.ale.lives()
    # observations
    self.screen_ratio = screen_ratio
    self.original_height = 224
    self.original_width = 160
    self.screen_height = self.original_height // screen_ratio
    self.screen_width = self.original_width // screen_ratio
    self.screen_shape = (self.screen_height, self.screen_width)
    self.use_color = use_color
    self.use_rc_frame = use_rc_frame
    self.stack = stack
    self.frame_skip = frame_skip
    n_frames = stack * (3 * use_color + 1 * (not use_color) + use_rc_frame)
github avisingh599 / reward-learning-rl / softlearning/environments/gym/mujoco/hopper_env.py (view on GitHub)
import numpy as np
from gym.envs.mujoco import mujoco_env
from gym import utils


DEFAULT_CAMERA_CONFIG = {
    'trackbodyid': 2,
    'distance': 3.0,
    'lookat': np.array((0.0, 0.0, 1.15)),
    'elevation': -20.0,
}


class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self,
                 xml_file='hopper.xml',
                 forward_reward_weight=1.0,
                 ctrl_cost_weight=1e-3,
                 healthy_reward=1.0,
                 terminate_when_unhealthy=True,
                 healthy_state_range=(-100.0, 100.0),
                 healthy_z_range=(0.7, float('inf')),
                 healthy_angle_range=(-0.2, 0.2),
                 reset_noise_scale=5e-3,
                 exclude_current_positions_from_observation=True):
        utils.EzPickle.__init__(**locals())

        self._forward_reward_weight = forward_reward_weight

        self._ctrl_cost_weight = ctrl_cost_weight
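
The utils.EzPickle.__init__(**locals()) call above saves every constructor argument so the environment can be pickled and rebuilt by re-running __init__ with the same values. A small stand-alone sketch of that behaviour (the ToyEnv class and its arguments are invented for illustration):

import pickle
from gym import utils


class ToyEnv(utils.EzPickle):
    def __init__(self, gravity=9.81, horizon=1000):
        # EzPickle stores these arguments; unpickling calls __init__ again with them.
        utils.EzPickle.__init__(self, gravity=gravity, horizon=horizon)
        self.gravity = gravity
        self.horizon = horizon


env = ToyEnv(gravity=3.71)
restored = pickle.loads(pickle.dumps(env))
assert restored.gravity == 3.71 and restored.horizon == 1000
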
github nadavbh12 / gym-rle / gym_rle/envs/rle_env.py (view on GitHub)
    action_string, first = check_button(action_string, action, 0x4000, 'L3', first)
    action_string, first = check_button(action_string, action, 0x8000, 'R3', first)
    if action_string == '':
        action_string = 'NOOP'

    return action_string


def to_ram(rle):
    ram_size = rle.getRAMSize()
    ram = np.zeros(ram_size, dtype=np.uint8)
    rle.getRAM(ram)
    return ram


class RleEnv(gym.Env, utils.EzPickle):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game='classic_kong', obs_type='ram', frameskip=(2, 5), repeat_action_probability=0.):
        """Frameskip should be either a tuple (indicating a random range to
        choose from, with the top value excluded), or an int."""

        utils.EzPickle.__init__(self, game, obs_type)
        assert obs_type in ('ram', 'image')

        self.game_path = self.get_rom_path(game)

        self._obs_type = obs_type
        self.frameskip = frameskip
        self.rle = rle_python_interface.RLEInterface()
        self.viewer = None
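
The docstring above describes the frameskip convention: an int is a fixed number of frames, while a tuple is a random range whose top value is excluded. A hypothetical helper (not part of gym-rle) that resolves that convention each step could look like this:

import numpy as np


def resolve_frameskip(frameskip, rng=np.random):
    """Return the number of frames to repeat the action for on this step."""
    if isinstance(frameskip, int):
        return frameskip               # fixed skip
    low, high = frameskip
    return rng.randint(low, high)      # random skip, top value excluded


resolve_frameskip(4)        # always 4
resolve_frameskip((2, 5))   # 2, 3 or 4
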
github SuReLI / dyna-gym / dyna_gym/envs/nscliff_v0.py (view on GitHub)
def _seed(self, seed=None):
    self.np_random, seed = utils.seeding.np_random(seed)
    return [seed]
github lerrel / gym-adv / gym/envs/adversarial/mujoco/inverted_pendulum.py (view on GitHub)
def __init__(self):
    utils.EzPickle.__init__(self)
    mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
    ## Adversarial setup
    self._adv_f_bname = b'pole'  # byte-string name of the body on which the adversary force will be applied
    bnames = self.model.body_names
    self._adv_bindex = bnames.index(self._adv_f_bname)  # index of the body on which the adversary force will be applied
    adv_max_force = 5.
    high_adv = np.ones(2) * adv_max_force
    low_adv = -high_adv
    self.adv_action_space = spaces.Box(low_adv, high_adv)
    self.pro_action_space = self.action_space
github openai / mlsh / gym/gym/envs/mujoco/ant.py (view on GitHub)
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env

class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
        utils.EzPickle.__init__(self)

    def _step(self, a):
        xposbefore = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.get_body_com("torso")[0]
        forward_reward = (xposafter - xposbefore)/self.dt
        ctrl_cost = .5 * np.square(a).sum()
        contact_cost = 0.5 * 1e-3 * np.sum(
            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        state = self.state_vector()
        notdone = np.isfinite(state).all() \
github VincentYu68 / policy_transfer / policy_transfer/envs/mujoco/half_cheetah.py (view on GitHub)
        self.include_obs_history = 1
        self.include_act_history = 0

        # data structure for modeling delays in observation and action
        self.observation_buffer = []
        self.action_buffer = []
        self.obs_delay = 0
        self.act_delay = 0
        self.tilt_z = 0

        self.current_step = 0
        self.max_step = 1000

        mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
        utils.EzPickle.__init__(self)
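
The observation_buffer / action_buffer fields above model delayed observations and actions. A hypothetical helper, not taken from the policy_transfer code, that reads the value obs_delay steps in the past from such a buffer:

def delayed(buffer, delay, fallback):
    """Return the item `delay` steps in the past, or `fallback` while the
    buffer is still too short (e.g. right after a reset)."""
    index = len(buffer) - 1 - delay
    return buffer[index] if index >= 0 else fallback


observations = []
observations.append([0.1, 0.2])
observations.append([0.3, 0.4])
delayed(observations, 1, fallback=[0.0, 0.0])  # -> [0.1, 0.2]
delayed(observations, 5, fallback=[0.0, 0.0])  # -> [0.0, 0.0]
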
github JannerM / mbpo / mbpo/env/ant.py (view on GitHub)
import numpy as np
from gym import utils
from gym.envs.mujoco import mujoco_env

class AntTruncatedObsEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    """
        External forces (sim.data.cfrc_ext) are removed from the observation.
        Otherwise identical to Ant-v2 from
        https://github.com/openai/gym/blob/master/gym/envs/mujoco/ant.py
    """
    def __init__(self):
        mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
        utils.EzPickle.__init__(self)

    def step(self, a):
        xposbefore = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.get_body_com("torso")[0]
        forward_reward = (xposafter - xposbefore)/self.dt
        ctrl_cost = .5 * np.square(a).sum()
        contact_cost = 0.5 * 1e-3 * np.sum(
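
Per the docstring, the only change from Ant-v2 is that the external contact forces (sim.data.cfrc_ext) are dropped from the observation. A plausible _get_obs for such a truncated observation, written here only as an illustration rather than copied from the mbpo repository, simply omits that block:

def _get_obs(self):
    return np.concatenate([
        self.sim.data.qpos.flat[2:],
        self.sim.data.qvel.flat,
        # the np.clip(self.sim.data.cfrc_ext, -1, 1) block is intentionally left out
    ])
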
github openai / gym / gym/envs/mujoco/inverted_pendulum.py (view on GitHub)
def __init__(self):
    utils.EzPickle.__init__(self)
    mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
github ethz-asl / reinmav-gym / gym_reinmav/envs/mujoco/mujoco_quad.py (view on GitHub)
import numpy as np
import os
from gym import utils
from gym.envs.mujoco import mujoco_env


class MujocoQuadEnv(mujoco_env.MujocoEnv, utils.EzPickle):
    def __init__(self, xml_name="quadrotor_ground.xml"):

        xml_path = os.path.join(os.path.dirname(__file__), "./assets", xml_name)

        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, xml_path, 2)

    def step(self, a):
        reward = 0
        self.do_simulation(self.clip_action(a), self.frame_skip)
        ob = self._get_obs()
        notdone = np.isfinite(ob).all()
        done = not notdone
        return ob, reward, done, {}

    def clip_action(self, action):