How to use the gym.spaces module in gym

To help you get started, we’ve selected a few gym.spaces examples, based on popular ways it is used in public projects.
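Before the project snippets below, here is a minimal, self-contained sketch of the space types they rely on (Box, Discrete, MultiDiscrete, and Tuple). The variable names are illustrative; the constructors and methods are the standard gym API.

import numpy as np
from gym import spaces

# A continuous 3-dimensional space bounded in [-1, 1].
box = spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)

# A discrete space with 5 possible actions (0..4).
discrete = spaces.Discrete(5)

# Two independent discrete controls, each with 4 choices.
multi = spaces.MultiDiscrete([4, 4])

# A composite space built from the pieces above.
composite = spaces.Tuple((box, discrete, multi))

# Every space supports sampling and membership checks.
sample = composite.sample()
assert composite.contains(sample)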

github tensorflow / tensor2tensor / tensor2tensor / trax / rl / ppo_trainer.py
    self._max_timestep_eval = max_timestep_eval
    self._gamma = gamma
    self._lambda_ = lambda_
    self._c1 = c1
    self._c2 = c2
    self._eval_every_n = eval_every_n
    self._save_every_n = save_every_n
    self._done_frac_for_policy_save = done_frac_for_policy_save
    self._n_evals = n_evals
    self._len_history_for_policy = len_history_for_policy
    self._eval_temperatures = eval_temperatures
    self._separate_eval = separate_eval

    action_space = self.train_env.action_space
    assert isinstance(
        action_space, (gym.spaces.Discrete, gym.spaces.MultiDiscrete))
    if isinstance(action_space, gym.spaces.Discrete):
      n_actions = action_space.n
      n_controls = 1
    else:
      (n_controls,) = action_space.nvec.shape
      assert n_controls > 0
      assert onp.min(action_space.nvec) == onp.max(action_space.nvec), (
          "Every control must have the same number of actions.")
      n_actions = action_space.nvec[0]
    self._n_actions = n_actions
    self._n_controls = n_controls

    self._rng = trax.get_random_number_generator_and_set_seed(random_seed)
    self._rng, key1 = jax_random.split(self._rng, num=2)

    vocab_size = policy_and_value_vocab_size
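The assertions above work because Discrete and MultiDiscrete expose different attributes: Discrete has a single .n, while MultiDiscrete has an .nvec array with one entry per control. A minimal sketch of that distinction (numpy imported as onp to match the snippet; the space sizes are illustrative):

import numpy as onp
import gym

discrete = gym.spaces.Discrete(6)
n_actions, n_controls = discrete.n, 1               # one control with six actions

multi = gym.spaces.MultiDiscrete([6, 6, 6])
(n_controls,) = multi.nvec.shape                    # three controls
assert onp.min(multi.nvec) == onp.max(multi.nvec)   # every control has the same number of actions
n_actions = int(multi.nvec[0])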
github NervanaSystems / coach / rl_coach / environments / toy_problems / short_corridor.py
def __init__(self):

        self.REVERSE_STATE = 1
        self.GOAL_STATE = 3
        self.NUM_STATES = self.GOAL_STATE + 1

        self.observation_space = spaces.Box(0, 1, shape=(self.NUM_STATES,))
        self.action_space = spaces.Discrete(2)

        self.current_state = 0
        self.goal_reached = False
        self.max_steps = 500
github rlworkgroup / garage / src / garage / envs / util.py
def flat_dim(space):
    if isinstance(space, gym.spaces.Box):
        return np.prod(space.low.shape)
    elif isinstance(space, gym.spaces.Discrete):
        return space.n
    elif isinstance(space, gym.spaces.Tuple):
        return np.sum([flat_dim(x) for x in space.spaces])
    else:
        raise NotImplementedError
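A quick usage sketch of this helper, assuming flat_dim from the snippet above is in scope (the spaces below are illustrative):

import gym

flat_dim(gym.spaces.Box(low=-1.0, high=1.0, shape=(2, 3)))   # 6
flat_dim(gym.spaces.Discrete(4))                              # 4
flat_dim(gym.spaces.Tuple((gym.spaces.Discrete(4),
                           gym.spaces.Box(low=0.0, high=1.0, shape=(5,)))))  # 4 + 5 = 9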
github hanyas / trajopt / trajopt / envs / lqr / lqr.py
        self._g = np.array([10., 10.])

        # stochastic dynamics
        self._A = np.array([[1.1, 0.], [1.0, 1.0]])
        self._B = np.array([[1.], [0.]])
        self._c = - self._A @ self._g  # stable at goal

        self._sigma = 1e-8 * np.eye(2)

        self._gw = np.array([1e1, 1e1])
        self._uw = np.array([1.])

        self._xmax = np.array([np.inf, np.inf])
        self._umax = np.inf

        self.action_space = spaces.Box(low=-self._umax,
                                       high=self._umax, shape=(1,))

        self.observation_space = spaces.Box(low=-self._xmax,
                                            high=self._xmax)

        self.state = None
        self.np_random = None

        self.seed()
github lerrel / gym-adv / gym / envs / adversarial / mujoco / inverted_pendulum.py
def __init__(self):
        utils.EzPickle.__init__(self)
        mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
        ## Adversarial setup
        self._adv_f_bname = b'pole' #Byte String name of body on which the adversary force will be applied
        bnames = self.model.body_names
        self._adv_bindex = bnames.index(self._adv_f_bname) #Index of the body on which the adversary force will be applied
        adv_max_force = 5.
        high_adv = np.ones(2)*adv_max_force
        low_adv = -high_adv
        self.adv_action_space = spaces.Box(low_adv, high_adv)
        self.pro_action_space = self.action_space
github Sohojoe / MarathonEnvsBaselines / gym-unity / gym_unity / envs / unity_env.py
        else:
            high = np.array([1] * brain.vector_action_space_size[0])
            self._action_space = spaces.Box(-high, high, dtype=np.float32)
        high = np.array([np.inf] * brain.vector_observation_space_size)
        self.action_meanings = brain.vector_action_descriptions
        if self.use_visual:
            if brain.camera_resolutions[0]["blackAndWhite"]:
                depth = 1
            else:
                depth = 3
            self._observation_space = spaces.Box(0, 1, dtype=np.float32,
                                                 shape=(brain.camera_resolutions[0]["height"],
                                                        brain.camera_resolutions[0]["width"],
                                                        depth))
        else:
            self._observation_space = spaces.Box(-high, high, dtype=np.float32)
github wirelessr / HeartsEnv / hearts / single.py
def __init__(self):
        self.n_seed = None
        self.observation_space = spaces.Tuple([
            # player states
            spaces.Tuple([
                spaces.Tuple([ # p0, p1, p2
                    spaces.Discrete(200), # score
                    spaces.Tuple([ # income
                        spaces.MultiDiscrete([13, 4])
                    ] * 52),
                ] * 3),
                spaces.Discrete(200), # p3 score
                spaces.Tuple([ # hand
                    spaces.MultiDiscrete([13, 4])
                ] * 13),
                spaces.Tuple([ # income
                    spaces.MultiDiscrete([13, 4])
                ] * 52),
            ]),
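Nested composite spaces like this sample as nested Python tuples that mirror the declared structure. A smaller, illustrative nesting shows the shape of a sample:

import gym

hand = gym.spaces.Tuple([gym.spaces.MultiDiscrete([13, 4])] * 3)
obs_space = gym.spaces.Tuple((gym.spaces.Discrete(200), hand))

obs = obs_space.sample()
score, cards = obs                  # a score and a 3-tuple of (rank, suit) arrays
assert obs_space.contains(obs)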
github openai / mlsh / gym / gym / envs / toy_text / kellycoinflip.py
def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0, maxRounds=300):

        self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments
        self.observation_space = spaces.Tuple((
            spaces.Box(0, maxWealth, [1]), # (w,b)
            spaces.Discrete(maxRounds+1)))
        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.wealth = initialWealth
        self.initialWealth = initialWealth
        self.maxRounds = maxRounds
        self.maxWealth = maxWealth
        self._seed()
        self._reset()
github robotlearn / pyrobolearn / pyrobolearn / states / merged_space.py
def spaces(self, spaces):
        if isinstance(spaces, gym.spaces.Space):
            spaces = [spaces]
        if not isinstance(spaces, (list, tuple, np.ndarray)):
            raise TypeError("Expecting the given spaces to be a list/tuple/np.ndarray of `gym.spaces.Space`, but "
                            "got instead: {}".format(type(spaces)))
        for i, space in enumerate(spaces):
            if not isinstance(space, gym.spaces.Space):
                raise TypeError("Expecting the {}th item to be an instance of `gym.spaces.Space`, but got instead: "
                                "{}".format(i, type(space)))
        self._spaces = spaces
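A usage sketch for this setter, assuming it is exposed as a property named spaces on the merged-state class; merged below is an illustrative instance, not part of the API shown in the snippet:

import gym

# Accepted: a single space (it gets wrapped into a list) or a list/tuple of spaces.
merged.spaces = gym.spaces.Box(low=-1.0, high=1.0, shape=(3,))
merged.spaces = [gym.spaces.Discrete(4), gym.spaces.Box(low=0.0, high=1.0, shape=(2,))]

# Rejected: anything that is not a gym.spaces.Space raises a TypeError.
merged.spaces = [gym.spaces.Discrete(4), "not a space"]   # raises TypeError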