How to use the gym.wrappers module in gym

To help you get started, we’ve selected a few gym.wrappers examples, based on popular ways the module is used in public projects.
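
Before diving into the project snippets, here is a minimal sketch of the pattern they all share: create an environment, then stack wrapper classes from gym.wrappers on top of it. This is only a sketch and assumes an older gym release in which gym.wrappers.Monitor is still available (it records video, so ffmpeg is needed); the environment id 'CartPole-v0' and the './monitor_out' directory are placeholder values, not taken from the projects below.

import gym

# Start from a plain environment and layer wrappers on top of it.
env = gym.make('CartPole-v0')

# Cap episodes at 200 steps; TimeLimit signals done=True once the cap is hit.
env = gym.wrappers.TimeLimit(env, max_episode_steps=200)

# Record videos and episode statistics under ./monitor_out, overwriting old runs.
env = gym.wrappers.Monitor(env, './monitor_out', force=True)

obs = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random policy, just to exercise the wrappers
    obs, reward, done, info = env.step(action)
env.close()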


github Stable-Baselines-Team / stable-baselines / tests / test_utils.py
@pytest.mark.parametrize("wrapper_class", [None, gym.wrappers.TimeLimit])
def test_make_vec_env(env_id, n_envs, wrapper_class, use_subprocess):
    env = make_vec_env(env_id, n_envs, use_subprocess=use_subprocess,
                       wrapper_class=wrapper_class, monitor_dir=None, seed=0)

    assert env.num_envs == n_envs

    if not use_subprocess:
        assert isinstance(env, DummyVecEnv)
        if wrapper_class is not None:
            assert isinstance(env.envs[0], wrapper_class)
        else:
            assert isinstance(env.envs[0], Monitor)
    else:
        assert isinstance(env, SubprocVecEnv)
    # Kill subprocesses
    env.close()

github facebookresearch / ReAgent / trainer.py

    # if actually render env
    if args.render:
        proposed_MONITOR_FOLDER = MONITOR_FOLDER + get_session_id(args)
        if not os.path.isdir(MODEL_PATH):
            os.mkdir(MODEL_PATH)
        if not os.path.isdir(MONITOR_FOLDER):
            os.mkdir(MONITOR_FOLDER)
        if os.path.isdir(proposed_MONITOR_FOLDER):
            print("Warning: monitor output folder {} exists, overwriting".
                  format(proposed_MONITOR_FOLDER))
        else:
            os.mkdir(proposed_MONITOR_FOLDER)
        # overwriting
        env = wrappers.Monitor(env, proposed_MONITOR_FOLDER, force=True)

    state_shape, state_type, state_range, _, _ = identify_env_input(
        env.observation_space)
    action_shape, _, action_range, _, _ = identify_env_input(env.action_space)

    print("Env setting: state/action type(shape):", env.observation_space,
          env.action_space)

    return env, state_shape, state_type, action_shape, action_range

github matthieu637 / ddrl / gym / run_mpi.py
if (clparams['goal_based']) and not isinstance(env.observation_space, gym.spaces.Dict):
    print("goal_based algorithms only works with goal based goal-oriented environment")
    exit(1)

#for goal-oriented environment (https://openai.com/blog/ingredients-for-robotics-research/)
if isinstance(env.observation_space, gym.spaces.Dict):
    goal_size=env.observation_space.spaces.get('desired_goal').shape[0]
    print("Goal space:", env.observation_space.spaces.get('desired_goal'))
    print("Goal observed space:", env.observation_space.spaces.get('achieved_goal'))
    print("Observation space:", env.observation_space.spaces.get('observation'))
    #the following might be false for some env
    goal_start=goal_size
    keys = env.observation_space.spaces.keys()
    print("Keys order: ", list(keys))
    env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

observation = env.reset()
nb_sensors = env.observation_space.shape[0]

print("State space:", env.observation_space)
print("- low:", env.observation_space.low)
print("- high:", env.observation_space.high)

print("Action space:", env.action_space)
print("- low:", env.action_space.low)
print("- high:", env.action_space.high)

for i in range(env.action_space.shape[0]):
    assert env.action_space.low[i] == - env.action_space.high[i]
action_scale=env.action_space.high

github DwangoMediaVillage / chainer_spiral / random_drawing.py
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('brush_info_file')
    parser.add_argument('--image_resolution', type=int, default=64)
    parser.add_argument('--pos_resolution', type=int, default=32)
    parser.add_argument('--max_episode_steps', type=int, default=10)
    args = parser.parse_args()

    env = MyPaintEnv(imsize=args.image_resolution,
                     pos_resolution=args.pos_resolution,
                     max_episode_steps=args.max_episode_steps,
                     brush_info_file=args.brush_info_file)

    # Gym's monitor does not support small image inputs
    if args.image_resolution >= 30:
        env = wrappers.Monitor(env, directory='./tmp', force=True)

    agent = RandomAgent(env.action_space)

    reward = 0
    done = False
    ob = env.reset()

    for t in range(args.max_episode_steps):
        action = agent.act(ob, reward, done)
        if t == 0:
            action['prob'] = 0
        ob, reward, done, _ = env.step(action)

    env.close()

github RLOpensource / IMPALA-Scalable-Distributed-Deep-RL-with-Importance-Weighted-Actor-Learner-Architectures / async_agent.py
    def run(self):
        self.sess.run(self.global_to_local)
        self.env = gym.make('PongDeterministic-v4')
        if self.name == 'thread_0':
            self.env = gym.wrappers.Monitor(self.env, 'save-mov', video_callable=lambda episode_id: episode_id%10==0)
        
        done = False
        frame = self.env.reset()
        frame = utils.pipeline(frame)
        history = np.stack((frame, frame, frame, frame), axis=2)
        state = copy.deepcopy(history)
        episode = 0
        score = 0
        episode_step = 0
        total_max_prob = 0
        loss_step = 0

        writer = tensorboardX.SummaryWriter('runs/'+self.name)

        while True:
            loss_step += 1

github xpharry / dqn-adaptive-cruise / gym-vehicle / experiments / vehicle / circle_track2_vehicle_lcc_dqn_dense2.py
        updateTargetNetwork = 10000
        explorationRate = 1
        minibatch_size = 64
        learnStart = 64
        learningRate = 0.00025
        discountFactor = 0.99
        memorySize = 1000000
        network_inputs = 14
        network_outputs = 3
        network_structure = [100, 70, 50, 70, 100]
        current_epoch = 0

        deepQ = DeepQ(network_inputs, network_outputs, memorySize, discountFactor, learningRate, learnStart)
        deepQ.initNetworks(network_structure)
        # env.monitor.start(outdir, force=True, seed=None)
        env = gym.wrappers.Monitor(env, outdir, force=True)  # keep the wrapped env so episodes are actually recorded
    else:
        #Load weights, monitor info and parameter info.
        #ADD TRY CATCH for this else
        with open(params_json) as outfile:
            d = json.load(outfile)
            epochs = d.get('epochs')
            steps = d.get('steps')
            updateTargetNetwork = d.get('updateTargetNetwork')
            explorationRate = d.get('explorationRate')
            minibatch_size = d.get('minibatch_size')
            learnStart = d.get('learnStart')
            learningRate = d.get('learningRate')
            discountFactor = d.get('discountFactor')
            memorySize = d.get('memorySize')
            network_inputs = d.get('network_inputs')
            network_outputs = d.get('network_outputs')

github NoListen / ERL / dqn / agent.py
def play(self, n_step=10000, n_episode=100, test_ep=None, render=False):
    if test_ep is None:
      test_ep = self.ep_end

    test_history = History(self.config)

    if not self.display:
      gym_dir = './tmp/%s-%s' % (self.env_name, get_time())
      self.env.env = gym.wrappers.Monitor(self.env.env, gym_dir)

    best_reward, best_idx = 0, 0
    ep_rewards = []
    for idx in tqdm(range(n_episode),ncols=70):
      screen, reward, action, terminal = self.env.new_random_game()
      current_reward = 0
      test_history = init_history(test_history, screen, self.history_length)


      for t in range(n_step):
        action = self.predict(test_history.get(), test_ep)
        screen, reward, terminal = self.env.act(action)
        test_history.add(screen)

        current_reward += reward
        if terminal:
          break

github xuwd11 / cs294-112_hws / hw3 / default / run_dqn_ram.py
def get_env(seed):
    env = gym.make('Pong-ram-v0')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind_ram(env)

    return env

github PacktPublishing / Deep-Reinforcement-Learning-Hands-On / Chapter14 / 03_play_a2c.py
ENV_ID = "MinitaurBulletEnv-v0"

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", required=True, help="Model file to load")
    parser.add_argument("-e", "--env", default=ENV_ID, help="Environment name to use, default=" + ENV_ID)
    parser.add_argument("-r", "--record", help="If specified, sets the recording dir, default=Disabled")
    args = parser.parse_args()

    spec = gym.envs.registry.spec(args.env)
    spec._kwargs['render'] = False
    env = gym.make(args.env)
    if args.record:
        env = gym.wrappers.Monitor(env, args.record)

    net = model.ModelA2C(env.observation_space.shape[0], env.action_space.shape[0])
    net.load_state_dict(torch.load(args.model))

    obs = env.reset()
    total_reward = 0.0
    total_steps = 0
    while True:
        obs_v = torch.FloatTensor([obs])
        mu_v, var_v, val_v = net(obs_v)
        action = mu_v.squeeze(dim=0).data.numpy()
        action = np.clip(action, -1, 1)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
        total_steps += 1
        if done:
            break

github chainer / chainerrl / examples / atari / train_nsq_ale.py
    def make_env(process_idx, test):
        # Use different random seeds for train and test envs
        process_seed = process_seeds[process_idx]
        env_seed = 2 ** 31 - 1 - process_seed if test else process_seed
        env = atari_wrappers.wrap_deepmind(
            atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
            episode_life=not test,
            clip_rewards=not test)
        env.seed(int(env_seed))
        if test:
            # Randomize actions like epsilon-greedy in evaluation as well
            env = chainerrl.wrappers.RandomizeAction(env, 0.05)
        if args.monitor:
            env = gym.wrappers.Monitor(
                env, args.outdir,
                mode='evaluation' if test else 'training')
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env