@pytest.mark.parametrize("wrapper_class", [None, gym.wrappers.TimeLimit])
def test_make_vec_env(env_id, n_envs, wrapper_class, use_subprocess):
    # env_id, n_envs and use_subprocess are supplied by further
    # @pytest.mark.parametrize decorators not shown in this excerpt.
    env = make_vec_env(env_id, n_envs, use_subprocess=use_subprocess,
                       wrapper_class=wrapper_class, monitor_dir=None, seed=0)
    assert env.num_envs == n_envs

    if not use_subprocess:
        assert isinstance(env, DummyVecEnv)
        # When no wrapper_class is requested, each env is wrapped in a Monitor
        if wrapper_class is not None:
            assert isinstance(env.envs[0], wrapper_class)
        else:
            assert isinstance(env.envs[0], Monitor)
    else:
        assert isinstance(env, SubprocVecEnv)
    # Kill subprocesses
    env.close()
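For orientation, the helper can also be called outside of pytest. This is a minimal sketch that assumes the same make_vec_env function (with its use_subprocess flag) as in the test above is importable; check your library version for the exact signature.

import gym

# Minimal sketch, assuming the same make_vec_env helper as in the test above.
vec_env = make_vec_env("CartPole-v1", 2, use_subprocess=False,
                       wrapper_class=gym.wrappers.TimeLimit,
                       monitor_dir=None, seed=0)
print(vec_env.num_envs)  # -> 2
vec_env.close()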
    exit()

# Only record video if rendering was requested
if args.render:
    proposed_MONITOR_FOLDER = MONITOR_FOLDER + get_session_id(args)
    if not os.path.isdir(MODEL_PATH):
        os.mkdir(MODEL_PATH)
    if not os.path.isdir(MONITOR_FOLDER):
        os.mkdir(MONITOR_FOLDER)
    if os.path.isdir(proposed_MONITOR_FOLDER):
        print("Warning: monitor output folder {} exists, overwriting".format(
            proposed_MONITOR_FOLDER))
    else:
        os.mkdir(proposed_MONITOR_FOLDER)
    # force=True lets the Monitor overwrite any existing recordings
    env = wrappers.Monitor(env, proposed_MONITOR_FOLDER, force=True)

state_shape, state_type, state_range, _, _ = identify_env_input(
    env.observation_space)
action_shape, _, action_range, _, _ = identify_env_input(env.action_space)
print("Env setting: state/action type(shape):", env.observation_space,
      env.action_space)
return env, state_shape, state_type, action_shape, action_range
if clparams['goal_based'] and not isinstance(env.observation_space, gym.spaces.Dict):
    print("goal_based algorithms only work with goal-oriented environments (Dict observation space)")
    exit(1)

# For goal-oriented environments (https://openai.com/blog/ingredients-for-robotics-research/)
if isinstance(env.observation_space, gym.spaces.Dict):
    goal_size = env.observation_space.spaces.get('desired_goal').shape[0]
    print("Goal space:", env.observation_space.spaces.get('desired_goal'))
    print("Goal observed space:", env.observation_space.spaces.get('achieved_goal'))
    print("Observation space:", env.observation_space.spaces.get('observation'))
    # The following assumption might not hold for some environments
    goal_start = goal_size
    keys = env.observation_space.spaces.keys()
    print("Keys order:", list(keys))
    env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

observation = env.reset()
nb_sensors = env.observation_space.shape[0]
print("State space:", env.observation_space)
print("- low:", env.observation_space.low)
print("- high:", env.observation_space.high)
print("Action space:", env.action_space)
print("- low:", env.action_space.low)
print("- high:", env.action_space.high)
for i in range(env.action_space.shape[0]):
    # Assume a symmetric action range so actions can be scaled by 'high'
    assert env.action_space.low[i] == -env.action_space.high[i]
action_scale = env.action_space.high
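Note that gym.wrappers.FlattenDictWrapper was removed in later gym releases. Assuming a gym version that ships FilterObservation and FlattenObservation (roughly 0.15 and newer), a rough equivalent of the call above is:

from gym.wrappers import FilterObservation, FlattenObservation

# Rough replacement for FlattenDictWrapper on newer gym versions (assumption:
# gym >= 0.15). Keeps the same keys, then flattens the Dict space into a Box.
env = FlattenObservation(FilterObservation(env, filter_keys=list(keys)))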
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('brush_info_file')
    parser.add_argument('--image_resolution', type=int, default=64)
    parser.add_argument('--pos_resolution', type=int, default=32)
    parser.add_argument('--max_episode_steps', type=int, default=10)
    args = parser.parse_args()

    env = MyPaintEnv(imsize=args.image_resolution,
                     pos_resolution=args.pos_resolution,
                     max_episode_steps=args.max_episode_steps,
                     brush_info_file=args.brush_info_file)

    # Gym's monitor does not support small image inputs
    if args.image_resolution >= 30:
        env = wrappers.Monitor(env, directory='./tmp', force=True)

    agent = RandomAgent(env.action_space)
    reward = 0
    done = False
    ob = env.reset()
    for t in range(args.max_episode_steps):
        action = agent.act(ob, reward, done)
        if t == 0:
            action['prob'] = 0
        ob, reward, done, _ = env.step(action)
    env.close()

    if args.image_resolution < 30:
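RandomAgent is not defined in this excerpt. A minimal stand-in (an assumption, not the original implementation) that simply samples from the environment's action space would be:

class RandomAgent:
    # Hypothetical stand-in: ignore observation/reward/done and act randomly.
    def __init__(self, action_space):
        self.action_space = action_space

    def act(self, observation, reward, done):
        return self.action_space.sample()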
def run(self):
    self.sess.run(self.global_to_local)
    self.env = gym.make('PongDeterministic-v4')
    if self.name == 'thread_0':
        # Record a video every 10th episode for the first worker only
        self.env = gym.wrappers.Monitor(
            self.env, 'save-mov',
            video_callable=lambda episode_id: episode_id % 10 == 0)
    done = False
    frame = self.env.reset()
    frame = utils.pipeline(frame)
    # Initial state: the first frame stacked four times
    history = np.stack((frame, frame, frame, frame), axis=2)
    state = copy.deepcopy(history)
    episode = 0
    score = 0
    episode_step = 0
    total_max_prob = 0
    loss_step = 0
    writer = tensorboardX.SummaryWriter('runs/' + self.name)

    while True:
        loss_step += 1
    # Fresh-training branch (the matching "if" is above this excerpt): DQN hyperparameters
    updateTargetNetwork = 10000
    explorationRate = 1
    minibatch_size = 64
    learnStart = 64
    learningRate = 0.00025
    discountFactor = 0.99
    memorySize = 1000000
    network_inputs = 14
    network_outputs = 3
    network_structure = [100, 70, 50, 70, 100]
    current_epoch = 0

    deepQ = DeepQ(network_inputs, network_outputs, memorySize, discountFactor, learningRate, learnStart)
    deepQ.initNetworks(network_structure)
    # env.monitor.start(outdir, force=True, seed=None)
    env = gym.wrappers.Monitor(env, outdir, force=True)
else:
    # Load weights, monitor info and parameter info.
    # TODO: add try/except around this else branch
    with open(params_json) as outfile:
        d = json.load(outfile)
        epochs = d.get('epochs')
        steps = d.get('steps')
        updateTargetNetwork = d.get('updateTargetNetwork')
        explorationRate = d.get('explorationRate')
        minibatch_size = d.get('minibatch_size')
        learnStart = d.get('learnStart')
        learningRate = d.get('learningRate')
        discountFactor = d.get('discountFactor')
        memorySize = d.get('memorySize')
        network_inputs = d.get('network_inputs')
        network_outputs = d.get('network_outputs')
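The else branch reads these hyperparameters back from params_json. A sketch of the matching save step during training, under the assumption that the same keys are serialized (the original script may persist additional fields such as epoch counters):

import json

# Persist the hyperparameters that the resume branch reads back (assumed keys).
parameters = {
    'updateTargetNetwork': updateTargetNetwork,
    'explorationRate': explorationRate,
    'minibatch_size': minibatch_size,
    'learnStart': learnStart,
    'learningRate': learningRate,
    'discountFactor': discountFactor,
    'memorySize': memorySize,
    'network_inputs': network_inputs,
    'network_outputs': network_outputs,
}
with open(params_json, 'w') as outfile:
    json.dump(parameters, outfile)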
def play(self, n_step=10000, n_episode=100, test_ep=None, render=False):
    if test_ep is None:
        test_ep = self.ep_end

    test_history = History(self.config)

    if not self.display:
        gym_dir = './tmp/%s-%s' % (self.env_name, get_time())
        self.env.env = gym.wrappers.Monitor(self.env.env, gym_dir)

    best_reward, best_idx = 0, 0
    ep_rewards = []
    for idx in tqdm(range(n_episode), ncols=70):
        screen, reward, action, terminal = self.env.new_random_game()
        current_reward = 0
        test_history = init_history(test_history, screen, self.history_length)

        for t in range(n_step):
            action = self.predict(test_history.get(), test_ep)
            screen, reward, terminal = self.env.act(action)
            test_history.add(screen)
            current_reward += reward
            if terminal:
def get_env(seed):
    env = gym.make('Pong-ram-v0')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind_ram(env)

    return env
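A quick usage sketch of the helper above, assuming the script's imports (set_global_seeds, wrap_deepmind_ram, wrappers, osp) are in scope:

env = get_env(seed=0)
obs = env.reset()
print(obs.shape, env.action_space)  # processed RAM observation and discrete action space
env.close()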
ENV_ID = "MinitaurBulletEnv-v0"

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", required=True, help="Model file to load")
    parser.add_argument("-e", "--env", default=ENV_ID, help="Environment name to use, default=" + ENV_ID)
    parser.add_argument("-r", "--record", help="If specified, sets the recording dir, default=Disabled")
    args = parser.parse_args()

    spec = gym.envs.registry.spec(args.env)
    spec._kwargs['render'] = False
    env = gym.make(args.env)
    if args.record:
        env = gym.wrappers.Monitor(env, args.record)

    net = model.ModelA2C(env.observation_space.shape[0], env.action_space.shape[0])
    net.load_state_dict(torch.load(args.model))

    obs = env.reset()
    total_reward = 0.0
    total_steps = 0
    while True:
        obs_v = torch.FloatTensor([obs])
        mu_v, var_v, val_v = net(obs_v)
        # Act deterministically with the mean of the predicted Gaussian policy
        action = mu_v.squeeze(dim=0).data.numpy()
        action = np.clip(action, -1, 1)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
        total_steps += 1
        if done:
def make_env(process_idx, test):
    # Use different random seeds for train and test envs
    process_seed = process_seeds[process_idx]
    env_seed = 2 ** 31 - 1 - process_seed if test else process_seed
    env = atari_wrappers.wrap_deepmind(
        atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
        episode_life=not test,
        clip_rewards=not test)
    env.seed(int(env_seed))
    if test:
        # Randomize actions like epsilon-greedy in evaluation as well
        env = chainerrl.wrappers.RandomizeAction(env, 0.05)
    if args.monitor:
        env = gym.wrappers.Monitor(
            env, args.outdir,
            mode='evaluation' if test else 'training')
    if args.render:
        env = chainerrl.wrappers.Render(env)
    return env
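A quick usage sketch, assuming process_seeds and the parsed args from the surrounding ChainerRL training script are already defined:

# Train and evaluation envs for worker 0 get different seeds and wrappers.
train_env = make_env(process_idx=0, test=False)
eval_env = make_env(process_idx=0, test=True)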