Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# LSTM built with num_layers=2, so each parameter appears with an _l0 and an _l1 suffix
a = nn.LSTM(2, 3, 2)
ortho_init(a, weight_scale=1000., constant_bias=10.)
# Weights: with weight_scale=1000. the largest entry of every matrix must exceed 50
for weight_name in ('weight_hh_l0', 'weight_hh_l1', 'weight_ih_l0', 'weight_ih_l1'):
    assert getattr(a, weight_name).max().item() > 50.
# Biases: every entry must equal the requested constant_bias
for bias_name in ('bias_hh_l0', 'bias_hh_l1', 'bias_ih_l0', 'bias_ih_l1'):
    assert np.allclose(getattr(a, bias_name).detach().numpy(), 10.)
# LSTMCell: a single cell, so parameter names carry no layer suffix
a = nn.LSTMCell(3, 2)
ortho_init(a, weight_scale=1000., constant_bias=10.)
# Weights: with weight_scale=1000. the largest entry of every matrix must exceed 50
for weight_name in ('weight_hh', 'weight_ih'):
    assert getattr(a, weight_name).max().item() > 50.
# Biases: every entry must equal the requested constant_bias
for bias_name in ('bias_hh', 'bias_ih'):
    assert np.allclose(getattr(a, bias_name).detach().numpy(), 10.)
# NOTE(review): `running_avg` and `venv` are created above this fragment (not visible here);
# presumably they come from a VecStandardize-wrapped vectorized environment -- confirm.
# Exactly two entries are expected: observation statistics and reward statistics.
assert len(running_avg) == 2 and 'obs_avg' in running_avg and 'r_avg' in running_avg
# Observation statistics hold both a mean and a standard deviation ...
assert 'mu' in running_avg['obs_avg'] and 'sigma' in running_avg['obs_avg']
# ... and neither of them is (numerically) all zeros at this point.
assert not np.allclose(running_avg['obs_avg']['mu'], 0.0)
assert not np.allclose(running_avg['obs_avg']['sigma'], 0.0)
# Reward statistics hold only 'sigma' (no 'mu'), and it is still None here.
assert 'mu' not in running_avg['r_avg']
assert 'sigma' in running_avg['r_avg']
assert running_avg['r_avg']['sigma'] is None
# Step with one action (value 1) per environment; assumes 5 sub-environments -- TODO confirm.
a = [1]*5
obs, rewards, _, _ = venv.step(a)
# The largest reward must equal 0.0001 exactly; presumably this is the reward clipping
# bound configured on the wrapper above -- verify against the setup code.
assert rewards.max() == 0.0001
# Drop the references before the next configuration is built.
del venv, obs, a
# other settings: standardization switched off for both observations and rewards
# (use_obs=False, use_reward=False); the clip bounds are still passed in, and the
# assertions below expect raw values that are not clipped down to those bounds.
venv = VecStandardize(
    make_vec_env(vec_env_class, make_gym_env, 'CartPole-v1', 5, 1, True),
    use_obs=False,
    use_reward=False,
    clip_obs=0.001,
    clip_reward=0.0001,
    gamma=0.99,
    eps=1e-8,
)
initial_obs = venv.reset()
# Observations exceed clip_obs
assert np.asarray(initial_obs).max() > 0.001
actions = [1] * 5
stepped_obs, rewards, _, _ = venv.step(actions)
# Rewards reach at least clip_reward
assert np.asarray(rewards).max() >= 0.0001
del venv, initial_obs, stepped_obs, actions
# other settings: gamma
def test_categorical_head(feature_dim, batch_size, num_action):
    """Check a CPU ``CategoricalHead``: its attributes and the distribution it returns.

    Args:
        feature_dim: size of the input feature vector given to the head.
        batch_size: number of feature vectors in the random input batch.
        num_action: number of discrete actions the head is built for.
    """
    cpu = torch.device('cpu')
    head = CategoricalHead(feature_dim, num_action, cpu)

    # Construction: type and stored attributes
    assert isinstance(head, Module)
    assert head.feature_dim == feature_dim
    assert head.num_action == num_action
    assert head.device.type == 'cpu'

    # Forward pass on a random batch yields one categorical distribution per row
    features = torch.randn(batch_size, feature_dim)
    action_dist = head(features)
    assert isinstance(action_dist, Categorical)
    assert action_dist.batch_shape == (batch_size,)
    assert action_dist.probs.shape == (batch_size, num_action)

    # Sampling draws a single action per batch element
    drawn = action_dist.sample()
    assert drawn.shape == (batch_size,)
# Finish the case started above this fragment (x and y are defined there)
assert np.allclose(x, y)
del x, y

# Random 3-D ndarray: values must survive the conversion
array_in = np.random.randn(10, 20, 30)
array_out = tensorify(array_in, 'cpu')
assert np.allclose(array_in, array_out)
del array_in, array_out

# raw list
# A one-element list is compared through the scalar extracted with .item()
single = [2.43]
single_out = tensorify(single, 'cpu')
assert np.allclose(single, single_out.item())
del single, single_out

# Flat and nested lists are compared element-wise against the result
for raw in ([1, 2, 3, 4, 5, 6], [[1, 2], [3, 4], [5, 6]]):
    converted = tensorify(raw, 'cpu')
    assert np.allclose(raw, converted)
    del converted
del raw
def test_run_experiment(num_sample, max_workers, chunksize):
    """Run a small experiment grid and verify the logging directory layout.

    The logging directory is removed in a ``finally`` clause so that a failing
    assertion (or a failing ``run_experiment`` call) does not leave
    ``./some_path`` behind and contaminate later test runs.

    Args:
        num_sample: forwarded to ``Config`` as ``num_sample``.
        max_workers: forwarded to ``run_experiment``.
        chunksize: forwarded to ``run_experiment``.
    """
    def run(config, seed, device, logdir):
        # Trivial job: just echo back what it was given.
        return config['ID'], seed, device, logdir

    config = Config({'network.lr': Grid([1e-3, 5e-3]),
                     'network.size': [32, 16],
                     'env.id': Grid(['CartPole-v1', 'Ant-v2'])},
                    num_sample=num_sample,
                    keep_dict_order=True)
    seeds = [1, 2, 3]
    log_dir = './some_path'
    # Single source of truth for the path that is both written and inspected
    p = Path(log_dir)
    try:
        run_experiment(run, config, seeds, log_dir, max_workers, chunksize, use_gpu=False, gpu_ids=None)

        assert p.exists()
        assert (p / 'configs.pkl').exists()
        assert (p / 'source_files').exists() and (p / 'source_files').is_dir()
        # Check all configuration folders with their IDs and subfolders for all random seeds.
        # Two Grid entries with two values each -> 4 configurations
        # (assumes only Grid entries are expanded -- the plain list is a single value).
        for i in range(4):
            config_p = p / str(i)
            assert config_p.exists()
            assert (config_p / 'config.yml').exists()
            for seed in seeds:
                assert (config_p / str(seed)).exists()
    finally:
        # Clean the logging directory, even when a check above failed
        if p.exists():
            rmtree(p)
    # Test remove
    assert not p.exists()
def test_tuple():
    """Check construction, sampling and flatten/unflatten round-trip of a ``Tuple`` space."""
    # A bare space (not wrapped in a sequence) is rejected with an AssertionError
    with pytest.raises(AssertionError):
        Tuple(Discrete(10))

    space = Tuple([Discrete(5),
                   Box(-1.0, 1.0, np.float32, shape=(2, 3)),
                   Dict({'success': Discrete(2), 'velocity': Box(-1, 1, np.float32, shape=(1, 3))})])

    # Sub-spaces are kept in order and compare equal to freshly built ones
    assert len(space.spaces) == 3
    assert space.spaces[0] == Discrete(5)
    assert space.spaces[1] == Box(-1.0, 1.0, np.float32, shape=(2, 3))
    assert space.spaces[2] == Dict({'success': Discrete(2), 'velocity': Box(-1, 1, np.float32, shape=(1, 3))})

    # A sample is a 3-tuple whose items belong to the matching sub-space
    sample = space.sample()
    assert isinstance(sample, tuple) and len(sample) == 3
    assert sample[0] in Discrete(5)
    assert sample[1] in Box(-1.0, 1.0, np.float32, shape=(2, 3))
    assert sample[2] in Dict({'success': Discrete(2), 'velocity': Box(-1, 1, np.float32, shape=(1, 3))})

    # Flat size is the sum written out below: 5 + 2*3 + 2 + 3 = 16
    assert space.flat_dim == 5+2*3+2+3
    assert space.flatten(sample).shape == (16,)

    # Round trip: unflatten(flatten(x)) must reproduce every component of x
    sample2 = space.unflatten(space.flatten(sample))
    assert sample[0] == sample2[0]
    assert np.allclose(sample[1], sample2[1])
    assert sample[2]['success'] == sample2[2]['success']
    # The Dict's Box entry takes part in the round trip too and must be verified
    assert np.allclose(sample[2]['velocity'], sample2[2]['velocity'])
def test_box():
    """Check ``Box`` argument validation and define the shared ``check`` helper.

    ``check`` verifies a float32 box of shape (2, 3) with bounds -1.0 / 1.0.
    """
    # Invalid constructions, each with the exception type it must raise
    with pytest.raises(AssertionError):
        Box(-1.0, 1.0, dtype=None)

    with pytest.raises(AssertionError):
        Box(-1.0, [1.0, 2.0], np.float32, shape=(2,))

    with pytest.raises(AttributeError):
        Box(np.array([-1.0, -2.0]), [3.0, 4.0], np.float32)

    def check(box):
        # dtype and shape must agree between the box and both of its bounds
        for dtype in (box.dtype, box.low.dtype, box.high.dtype):
            assert dtype == np.float32
        for shape in (box.shape, box.low.shape, box.high.shape):
            assert shape == (2, 3)

        # Bounds are filled with the scalar limits
        assert np.allclose(box.low, np.full([2, 3], -1.0))
        assert np.allclose(box.high, np.full([2, 3], 1.0))

        # Sampling, flattening and membership
        drawn = box.sample()
        assert drawn.shape == (2, 3)
        assert drawn.dtype == np.float32
        assert box.flat_dim == 6
        assert isinstance(box.flat_dim, int)
        flat = box.flatten(drawn)
        assert flat.shape == (6,)
        assert np.allclose(drawn, box.unflatten(box.flatten(drawn)))
        assert drawn in box

        # String form and equality with an equivalent box
        assert str(box) == 'Box(2, 3)'
        assert box == Box(-1.0, 1.0, np.float32, shape=[2, 3])
        del box, drawn
def test_tuple():
    """Exercise a ``Tuple`` space: validation, sub-spaces, sampling and flat round-trip."""
    def make_box():
        # Fresh (2, 3) float32 box, used both as member and as expected value
        return Box(-1.0, 1.0, np.float32, shape=(2, 3))

    def make_dict():
        # Fresh dict space with a binary flag and a (1, 3) float32 box
        return Dict({'success': Discrete(2), 'velocity': Box(-1, 1, np.float32, shape=(1, 3))})

    # Passing a single space instead of a sequence must fail
    with pytest.raises(AssertionError):
        Tuple(Discrete(10))

    space = Tuple([Discrete(5), make_box(), make_dict()])

    # Members are stored in the order given
    members = space.spaces
    assert len(members) == 3
    assert members[0] == Discrete(5)
    assert members[1] == make_box()
    assert members[2] == make_dict()

    # Each component of a sample lies in its own sub-space
    sample = space.sample()
    assert isinstance(sample, tuple)
    assert len(sample) == 3
    assert sample[0] in Discrete(5)
    assert sample[1] in make_box()
    assert sample[2] in make_dict()

    # Flattened size: 5 + 2*3 + 2 + 3
    assert space.flat_dim == 5+2*3+2+3
    assert space.flatten(sample).shape == (16,)

    # Flatten followed by unflatten gives back the same sample
    restored = space.unflatten(space.flatten(sample))
    assert sample[0] == restored[0]
    assert np.allclose(sample[1], restored[1])
    assert sample[2]['success'] == restored[2]['success']
    assert np.allclose(sample[2]['velocity'], restored[2]['velocity'])
def test_convert_gym_space():
    """Check that ``convert_gym_space`` maps gym spaces onto the project's own space classes.

    For each kind the result must be an instance of the project class and must
    not be an instance of the gym class it was converted from.
    """
    # Discrete
    source = gym.spaces.Discrete(n=5)
    converted = convert_gym_space(source)
    assert isinstance(converted, Discrete)
    assert not isinstance(converted, gym.spaces.Discrete)
    assert converted.n == 5
    assert converted.sample() in converted
    del source, converted

    # Box
    source = gym.spaces.Box(low=-2.0, high=2.0, shape=(2, 3), dtype=np.float32)
    converted = convert_gym_space(source)
    assert isinstance(converted, Box)
    assert not isinstance(converted, gym.spaces.Box)
    assert converted.shape == (2, 3)
    assert converted.sample() in converted
    del source, converted

    # Dict (nested: a dict of two boxes next to a discrete entry)
    position = gym.spaces.Box(low=-100, high=100, shape=(3,), dtype=np.float32)
    velocity = gym.spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32)
    sensors = gym.spaces.Dict({'position': position, 'velocity': velocity})
    source = gym.spaces.Dict({'sensors': sensors, 'charge': gym.spaces.Discrete(100)})
    converted = convert_gym_space(source)
    assert isinstance(converted, Dict)
    assert not isinstance(converted, gym.spaces.Dict)
    # Two top-level entries: 'sensors' and 'charge'
    assert len(converted.spaces) == 2
def test_vec_env(vec_env_class):
    """Check a 5-environment CartPole-v1 vectorized env built from the given class.

    Args:
        vec_env_class: a ``(v_id, cls)`` pair; ``v_id`` 0 is expected to give a
            ``SerialVecEnv`` and ``v_id`` 1 a ``ParallelVecEnv``.
    """
    # unpack class
    v_id, vec_env_class = vec_env_class

    venv = make_vec_env(vec_env_class, make_gym_env, 'CartPole-v1', 5, 1, True)
    assert isinstance(venv, VecEnv)
    assert v_id in [0, 1]
    if v_id == 0:
        # Previously a bare isinstance() call whose result was discarded,
        # so the serial case was never actually verified.
        assert isinstance(venv, SerialVecEnv)
    elif v_id == 1:
        assert isinstance(venv, ParallelVecEnv)

    # Basic properties of the vectorized environment
    assert venv.num_env == 5
    assert not venv.closed and venv.viewer is None
    assert venv.unwrapped is venv
    assert isinstance(venv.observation_space, Box)
    assert isinstance(venv.action_space, Discrete)
    assert venv.T == 500
    assert venv.max_episode_reward == 475.0
    assert venv.reward_range == (-float('inf'), float('inf'))

    # Reset: one 4-dimensional observation per environment, and the first one
    # differs from each of the others
    obs = venv.reset()
    assert len(obs) == 5
    assert np.asarray(obs).shape == (5, 4)
    assert all([not np.allclose(obs[0], obs[i]) for i in [1, 2, 3, 4]])

    # Step with the same action in every environment: all outputs have one entry
    # per environment and the observations still differ from the first one
    a = [1]*5
    obs, rewards, dones, infos = venv.step(a)
    assert all([len(item) == 5 for item in [obs, rewards, dones, infos]])
    assert all([not np.allclose(obs[0], obs[i]) for i in [1, 2, 3, 4]])

    # EnvSpec
    env_spec = EnvSpec(venv)
    assert isinstance(env_spec.action_space, Discrete)