register_env(env_name, create_env)

test_env = create_env()
obs_space = test_env.observation_space
act_space = test_env.action_space

# Set up PPO with a single policy graph ('av') shared by all agents
policy_graphs = {'av': (PPOPolicyGraph, obs_space, act_space, {})}

def policy_mapping_fn(agent_id):
    return 'av'

config.update({
    'multiagent': {
        'policy_graphs': policy_graphs,
        'policy_mapping_fn': tune.function(policy_mapping_fn),
        "policies_to_train": ["av"]
    }
})

return alg_run, env_name, config
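# A minimal sketch (not part of the snippet above) of how the returned
# (alg_run, env_name, config) tuple is typically handed to Tune; the wrapper
# name `setup_exps`, the experiment name, and the stop criterion are assumptions.
from ray import tune

alg_run, env_name, config = setup_exps()  # hypothetical wrapper around the code above
tune.run_experiments({
    "multiagent_ppo_av": {
        "run": alg_run,
        "env": env_name,
        "config": config,
        "stop": {"training_iteration": 200},
    },
})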
config.update({
    "train_batch_size": train_batch_size,
    "horizon": 1000,
    "lr_schedule": [
        [0, hparams['lr_init']],
        [20000000, hparams['lr_final']],
    ],
    "num_workers": num_workers,
    "num_gpus": gpus_for_driver,  # The number of GPUs for the driver
    "num_cpus_for_driver": cpus_for_driver,
    "num_gpus_per_worker": num_gpus_per_worker,  # Can be a fraction
    "num_cpus_per_worker": num_cpus_per_worker,  # Can be a fraction
    "gamma": hparams['gamma'],  # Discount factor
    "entropy_coeff": hparams['entropy_coeff'],
    "multiagent": {
        "policy_graphs": policy_graphs,
        "policy_mapping_fn": tune.function(policy_mapping_fn),
    },
    "model": {"custom_model": "conv_to_fc_net", "use_lstm": True,
              "lstm_cell_size": 128},
})

return algorithm, env_name, config
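# Rough sketch (not RLlib internals) of the total resources the config above asks
# Ray for: one driver plus `num_workers` rollout workers.
def total_resources(num_workers, cpus_for_driver, gpus_for_driver,
                    num_cpus_per_worker, num_gpus_per_worker):
    total_cpus = cpus_for_driver + num_workers * num_cpus_per_worker
    total_gpus = gpus_for_driver + num_workers * num_gpus_per_worker
    return total_cpus, total_gpus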
if platform is None:
    if osp.exists(osp.expanduser("~/ray_bootstrap_config.yaml")):
        platform = "baremetal"

if platform == "baremetal":
    baremetal = dict(baremetal)
    if "ssh_key" not in baremetal:
        baremetal["ssh_key"] = osp.expanduser("~/ray_bootstrap_key.pem")
    if "host" not in baremetal:
        baremetal["host"] = f"{getpass.getuser()}@{socket.getfqdn()}"
    if "dir" not in baremetal:
        baremetal["dir"] = osp.expanduser("~/adversarial-policies/data")
    spec["run_kwargs"] = {
        "upload_dir": ":".join([baremetal["host"], baremetal["ssh_key"], baremetal["dir"]]),
        "sync_to_cloud": tune.function(_rsync_func),
    }

ray_server = "localhost:6379"

_ = locals()  # quieten flake8 unused variable warning
del _
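# `_rsync_func` itself is not shown above. A plausible shape for such a sync
# callable, given the "host:ssh_key:dir" upload_dir built here, is sketched
# below; this is an assumption, not the repository's actual implementation.
import subprocess

def _rsync_func_sketch(local_dir, remote_uri):
    host, ssh_key, remote_dir = remote_uri.split(":", 2)
    subprocess.check_call([
        "rsync", "-az", "-e", f"ssh -i {ssh_key}",
        local_dir, f"{host}:{remote_dir}",
    ])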
def tune_example(num_replicas=1, use_gpu=False):
    config = {
        "model_creator": tune.function(simple_model),
        "data_creator": tune.function(simple_dataset),
        "num_replicas": num_replicas,
        "use_gpu": use_gpu,
        "trainer_config": create_config(batch_size=128),
    }

    analysis = tune.run(
        TFTrainable,
        num_samples=2,
        config=config,
        stop={"training_iteration": 2},
        verbose=1)

    return analysis.get_best_config(metric="validation_loss", mode="min")
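# Wrapping simple_model and simple_dataset in tune.function keeps Tune from
# treating them as resolvable config values during variant generation; they are
# passed through to the trainable as plain callables. Illustrative invocation:
if __name__ == "__main__":
    best_config = tune_example(num_replicas=2, use_gpu=False)
    print("best config by validation_loss:", best_config)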
def parse_search_config(self, search_config: Dict) -> Dict:
    for hyperparameter, val in search_config.items():
        if not isinstance(val, dict):
            ray_sampler = val
        elif val['sampling strategy'] == 'loguniform':
            low, high = val['bounds'][0], val['bounds'][1]
            ray_sampler = function(RandomSearch.random_loguniform(low, high))
        elif val['sampling strategy'] == 'integer':
            low, high = val['bounds'][0], val['bounds'][1]
            ray_sampler = function(RandomSearch.random_integer(low, high))
        elif val['sampling strategy'] == 'choice':
            ray_sampler = function(RandomSearch.random_choice(*val['choices']))
        elif val['sampling strategy'] == 'subset':
            ray_sampler = function(RandomSearch.random_subset(*val['choices']))
        elif val['sampling strategy'] == 'pair':
            ray_sampler = function(RandomSearch.random_pair(*val['choices']))
        elif val['sampling strategy'] == 'uniform':
            low, high = val['bounds'][0], val['bounds'][1]
            ray_sampler = function(RandomSearch.random_uniform(low, high))
        else:
            raise KeyError(f"sampling strategy {val['sampling strategy']} does not exist")
        search_config[hyperparameter] = ray_sampler
    return search_config
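# For reference, a search_config this parser accepts could look like the
# following; key names mirror the branches above, values are illustrative.
example_search_config = {
    "lr": {"sampling strategy": "loguniform", "bounds": [1e-5, 1e-2]},
    "hidden_size": {"sampling strategy": "integer", "bounds": [64, 512]},
    "activation": {"sampling strategy": "choice", "choices": ["relu", "tanh"]},
    "dropout": 0.1,  # non-dict values are passed through unchanged
}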
# information for replay
config['env_config']['func_create'] = tune.function(env_creator)
config['env_config']['env_name'] = env_name
config['env_config']['run'] = algorithm

# hyperparams
config.update({
    "train_batch_size": 30000,
    "horizon": 1000,
    "lr_schedule": [
        [0, 0.00126],
        [20000000, 0.000012],
    ],
    "num_workers": NUM_CPUS - 1,
    "entropy_coeff": -.00176,
    "multiagent": {
        "policy_graphs": policy_graphs,
        "policy_mapping_fn": tune.function(policy_mapping_fn),
    },
    "model": {"custom_model": "conv_to_fc_net", "use_lstm": True,
              "lstm_cell_size": 128},
})

return algorithm, env_name, config
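# The lr_schedule above is a list of [timestep, learning_rate] breakpoints that
# are linearly interpolated over training timesteps; a rough illustrative sketch
# of that interpretation (not RLlib's own implementation):
def lr_at(timesteps_total, schedule=((0, 0.00126), (20_000_000, 0.000012))):
    (t0, lr0), (t1, lr1) = schedule
    if timesteps_total >= t1:
        return lr1
    frac = (timesteps_total - t0) / (t1 - t0)
    return lr0 + frac * (lr1 - lr0)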
policy_graphs = {}
for i in range(NUM_AGENTS):
    policy_graphs['agent-' + str(i)] = gen_policy()

def policy_mapping_fn(agent_id):
    return agent_id

# register the custom model
model_name = "conv_to_fc_net"
ModelCatalog.register_custom_model(model_name, ConvToFCNet)

algorithm = 'A3C'
agent_cls = get_agent_class(algorithm)
config = agent_cls._default_config.copy()

# information for replay
config['env_config']['func_create'] = tune.function(env_creator)
config['env_config']['env_name'] = env_name
config['env_config']['run'] = algorithm

# hyperparams
config.update({
    "train_batch_size": 30000,
    "horizon": 1000,
    "lr_schedule": [
        [0, 0.00126],
        [20000000, 0.000012],
    ],
    "num_workers": NUM_CPUS - 1,
    "entropy_coeff": -.00176,
    "multiagent": {
        "policy_graphs": policy_graphs,
        "policy_mapping_fn": tune.function(policy_mapping_fn),
    },
    "model": {"custom_model": "conv_to_fc_net", "use_lstm": True,
              "lstm_cell_size": 128},
})
def convert(self) -> Callable[..., Any]:
    if self.local:
        return ray.tune.function(lambda spec: eval(f'spec'))  # TODO what do here
    return ray.tune.function(lambda spec: eval(f'spec.config.params.{self.root_schema}'))
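# In Tune, a bare lambda in a config is resolved against the trial spec, so the
# lambda returned above pulls out spec.config.params.<root_schema>. A minimal
# illustration of that resolution, with assumed names (root_schema == "model"):
from types import SimpleNamespace

spec = SimpleNamespace(config=SimpleNamespace(params=SimpleNamespace(model="lstm")))
resolver = lambda spec: eval('spec.config.params.model')
assert resolver(spec) == "lstm"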