config = get_rllib_config(result_dir)
pkl = get_rllib_pkl(result_dir)

# check if we have a multiagent scenario but in a
# backwards compatible way
if config.get('multiagent', {}).get('policy_graphs', {}):
    multiagent = True
    config['multiagent'] = pkl['multiagent']
else:
    multiagent = False

# Create and register a gym+rllib env
env_creator = pkl['env_config']['func_create']
env_name = config['env_config']['env_name']
register_env(env_name, env_creator.func)
ModelCatalog.register_custom_model("conv_to_fc_net", ConvToFCNet)

# Determine agent and checkpoint
config_run = config['env_config']['run'] if 'run' in config['env_config'] \
    else None
if args.run and config_run:
    if args.run != config_run:
        print('visualizer_rllib.py: error: run argument '
              + '\'{}\' passed in '.format(args.run)
              + 'differs from the one stored in params.json '
              + '\'{}\''.format(config_run))
        sys.exit(1)
if args.run:
    agent_cls = get_agent_class(args.run)
elif config_run:
    agent_cls = get_agent_class(config_run)
        reward = -1
        done = len(self.history) > 100
        return self._next_obs(), reward, done, {}

    def _next_obs(self):
        token = random.choice([0, 1])
        self.history.append(token)
        return token

if __name__ == "__main__":
    ray.init()
    args = parser.parse_args()
    ModelCatalog.register_custom_model("rnn", MyKerasRNN)
    register_env("RepeatAfterMeEnv", lambda c: RepeatAfterMeEnv(c))
    register_env("RepeatInitialEnv", lambda _: RepeatInitialEnv())
    tune.run(
        args.run,
        stop={"episode_reward_mean": args.stop},
        config={
            "env": args.env,
            "env_config": {
                "repeat_delay": 2,
            },
            "gamma": 0.9,
            "num_workers": 0,
            "num_envs_per_worker": 20,
            "entropy_coeff": 0.001,
            "num_sgd_iter": 5,
            "vf_loss_coeff": 1e-5,
            "model": {
                "custom_model": "rnn",
            },
        })
import ray
import sonic_on_ray
from ray.rllib import ppo
from ray.tune.registry import register_env

env_name = 'sonic_env'
# Note that the hyperparameters have been tuned for Sonic, which can be
# run by replacing the below function with:
#
#     register_env(env_name, lambda config: sonic_on_ray.make(
#         game='SonicTheHedgehog-Genesis',
#         state='GreenHillZone.Act1'))
#
# However, to try Sonic, you have to obtain the ROM yourself (see the
# instructions at https://github.com/openai/retro/blob/master/README.md).
register_env(env_name,
             lambda config: sonic_on_ray.make(game='Airstriker-Genesis',
                                              state='Level1'))

ray.init()

config = ppo.DEFAULT_CONFIG.copy()
config.update({
    'timesteps_per_batch': 40000,
    'min_steps_per_task': 100,
    'num_workers': 32,
    'gamma': 0.99,
    'lambda': 0.95,
    'clip_param': 0.1,
    'num_sgd_iter': 30,
    'sgd_batchsize': 4096,
})
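# The snippet above stops at the config; below is a minimal sketch of the
# driver code that would follow, assuming the old-style agent constructor
# that also appears (commented out) further down this page:
#     alg = ppo.PPOAgent(env=..., config=...)
alg = ppo.PPOAgent(config=config, env=env_name)

for i in range(1000):
    result = alg.train()              # one training iteration
    print('iteration {}: {}'.format(i, result))

    if i % 10 == 0:
        checkpoint_path = alg.save()  # periodically write a checkpoint
        print('checkpoint saved at', checkpoint_path)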
multiagent = False

# Run on only one cpu for rendering purposes
config['num_workers'] = 0

flow_params = get_flow_params(config)

# hack for old pkl files
# TODO(ev) remove eventually
sim_params = flow_params['sim']
setattr(sim_params, 'num_clients', 1)

# Create and register a gym+rllib env
create_env, env_name = make_create_env(
    params=flow_params, version=0, render=False)
register_env(env_name, create_env)

# Determine agent and checkpoint
config_run = config['env_config']['run'] if 'run' in config['env_config'] \
    else None
if args.run and config_run:
    if args.run != config_run:
        print('visualizer_rllib.py: error: run argument '
              + '\'{}\' passed in '.format(args.run)
              + 'differs from the one stored in params.json '
              + '\'{}\''.format(config_run))
        sys.exit(1)
if args.run:
    agent_cls = get_agent_class(args.run)
elif config_run:
    agent_cls = get_agent_class(config_run)
else:
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# multiagent configuration
if policy_graphs is not None:
    print("policy_graphs", policy_graphs)
    config['multiagent'].update({'policies': policy_graphs})
if policy_mapping_fn is not None:
    config['multiagent'].update(
        {'policy_mapping_fn': tune.function(policy_mapping_fn)})
if policies_to_train is not None:
    config['multiagent'].update({'policies_to_train': policies_to_train})

create_env, gym_name = make_create_env(params=flow_params)

# Register as rllib env
register_env(gym_name, create_env)
return alg_run, gym_name, config
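# A minimal sketch of how the returned (alg_run, gym_name, config) triple is
# typically consumed; the setup_exps() wrapper name and the checkpoint/stop
# values below are assumptions, not taken from the snippet above.
import ray
from ray.tune import run_experiments

alg_run, gym_name, config = setup_exps()  # hypothetical wrapper around the code above
ray.init()
run_experiments({
    'flow_experiment': {
        'run': alg_run,
        'env': gym_name,
        'config': {**config},
        'checkpoint_freq': 20,                # assumed checkpointing interval
        'stop': {'training_iteration': 200},  # assumed stopping criterion
    },
})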
setattr(sim_params, 'num_clients', 1)
# Determine agent and checkpoint
config_run = config['env_config'].get("run", None)
agent_cls = get_agent_class(config_run)
sim_params.restart_instance = True
dir_path = os.path.dirname(os.path.realpath(__file__))
emission_path = '{0}/emission/'.format(dir_path)
sim_params.emission_path = emission_path if gen_emission else None
# pick your rendering mode
sim_params.render = render
sim_params.restart_instance = False
create_env, env_name = make_create_env(params=flow_params, version=version)
register_env(env_name, create_env)
env_params = flow_params['env']
env_params.restart_instance = False
# create the agent that will be used to compute the actions
agent = agent_cls(env=env_name, config=config)
checkpoint = result_dir + '/checkpoint_{}'.format(checkpoint_num)
checkpoint = checkpoint + '/checkpoint-{}'.format(checkpoint_num)
agent.restore(checkpoint)
env = gym.make(env_name)
if sim_params.restart_instance:
    env.restart_simulation(sim_params=sim_params, render=sim_params.render)
return env, env_params, agent
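# A minimal replay sketch using the returned (env, env_params, agent); reading
# the episode length from env_params.horizon is an assumption.
obs = env.reset()
total_reward = 0
for _ in range(env_params.horizon):
    action = agent.compute_action(obs)  # query the restored policy
    obs, reward, done, _ = env.step(action)
    total_reward += reward
    if done:
        break
print('episode reward:', total_reward)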
config["num_sgd_iter"] = 10
config['clip_actions'] = False # (ev) temporary ray bug
config["horizon"] = RLLIB_HORIZON # not same as env horizon.
config["vf_loss_coeff"] = 1e-9
config["lr"] = 5e-4
# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run
create_env, gym_name = make_create_env(params=flow_params, version=version)
# Register as rllib env
ray.tune.registry.register_env(gym_name, create_env)
return alg_run, gym_name, config
config['model']['use_lstm'] = True
config['model']['lstm_use_prev_action_reward'] = True
# config['model']['use_lstm'] = tune.grid_search([True, False])
# config['model']["max_seq_len"] = tune.grid_search([5, 10])
config['model']["lstm_cell_size"] = 64

# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

create_env, env_name = make_create_env(params=flow_params, version=0)

# Register as rllib env
register_env(env_name, create_env)

test_env = create_env()
obs_space = test_env.observation_space
act_space = test_env.action_space

# Set up a single policy graph ('av') shared by all autonomous vehicles
policy_graphs = {'av': (PPOPolicyGraph, obs_space, act_space, {})}


def policy_mapping_fn(agent_id):
    return 'av'


config.update({
    'multiagent': {
        'policy_graphs': policy_graphs,
        'policy_mapping_fn': tune.function(policy_mapping_fn),
        'policies_to_train': ['av'],
    }
})
                              + 2 * 5] * RL_VEHICLES,
    "multiagent_act_shapes": [2] * RL_VEHICLES,
    "is_shared_model": True,
    "multiagent_shared_model": True,
    "multiagent_hiddens": [[64, 64]] * RL_VEHICLES,
    'flowenv': flow_env_name,
}
config["model"].update({"custom_options": options})

create_env, env_name = make_create_env(flow_env_name,
                                       flow_params,
                                       version=0)

# Register as rllib env
register_rllib_env(flow_env_name + '-v0', create_env)
# alg = ppo.PPOAgent(env=env_name, registry=get_registry(), config=config)

# Logging out flow_params to ray's experiment result folder
json_out_file = os.path.dirname(os.path.realpath(__file__)) + \
    '/flow_params.json'
with open(json_out_file, 'w') as outfile:
    json.dump(flow_params, outfile, cls=NameEncoder,
              sort_keys=True, indent=4)

trials = run_experiments({
    "m_bottleneck": {
        "run": "PPO",
        "env": flow_env_name + '-v0',
        "config": {
            **config