How to use the ray.tune module in ray

To help you get started, we’ve selected a few ray.tune examples based on popular ways it is used in public projects.

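Before the project-specific examples below, here is a minimal, self-contained sketch of the core API they all build on: a trainable receives one resolved config per trial, and tune.grid_search / tune.choice describe the search space. The objective and hyperparameter names are illustrative only, and the calls assume a Ray 1.x-style API (newer releases expose the same ideas through tune.Tuner).

import ray
from ray import tune


def trainable(config):
    # Each trial receives one resolved config; report a metric back to Tune.
    score = config["lr"] * config["layers"]  # placeholder objective
    tune.report(score=score)


ray.init()
analysis = tune.run(
    trainable,
    config={
        "lr": tune.grid_search([1e-4, 1e-3]),  # one trial per listed value
        "layers": tune.choice([1, 2, 3]),      # sampled once per trial
    },
    num_samples=1,
)
print(analysis.get_best_config(metric="score", mode="max"))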

github JannerM / mbpo / examples / instrument.py
    variant_spec = example_module.get_variant_spec(example_args)
    trainable_class = example_module.get_trainable_class(example_args)

    experiment_id, experiment = generate_experiment(
        trainable_class, variant_spec, example_args)
    experiments = {experiment_id: experiment}

    ray.init(
        num_cpus=example_args.cpus,
        num_gpus=example_args.gpus,
        resources=example_args.resources or {},
        local_mode=local_mode,
        include_webui=example_args.include_webui,
        temp_dir=example_args.temp_dir)

    tune.run_experiments(
        experiments,
        with_server=example_args.with_server,
        server_port=4321,
        scheduler=None)
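Here generate_experiment is project-specific, so the shape of the experiments mapping is hidden. As a rough illustration, tune.run_experiments accepts a dict keyed by experiment name whose values describe the trainable, search space, and stopping criteria; the key names are standard, but all values below are assumed rather than taken from mbpo.

experiments = {
    "my_experiment": {
        "run": trainable_class,               # trainable class or registered algorithm name
        "config": variant_spec,               # hyperparameters / search space
        "stop": {"training_iteration": 100},  # illustrative stopping criterion
        "num_samples": 1,
        "checkpoint_freq": 10,
        "local_dir": "~/ray_results",
    },
}
tune.run_experiments(experiments)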
github flow-project / flow / examples / rllib / multiagent_exps / multiagent_bottleneck_nolc_agg.py
    alg_run = 'PPO'
    config = ppo.DEFAULT_CONFIG.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [64, 64]})
    config['clip_actions'] = True
    config['horizon'] = HORIZON
    config['vf_share_layers'] = True
    # config['use_centralized_vf'] = False
    # config['max_vf_agents'] = 140
    # config['simple_optimizer'] = True
    # config['vf_clip_param'] = 100

    # Grid search things
    config['lr'] = tune.grid_search([5e-5, 5e-4])
    config['num_sgd_iter'] = tune.grid_search([10, 30])

    # LSTM Things
    config['model']['use_lstm'] = tune.grid_search([True, False])
    #config['model']['use_lstm'] = tune.grid_search([True, False])
    # # config['model']["max_seq_len"] = tune.grid_search([5, 10])
    # config['model']["lstm_cell_size"] = 64

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)
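The three grid_search calls above expand into 2 × 2 × 2 = 8 trials, one per combination of learning rate, SGD iterations, and LSTM flag. In the full Flow scripts the environment created by make_create_env is then registered with Tune and the config handed to run_experiments; the sketch below shows that usual final step, with an assumed experiment tag and stopping criterion.

from ray.tune import run_experiments
from ray.tune.registry import register_env

# Make the environment name resolvable by RLlib workers.
register_env(env_name, create_env)

run_experiments({
    "multiagent_bottleneck": {                # experiment tag is illustrative
        "run": alg_run,                       # 'PPO'
        "env": env_name,
        "config": config,
        "checkpoint_freq": 20,                # assumed
        "stop": {"training_iteration": 200},  # assumed
    },
})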
github flow-project / flow / examples / rllib / multiagent_exps / MA_bottle_lc_noagg_comm_p6.py
    alg_run = 'PPO'
    config = ppo.DEFAULT_CONFIG.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [64, 64]})
    config['clip_actions'] = True
    config['horizon'] = HORIZON
    config['vf_share_layers'] = True
    # config['use_centralized_vf'] = False
    # config['max_vf_agents'] = 140
    # config['simple_optimizer'] = True
    # config['vf_clip_param'] = 100

    # Grid search things
    config['lr'] = tune.grid_search([5e-5, 5e-4])
    config['num_sgd_iter'] = tune.grid_search([10, 30])

    # LSTM Things
    # config['model']['use_lstm'] = tune.grid_search([True, False])
    config['model']['lstm_use_prev_action_reward'] = True
    #config['model']['use_lstm'] = tune.grid_search([True, False])
    # # config['model']["max_seq_len"] = tune.grid_search([5, 10])
    config['model']["lstm_cell_size"] = 64

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)
github praveen-palanisamy / macad-gym / examples / rllib / impala_agent.py
        if iter % 500 == 0:
            trainer.save("saved_models/multi-carla/" + args.model_arch)
        pprint(results)
else:
    config = {
        "env": "dm-" + env_name,
        "log_level": "DEBUG",
        "multiagent": {
            "policy_graphs": {
                "def_policy": (VTracePolicyGraph,
                               Box(0.0, 255.0, shape=(84, 84, 3)), Discrete(9),
                               {
                                   "gamma": 0.99
                               })
            },
            "policy_mapping_fn": tune.function(lambda agent_id: "def_policy"),
        },
        "env_config": env_actor_configs,
        "num_workers": args.num_workers,
        "num_envs_per_worker": args.envs_per_worker,
        "sample_batch_size": args.sample_bs_per_worker,
        "train_batch_size": args.train_bs
    }

    experiment_spec = tune.Experiment(
        "multi-carla/" + args.model_arch,
        "IMPALA",
        # timesteps_total is init with None (not 0) which causes issue
        # stop={"timesteps_total": args.num_steps},
        stop={"timesteps_since_restore": args.num_steps},
        config=config,
        checkpoint_freq=1000,
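An Experiment constructed this way is launched just like a spec dict; the sketch below shows that usual final step, which is not part of the excerpt above. The commented alternative is a rough single-experiment equivalent using tune.run, with names reused from the snippet.

# Launch the configured experiment (a list of Experiment objects also works).
tune.run_experiments(experiment_spec)

# Roughly equivalent form on newer Ray versions (names assumed from the snippet):
# tune.run("IMPALA", name="multi-carla/" + args.model_arch, config=config,
#          stop={"timesteps_since_restore": args.num_steps}, checkpoint_freq=1000)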
github eugenevinitsky / sequential_social_dilemma_games / run_scripts / train_baseline_a3c.py
        num_gpus_per_worker = 0
        num_cpus_per_worker = int(spare_cpus / num_workers)

    # hyperparams
    if tune_hparams:
        config.update({
            "train_batch_size": 128,
            "horizon": 1000,
            "lr_schedule": [[0, tune.grid_search([5e-4, 5e-3])],
                            [20000000, tune.grid_search([5e-4, 5e-5])]],
            "num_workers": num_workers,
            "num_gpus": gpus_for_driver,  # The number of GPUs for the driver
            "num_cpus_for_driver": cpus_for_driver,
            "num_gpus_per_worker": num_gpus_per_worker,   # Can be a fraction
            "num_cpus_per_worker": num_cpus_per_worker,   # Can be a fraction
            "entropy_coeff": tune.grid_search([0, -1e-1, -1e-2]),
            "multiagent": {
                "policy_graphs": policy_graphs,
                "policy_mapping_fn": tune.function(policy_mapping_fn),
            },
            "model": {"custom_model": "conv_to_fc_net", "use_lstm": True,
                      "lstm_cell_size": 128}
        })
    else:
        config.update({
            #"train_batch_size": 128,
            "horizon": 1000,
            # "lr_schedule": [[0, hparams['lr_init']],
            #                 [20000000, hparams['lr_final']]],
            "num_workers": num_workers,
            "num_gpus": gpus_for_driver,  # The number of GPUs for the driver
            "num_cpus_for_driver": cpus_for_driver,
github flow-project / flow / examples / rllib / multiagent_exps / MA_bottle_lc_noagg_nocomm.py
    alg_run = 'PPO'
    config = ppo.DEFAULT_CONFIG.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [64, 64]})
    config['clip_actions'] = True
    config['horizon'] = HORIZON
    config['vf_share_layers'] = True
    # config['use_centralized_vf'] = False
    # config['max_vf_agents'] = 140
    # config['simple_optimizer'] = True
    # config['vf_clip_param'] = 100

    # Grid search things
    config['lr'] = tune.grid_search([5e-5, 5e-4])
    config['num_sgd_iter'] = tune.grid_search([10, 30])

    # LSTM Things
    # config['model']['use_lstm'] = tune.grid_search([True, False])
    config['model']['lstm_use_prev_action_reward'] = True
    #config['model']['use_lstm'] = tune.grid_search([True, False])
    # # config['model']["max_seq_len"] = tune.grid_search([5, 10])
    config['model']["lstm_cell_size"] = 64

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)
github flow-project / flow / examples / rllib / multiagent_exps / multiagent_bottleneck.py
    alg_run = 'PPO'
    config = ppo.DEFAULT_CONFIG.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [64, 64]})
    config['clip_actions'] = True
    config['horizon'] = HORIZON
    config['vf_share_layers'] = True
    # config['use_centralized_vf'] = False
    # config['max_vf_agents'] = 140
    # config['simple_optimizer'] = True
    # config['vf_clip_param'] = 100

    # Grid search things
    config['lr'] = tune.grid_search([5e-5, 5e-4])
    config['num_sgd_iter'] = tune.grid_search([10, 30])

    # LSTM Things
    config['model']['use_lstm'] = tune.grid_search([True, False])
    #config['model']['use_lstm'] = tune.grid_search([True, False])
    # # config['model']["max_seq_len"] = tune.grid_search([5, 10])
    # config['model']["lstm_cell_size"] = 64

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)