How to use the ray.tune.grid_search function in ray

To help you get started, we’ve selected a few examples of ray.tune.grid_search, based on popular ways the function is used in public projects.

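Before working through the project snippets, here is a minimal, self-contained sketch of what tune.grid_search does, assuming a Ray 1.x-style Tune API (the trainable, parameter names, and metric below are illustrative, not taken from the projects):

from ray import tune

def trainable(config):
    # Toy objective so the example runs end to end; a real trainable would
    # train a model here and report a meaningful metric.
    score = config["lr"] * 100 + config["momentum"]
    tune.report(score=score)

analysis = tune.run(
    trainable,
    config={
        "lr": tune.grid_search([1e-4, 1e-3]),            # 2 values
        "momentum": tune.grid_search([0.8, 0.9, 0.99]),  # 3 values
    },
)

# grid_search entries combine as a Cartesian product: 2 * 3 = 6 trials.
print(analysis.get_best_config(metric="score", mode="max"))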

github flow-project / flow / examples / rllib / multiagent_exps / MA_bottle_nolc_noagg_nocomm.py
    config = ppo.DEFAULT_CONFIG.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [64, 64]})
    config['clip_actions'] = True
    config['horizon'] = HORIZON
    config['vf_share_layers'] = True
    # config['use_centralized_vf'] = False
    # config['max_vf_agents'] = 140
    # config['simple_optimizer'] = True
    # config['vf_clip_param'] = 100

    # Grid search things
    config['lr'] = tune.grid_search([5e-5, 5e-4])
    config['num_sgd_iter'] = tune.grid_search([10, 30])

    # LSTM Things
    # config['model']['use_lstm'] = tune.grid_search([True, False])
    config['model']['lstm_use_prev_action_reward'] = True
    # config['model']["max_seq_len"] = tune.grid_search([5, 10])
    config['model']["lstm_cell_size"] = 64

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)
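The excerpt above stops right after the environment creator is built. A hedged sketch of how such a grid-searched config is typically handed off to Tune follows; the stopping criterion and checkpoint frequency are illustrative, not taken from the file:

from ray import tune
from ray.tune.registry import register_env

register_env(env_name, create_env)
tune.run(
    'PPO',                              # RLlib's registered PPO trainer
    config=config,                      # includes the tune.grid_search entries
    stop={'training_iteration': 200},   # illustrative stopping condition
    checkpoint_freq=20,
)
# With two values each for 'lr' and 'num_sgd_iter', Tune launches 2 * 2 = 4 trials.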
github flow-project / flow / examples / rllib / multiagent_exps / MA_bottle_LC_noagg_comm_p4.py
    config = ppo.DEFAULT_CONFIG.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [64, 64]})
    config['clip_actions'] = True
    config['horizon'] = HORIZON
    config['vf_share_layers'] = True
    # config['use_centralized_vf'] = False
    # config['max_vf_agents'] = 140
    # config['simple_optimizer'] = True
    # config['vf_clip_param'] = 100

    # Grid search things
    config['lr'] = tune.grid_search([5e-5, 5e-4])
    config['num_sgd_iter'] = tune.grid_search([10, 30])

    # LSTM Things
    # config['model']['use_lstm'] = tune.grid_search([True, False])
    config['model']['lstm_use_prev_action_reward'] = True
    # config['model']["max_seq_len"] = tune.grid_search([5, 10])
    config['model']["lstm_cell_size"] = 64

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)
github avisingh599 / reward-learning-rl / examples / mujoco_all_diayn.py
from softlearning.samplers.utils import get_sampler_from_variant
from softlearning.misc.nn import feedforward_model
from softlearning.value_functions.utils import (
    create_Q_function_from_variant,
    create_V_function_from_variant)

from examples.utils import (
    parse_universe_domain_task,
    get_parser,
    launch_experiments_rllab)

COMMON_PARAMS = {
    'algorithm_params': {
        'type': 'DIAYN'
    },
    'seed': tune.grid_search([1]),
    'lr': 3E-4,
    'discount': 0.99,
    'tau': 0.01,
    'K': 4,
    'layer_size': 256,
    'batch_size': 128,
    'n_train_repeat': 1,
    'epoch_length': 1000,
    'snapshot_mode': 'gap',
    'snapshot_gap': 10,
    'sync_pkl': True,
    'num_skills': 50,
    'scale_entropy': 0.1,
    'include_actions': False,
    'learn_p_z': False,
    'add_p_z': True,
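Note that a single-element grid such as tune.grid_search([1]) for 'seed' runs only one trial; it mainly keeps the key visible as a swept parameter, so that extending the list later turns it into a real sweep.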
github HumanCompatibleAI / adversarial-policies / src / aprl / configs / multi / train.py
"""Does using VecNormalize make a difference in performance?
        (Answer: not much after we rescaled reward; before the reward clipping had big effect.)"""
        train = dict(train)
        _sparse_reward(train)
        train["total_timesteps"] = int(5e6)
        train["learning_rate"] = 2.5e-4
        train["batch_size"] = 2048
        train["rl_args"] = {"ent_coef": 0.00}
        spec = {
            "config": {
                "env_name": tune.grid_search(
                    ["multicomp/KickAndDefend-v0", "multicomp/SumoAnts-v0"],
                ),
                "seed": tune.grid_search(list(range(3))),
                "embed_path": tune.grid_search(["1", "2", "3"]),
                "normalize": tune.grid_search([True, False]),
            },
        }
        exp_name = "vec_normalize"
        _ = locals()  # quieten flake8 unused variable warning
        del _
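The four grid_search entries in the spec above expand to 2 environments * 3 seeds * 3 embed paths * 2 normalize settings = 36 trials, one per combination, before any num_samples multiplier is applied on top.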
github HumanCompatibleAI / adversarial-policies / src / aprl / configs / multi / train.py
def _generic_finetune_defense(train, dual_defense=False, envs=None, exp_suffix=""):
    """Finetuning victim against adversary.

    This is the most generic helper method, used as a base for `_hyper_finetune_defense`
    and `_finetune_defense`.
    """
    _sparse_reward(train)
    train["num_env"] = 16  # TODO(adam): cleaner way of allowing finetuning LSTMs
    train["normalize_observations"] = False
    ray_config = {
        FINETUNE_PATHS_TYPES: tune.grid_search(
            _finetune_configs(envs=envs, dual_defense=dual_defense)
        ),
    }
    dual_name = "dual" if dual_defense else "single"
    exp_name = f"finetune_defense_{dual_name}_{exp_suffix}"

    return ray_config, exp_name
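grid_search is not limited to scalars: as in _generic_finetune_defense above, a single key can sweep over whole structured values, with one trial per element. A hedged illustration (the key and path values here are hypothetical, not what _finetune_configs actually returns):

from ray import tune

finetune_paths = [
    ('victim_a', 'adversary_1'),
    ('victim_b', 'adversary_2'),
]
ray_config = {'finetune_paths': tune.grid_search(finetune_paths)}  # one trial per tuple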
github flow-project / flow / examples / rllib / multiagent_exps / multiagent_highway.py
    str
        name of the training algorithm
    str
        name of the gym environment to be trained
    dict
        training configuration parameters
    """
    alg_run = 'PPO'
    agent_cls = get_agent_class(alg_run)
    config = agent_cls._default_config.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['simple_optimizer'] = True
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [32, 32]})
    config['lr'] = tune.grid_search([1e-5])
    config['horizon'] = HORIZON
    config['clip_actions'] = False
    config['observation_filter'] = 'NoFilter'

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # register as rllib env
    register_env(env_name, create_env)

    # multiagent configuration
github flow-project / flow / examples / rllib / multiagent_exps / MA_bottle_nolc_agg_nocomm.py
    config = ppo.DEFAULT_CONFIG.copy()
    config['num_workers'] = N_CPUS
    config['train_batch_size'] = HORIZON * N_ROLLOUTS
    config['gamma'] = 0.999  # discount rate
    config['model'].update({'fcnet_hiddens': [64, 64]})
    config['clip_actions'] = True
    config['horizon'] = HORIZON
    config['vf_share_layers'] = True
    # config['use_centralized_vf'] = False
    # config['max_vf_agents'] = 140
    # config['simple_optimizer'] = True
    # config['vf_clip_param'] = 100

    # Grid search things
    config['lr'] = tune.grid_search([5e-5, 5e-4])
    config['num_sgd_iter'] = tune.grid_search([10, 30])

    # LSTM Things
    # config['model']['use_lstm'] = tune.grid_search([True, False])
    config['model']['lstm_use_prev_action_reward'] = True
    # config['model']["max_seq_len"] = tune.grid_search([5, 10])
    config['model']["lstm_cell_size"] = 64

    # save the flow params for replay
    flow_json = json.dumps(
        flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    create_env, env_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env
github intel-analytics / analytics-zoo / pyzoo / zoo / automl / search / RayTuneSearchEngine.py
    def _prepare_tune_config(self, space):
        tune_config = {}
        for k, v in space.items():
            if isinstance(v, RandomSample):
                tune_config[k] = tune.sample_from(v.func)
            elif isinstance(v, GridSearch):
                tune_config[k] = tune.grid_search(v.values)
            else:
                tune_config[k] = v
        return tune_config
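For a search space that mixes the three cases handled above, the translated tune_config would look roughly like the following; the keys and values are illustrative, and only the tune calls are the real Ray API:

import random
from ray import tune

tune_config = {
    'lr': tune.grid_search([0.001, 0.01]),                               # from a GridSearch wrapper
    'dropout': tune.sample_from(lambda spec: random.uniform(0.1, 0.5)),  # from a RandomSample wrapper
    'batch_size': 64,                                                    # plain value, passed through unchanged
}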