How to use the gym.make function in gym

To help you get started, we've selected a few examples of gym.make, based on popular ways it is used in public projects.

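Before the project snippets, here is a minimal sketch of the basic pattern: gym.make looks up a registered environment id and returns a ready-to-use environment. The sketch assumes the classic gym API that the examples below also rely on (env.reset() returns an observation and env.step() returns a 4-tuple); newer gym and gymnasium releases change both signatures.

import gym

# Minimal sketch: create a registered environment by id and run one random episode.
# Classic gym API assumed; newer gym/gymnasium releases return (obs, info) from
# reset() and a 5-tuple from step().
env = gym.make('CartPole-v0')
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    action = env.action_space.sample()          # random action, just to exercise the loop
    obs, reward, done, info = env.step(action)
    total_reward += reward
env.close()
print('Episode reward:', total_reward)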

github showkeyjar / AutoMakeHuman / test / async-rl / async_dqn.py
Restoring a trained DQN from a checkpoint and creating a monitored Atari environment with gym.make for evaluation (note the Python 2 syntax and the legacy env.monitor API):

def evaluation(session, graph_ops, saver):
    saver.restore(session, FLAGS.checkpoint_path)
    print "Restored model weights from ", FLAGS.checkpoint_path
    monitor_env = gym.make(FLAGS.game)
    monitor_env.monitor.start(FLAGS.eval_dir + "/" + FLAGS.experiment + "/eval")

    # Unpack graph ops
    s = graph_ops["s"]
    q_values = graph_ops["q_values"]

    # Wrap env with AtariEnvironment helper class
    env = AtariEnvironment(gym_env=monitor_env, resized_width=FLAGS.resized_width, resized_height=FLAGS.resized_height,
                           agent_history_length=FLAGS.agent_history_length)

    for i_episode in xrange(FLAGS.num_eval_episodes):
        s_t = env.get_initial_state()
        ep_reward = 0
        terminal = False
        while not terminal:
            monitor_env.render()

github showkeyjar / AutoMakeHuman / test / async-rl / a3c.py
Creating one environment per worker thread with gym.make for asynchronous A3C training:

def train(session, graph_ops, saver):
    # Set up game environments (one per thread)
    envs = [gym.make(GAME) for i in range(NUM_CONCURRENT)]
    
    summary_ops = setup_summaries()
    summary_op = summary_ops[-1]

    # Initialize variables
    session.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(SUMMARY_SAVE_PATH, session.graph)

    # Start NUM_CONCURRENT training threads
    actor_learner_threads = [threading.Thread(target=actor_learner_thread, args=(thread_id, envs[thread_id], session, graph_ops, summary_ops, saver)) for thread_id in range(NUM_CONCURRENT)]
    for t in actor_learner_threads:
        t.start()

    # Show the agents training and write summary statistics
    last_summary_time = 0
    while True:

github bowenliu16 / rl_graph_generation / rl-baselines / baselines / ppo1 / gcn_policy.py
Instantiating a custom environment registered as 'molecule-v0' to exercise a GCN policy:

if __name__ == "__main__":
    adj_np = np.ones((5,3,4,4))
    adj = tf.placeholder(shape=(5,3,4,4),dtype=tf.float32)
    node_feature_np = np.ones((5,1,4,3))
    node_feature = tf.placeholder(shape=(5,1,4,3),dtype=tf.float32)


    ob_space = {}
    atom_type = 5
    ob_space['adj'] = gym.Space(shape=[3,5,5])
    ob_space['node'] = gym.Space(shape=[1,5,atom_type])
    ac_space = gym.spaces.MultiDiscrete([10, 10, 3])
    policy = GCNPolicy(name='policy',ob_space=ob_space,ac_space=ac_space)

    stochastic = True
    env = gym.make('molecule-v0')  # in gym format
    env.init()
    ob = env.reset()

    # ob['adj'] = np.repeat(ob['adj'][None],2,axis=0)
    # ob['node'] = np.repeat(ob['node'][None],2,axis=0)

    print('adj',ob['adj'].shape)
    print('node',ob['node'].shape)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(20):
            ob = env.reset()
            for j in range(0,20):
                ac,vpred,debug = policy.act(stochastic,ob)
                # if ac[0]==ac[1]:
                #     print('error')

github dragen1860 / TensorFlow-2.x-Tutorials / 17-A2C / a2c.py
Defining the A2C policy and entropy losses, then training and testing the agent on CartPole-v0:

# from_logits argument ensures transformation into normalized probabilities
        weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)
        # policy loss is defined by policy gradients, weighted by advantages
        # note: we only calculate the loss on the actions we've actually taken
        actions = tf.cast(actions, tf.int32)
        policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages)
        # entropy loss can be calculated via CE over itself
        entropy_loss = kls.categorical_crossentropy(logits, logits, from_logits=True)
        # here signs are flipped because optimizer minimizes
        return policy_loss - self.params['entropy']*entropy_loss


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)

    env = gym.make('CartPole-v0')
    model = Model(num_actions=env.action_space.n)
    agent = A2CAgent(model)
    
    rewards_history = agent.train(env)
    print("Finished training.")
    print("Total Episode Reward: %d out of 200" % agent.test(env, True))
    
    plt.style.use('seaborn')
    plt.plot(np.arange(0, len(rewards_history), 25), rewards_history[::25])
    plt.xlabel('Episode')
    plt.ylabel('Total Reward')
    plt.show()

github chainer / chainerrl / examples / mujoco / train_trpo_gym.py
A make_env helper that seeds training and test environments differently and applies ChainerRL wrappers:

def make_env(test):
        env = gym.make(args.env)
        # Use different random seeds for train and test envs
        env_seed = 2 ** 32 - args.seed if test else args.seed
        env.seed(env_seed)
        # Cast observations to float32 because our model uses float32
        env = chainerrl.wrappers.CastObservationToFloat32(env)
        if args.monitor:
            env = chainerrl.wrappers.Monitor(env, args.outdir)
        if args.render:
            env = chainerrl.wrappers.Render(env)
        return env

github rlworkgroup / garage / corl / mtppo_baby_mode_reach_push.py
Building a list of normalized, TensorFlow-wrapped environments from a list of environment names:

def make_envs(env_names):
    return [TfEnv(normalize(gym.make(env_name))) for env_name in env_names]

github amuta / DDPG-MountainCarContinuous-v0 / MountainCar.py
Silencing gym warnings and returning the unwrapped MountainCarContinuous-v0 environment:

def new_env(self):
        gym.logger.set_level(40)  # to suppress warnings
        return gym.make('MountainCarContinuous-v0').unwrapped

github tensorlayer / RLzoo / baselines / algorithms / a3c / run_a3c.py
Setting up an unwrapped BipedalWalker-v2 environment with fixed seeds before building the A3C networks:

import gym
import numpy as np
import tensorflow as tf

# from common.env_wrappers import DummyVecEnv
from common.utils import make_env
from algorithms.a3c.a3c import A3C
from common.value_networks import *
from common.policy_networks import *


''' load environment '''
env_id='BipedalWalker-v2'
env = gym.make(env_id).unwrapped
# env = DummyVecEnv([lambda: env])  # The algorithms require a vectorized/wrapped environment to run
action_shape = env.action_space.shape
state_shape = env.observation_space.shape
# reproducible
seed = 2
np.random.seed(seed)
tf.random.set_seed(seed)
env.seed(seed)

''' build networks for the algorithm '''
num_hidden_layer = 4 #number of hidden layers for the networks
hidden_dim=64 # dimension of hidden layers for the networks
num_workers = 2
net_list2 = []
for i in range(num_workers+1):
    with tf.name_scope('A3C'):

github tomsilver / pddlgym / custom / inverse_planning / demo.py
Creating separate planning and test instances of the same registered PDDLGym environment for value-iteration experiments:

def run_async_vi_experiment(gym_name, problems, vi_maxiters=2500, iter_plan_interval=100,
                            first_plot_interval=1000, use_cache=False, biased=False):
    start_time = time.time()
    all_results = []
    env = gym.make(gym_name)
    test_env = gym.make(gym_name)
    env._render = None
    test_env._render = None
    if isinstance(problems, int):
        problems = list(range(problems))
    num_problems = len(problems)
    for j, problem_index in enumerate(problems):
        print("\nRunning problem {}/{}".format(j, num_problems))
        results_for_problem = []
        all_results.append(results_for_problem)
        env.fix_problem_index(problem_index)
        env.reset()
        runner = run_async_value_iteration(env, iter_plans=True, use_cache=use_cache,
            iter_plan_interval=iter_plan_interval, epsilon=0., vi_maxiters=vi_maxiters,
            biased=biased)
        for i, plan in enumerate(runner):
            test_env.fix_problem_index(problem_index)

github flow-project / flow / examples / aimsun / traffic_light_coordination / visualization.py
Registering the Flow environment, restoring a trained agent from a checkpoint, and creating the environment with gym.make for visualization:

# pick your rendering mode
    sim_params.render = render
    sim_params.restart_instance = False
    create_env, env_name = make_create_env(params=flow_params, version=version)
    register_env(env_name, create_env)

    env_params = flow_params['env']
    env_params.restart_instance = False

    # create the agent that will be used to compute the actions
    agent = agent_cls(env=env_name, config=config)
    checkpoint = result_dir + '/checkpoint_{}'.format(checkpoint_num)
    checkpoint = checkpoint + '/checkpoint-{}'.format(checkpoint_num)
    agent.restore(checkpoint)

    env = gym.make(env_name)

    if sim_params.restart_instance:
        env.restart_simulation(sim_params=sim_params, render=sim_params.render)

    return env, env_params, agent