How to use the rlcard.utils.utils.set_global_seed function in rlcard

To help you get started, we've selected a few rlcard examples based on popular ways set_global_seed is used in public projects.

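Before the longer excerpts below, here is a minimal sketch of the typical pattern: call set_global_seed once, before the environment and agents are created, so that shuffling and random play are reproducible. In the rlcard versions these examples come from, set_global_seed seeds Python's random module and NumPy, and TensorFlow when it is installed; treat the exact set of seeded generators as an assumption to verify against your installed version.

import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed

# Seed the global RNGs once, before building the environment,
# so repeated runs of this script produce identical episodes
set_global_seed(0)

env = rlcard.make('blackjack')
env.set_agents([RandomAgent(action_num=env.action_num)])
trajectories, _ = env.run(is_training=False)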

github datamllab / rlcard / examples / blackjack_dqn_multi_process.py (View on Github)
                for _ in range(tasks):
                    trajectories, _ = self.env.run(is_training=train_flag)
                    self.output_queue.put(trajectories)
                self.input_queue.task_done()
            else:
                self.input_queue.task_done()
                break
        self.sess.close()
        return

if __name__ == '__main__':
    # Avoid RuntimeError on Windows when the script is frozen into an executable
    multiprocessing.freeze_support()

    # Set a global seed
    set_global_seed(0)

    # Initialize the worker processes
    PROCESS_NUM = 16
    INPUT_QUEUE = JoinableQueue()
    OUTPUT_QUEUE = Queue()
    PROCESSES = [BlackjackProcess(index, INPUT_QUEUE, OUTPUT_QUEUE, np.random.randint(1000000))
                 for index in range(PROCESS_NUM)]
    for p in PROCESSES:
        p.start()

    # Make environment
    env = rlcard.make('blackjack')
    eval_env = rlcard.make('blackjack')

    with tf.Session() as sess:
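Note the seeding pattern in this multi-process example: set_global_seed(0) seeds the parent process, and each BlackjackProcess then receives its own seed drawn from the parent's (now seeded) NumPy RNG, so workers behave independently while the run as a whole stays reproducible. Condensed to just that pattern, with names as in the excerpt above:

set_global_seed(0)                            # seed the parent process once
worker_seeds = [np.random.randint(1000000)    # distinct but reproducible seeds,
                for _ in range(PROCESS_NUM)]  # one per worker process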

github datamllab / rlcard / examples / nolimit_holdem_dqn.py (View on Github)
env = rlcard.make('no-limit-holdem')
eval_env = rlcard.make('no-limit-holdem')

# Set the iteration numbers and how frequently we evaluate and save the plot
evaluate_every = 1000
save_plot_every = 10000
evaluate_num = 10000
episode_num = 1000000

# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 100

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_size=int(1e5),
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=[52],
                     mlp_layers=[512, 512])

    random_agent = RandomAgent(action_num=eval_env.action_num)

    sess.run(tf.global_variables_initializer())

github datamllab / rlcard / examples / uno_nfsp.py (View on Github)
evaluate_num = 10000
episode_num = 10000000

# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 1000

# The paths for saving the logs and learning curves
root_path = './experiments/uno_nfsp_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agents = []
    for i in range(env.player_num):
        agent = NFSPAgent(sess,
                          scope='nfsp' + str(i),
                          action_num=env.action_num,
                          state_shape=env.state_shape,
hidden_layers_sizes=[512, 1024, 2048, 1024, 512],
                          anticipatory_param=0.5,
                          batch_size=256,
                          rl_learning_rate=0.00005,
                          sl_learning_rate=0.00001,
                          min_buffer_size_to_learn=memory_init_size,

github datamllab / rlcard / examples / mahjong_dqn.py (View on Github)
evaluate_num = 5000
episode_num = 1000000

# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 1000

# The paths for saving the logs and learning curves
root_path = './experiments/mahjong_dqn_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_size=20000,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 512])

    random_agent = RandomAgent(action_num=eval_env.action_num)

    sess.run(tf.global_variables_initializer())

github datamllab / rlcard / examples / uno_dqn.py (View on Github)
evaluate_num = 10000
episode_num = 1000000

# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 1000

# The paths for saving the logs and learning curves
root_path = './experiments/uno_dqn_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_size=20000,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 512])

    random_agent = RandomAgent(action_num=eval_env.action_num)

    sess.run(tf.global_variables_initializer())

github datamllab / rlcard / examples / doudizhu_dqn.py (View on Github)
env = rlcard.make('doudizhu')
eval_env = rlcard.make('doudizhu')

# Set the iteration numbers and how frequently we evaluate and save the plot
evaluate_every = 200
save_plot_every = 5000
evaluate_num = 200
episode_num = 1000000

# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 100

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    agent = DQNAgent(sess,
                     action_num=env.action_num,
                     replay_memory_size=20000,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=[6, 5, 15],
                     mlp_layers=[512, 512])

    random_agent = RandomAgent(action_num=eval_env.action_num)

    env.set_agents([agent, random_agent, random_agent])
    eval_env.set_agents([agent, random_agent, random_agent])

github datamllab / rlcard / examples / mahjong_nfsp.py (View on Github)
evaluate_num = 5000
episode_num = 10000000

# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 1000

# The paths for saving the logs and learning curves
root_path = './experiments/mahjong_nfsp_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agents = []
    for i in range(env.player_num):
        agent = NFSPAgent(sess,
                          scope='nfsp' + str(i),
                          action_num=env.action_num,
                          state_shape=env.state_shape,
hidden_layers_sizes=[512, 1024, 2048, 1024, 512],
                          anticipatory_param=0.5,
                          batch_size=256,
                          rl_learning_rate=0.00005,
                          sl_learning_rate=0.00001,
                          min_buffer_size_to_learn=memory_init_size,

github datamllab / rlcard / examples / doudizhu_random_process_pool.py (View on Github)
    multiprocessing.freeze_support()

    # Set the number of processes
    process_num = 8

    # Initialize process pool
    pool = multiprocessing.Pool(process_num)

    # Set game and make environment
    env = rlcard.make('doudizhu')

    # Set episode_num
    episode_num = 10000

    # Set global seed
    set_global_seed(1)

    # Set up agents
    agent_num = env.game.num_players
    env.set_agents([RandomAgent(action_num=env.action_num)
                    for _ in range(agent_num)])

    # Run game
    trajectories_set = []
    for episode in range(episode_num):

        # Generate data from the environment
        result = pool.apply_async(env.run, args=(False, np.random.randint(10000000)))
        trajectories_set.append(result)
    for result in trajectories_set:
        trajectories, player_wins = result.get()
        # print(trajectories, player_wins)
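Here the same idea is applied per task rather than per worker: each env.run call dispatched through apply_async receives a fresh seed drawn from the parent's globally seeded NumPy RNG, so episodes are decorrelated but the batch is reproducible end to end. Assuming env.run accepts (is_training, seed), as the args tuple above suggests, the pattern reduces to:

set_global_seed(1)  # seed the parent once
results = [pool.apply_async(env.run, args=(False, np.random.randint(10000000)))
           for _ in range(episode_num)]
trajectories_set = [r.get() for r in results]  # each item is (trajectories, player_wins)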

github datamllab / rlcard / examples / uno_random.py (View on Github)
''' A toy example of playing Uno with random agents
'''

import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed

# Make environment
env = rlcard.make('uno')
episode_num = 2

# Set a global seed
set_global_seed(0)

# Set up agents
agent_0 = RandomAgent(action_num=env.action_num)
agent_1 = RandomAgent(action_num=env.action_num)
agent_2 = RandomAgent(action_num=env.action_num)
agent_3 = RandomAgent(action_num=env.action_num)
env.set_agents([agent_0, agent_1, agent_2, agent_3])

for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))

github datamllab / rlcard / examples / blackjack_random.py (View on Github)
''' A toy example of playing Blackjack with random agents
'''

import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed

# Make environment
env = rlcard.make('blackjack')
episode_num = 2

# Set a global seed
set_global_seed(1)

# Set up agents
agent_0 = RandomAgent(action_num=env.action_num)
env.set_agents([agent_0])

for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=False)

    # Print out the trajectories
    print('\nEpisode {}'.format(episode))
    for ts in trajectories[0]:
        print('State: {}, Action: {}, Reward: {}, Next State: {}, Done: {}'.format(ts[0], ts[1], ts[2], ts[3], ts[4]))
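A quick way to confirm the seed is doing its job is to re-seed and re-run: if the environment draws only from the globally seeded generators, as these examples suggest, two episodes started from the same seed should be identical. A sketch of that check, under that assumption:

set_global_seed(1)
run_a, _ = env.run(is_training=False)

set_global_seed(1)
run_b, _ = env.run(is_training=False)

# Should print True if the environment uses only the globally
# seeded generators
print(str(run_a) == str(run_b))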