import unittest
import numpy as np
import rlcard
from rlcard.agents.cfr_agent import CFRAgent

class TestCFRAgent(unittest.TestCase):
    def test_train(self):
        # Train a CFR agent on Leduc Hold'em and check that evaluation
        # returns a legal action
        env = rlcard.make('leduc-holdem', allow_step_back=True)
        agent = CFRAgent(env)
        for _ in range(100):
            agent.train()
        state = {'obs': np.array([1., 1., 0., 0., 0., 0.]), 'legal_actions': [0, 2]}
        action = agent.eval_step(state)
        self.assertIn(action, [0, 2])
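For context, a minimal usage sketch, assuming the same old-style rlcard API as the test above (where env.run returns the trajectories and per-player payoffs, as in the other snippets on this page) and given a trained agent as produced by the test: pit the CFR agent against a random agent for one hand.
import rlcard
from rlcard.agents.random_agent import RandomAgent

eval_env = rlcard.make('leduc-holdem')
eval_env.set_agents([agent, RandomAgent(action_num=eval_env.action_num)])
trajectories, payoffs = eval_env.run(is_training=False)  # play one hand
print(payoffs)  # one entry per player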
# Example of using the simple Texas Hold'em environment
import rlcard
from rlcard.agents.random_agent import RandomAgent
# make environment
env = rlcard.make('simpletexasholdem')
print('############## Environment of Simple Texas Holdem Initialized ################')
env.test()
# set agents
agent_0 = RandomAgent()
agent_1 = RandomAgent()
agent_2 = RandomAgent()
env.set_agents([agent_0, agent_1, agent_2])
# seed everything for reproducibility
env.set_seed(0)
agent_0.set_seed(0)
agent_1.set_seed(0)
agent_2.set_seed(0)
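A short follow-up sketch under the same assumed API: with agents and seeds in place, env.run (the rollout method the other snippets on this page rely on) plays out complete hands and returns per-player payoffs.
for episode in range(10):
    trajectories, payoffs = env.run(is_training=False)  # play one full hand
    print('Episode {}: payoffs {}'.format(episode, payoffs))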
''' An example of learning an NFSP Agent on Mahjong
'''
import tensorflow as tf
import rlcard
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed
from rlcard.utils.logger import Logger
# Make environment
env = rlcard.make('mahjong')
eval_env = rlcard.make('mahjong')
# Set the number of iterations and how frequently we evaluate/save the plot
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 5000
episode_num = 10000000
# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 1000
# The paths for saving the logs and learning curves
root_path = './experiments/mahjong_nfsp_result/'
log_path = root_path + 'log.txt'
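One practical note on the paths above: the snippet never creates the result directory, so on a fresh checkout the first write to log_path can fail. A minimal, hedged addition:
import os
os.makedirs(root_path, exist_ok=True)  # ensure the result directory exists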
''' An example of generating Dou Dizhu trajectories with multiple processes
'''
import multiprocessing

import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed

# Avoid RuntimeError when child processes are spawned (e.g. on Windows)
multiprocessing.freeze_support()
# Set the number of processes
process_num = 8
# Set episode_num
episode_num = 10000
# Assign tasks: split the episodes across the processes
# (assign_task is a helper defined in the original example)
per_tasks = assign_task(episode_num, process_num)
# Set game and make environment
game = 'doudizhu'
env = rlcard.make(game)
# Set global seed
set_global_seed(1)
# Set up agents
agent_num = env.player_num
env.set_agents([RandomAgent(action_num=env.action_num)
                for _ in range(agent_num)])
# Create a managed list shared across processes to collect trajectories
manager = multiprocessing.Manager()
trajectories_set = manager.list()
# Create the worker processes
processes = []
for p in range(process_num):
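    # Hedged continuation sketch: the original snippet stops at this loop
    # header. It presumably starts one worker per task share; the target
    # function `rollout` below is hypothetical (not part of the snippet) and
    # would run its share of episodes and append results to the shared list.
    process = multiprocessing.Process(target=rollout,
                                      args=(env, per_tasks[p], trajectories_set))
    processes.append(process)
    process.start()
# Wait for every worker to finish
for process in processes:
    process.join()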
''' An example of learning a Deep-Q Agent on No-Limit Texas Hold'em
'''
import tensorflow as tf
import rlcard
from rlcard.agents.dqn_agent import DQNAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed
from rlcard.utils.logger import Logger
# Make environment
env = rlcard.make('no-limit-holdem')
eval_env = rlcard.make('no-limit-holdem')
# Set the number of iterations and how frequently we evaluate/save the plot
evaluate_every = 1000
save_plot_every = 10000
evaluate_num = 10000
episode_num = 1000000
# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 100
# Set a global seed
set_global_seed(0)
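The no-limit snippet stops right after seeding. As a hedged sketch of how such a script typically continues, the block below builds the DQN agent inside a TensorFlow 1.x session and runs the self-play loop. The DQNAgent keyword arguments and the mlp_layers value are assumptions wired to the hyperparameters defined above; the exact constructor signature varies across rlcard versions.
with tf.Session() as sess:
    # Hypothetical constructor call: argument names follow the variables
    # defined above and may differ between rlcard releases
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     state_shape=env.state_shape,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     mlp_layers=[512, 512])
    random_agent = RandomAgent(action_num=eval_env.action_num)
    sess.run(tf.global_variables_initializer())
    env.set_agents([agent, random_agent])
    eval_env.set_agents([agent, random_agent])
    for episode in range(episode_num):
        trajectories, _ = env.run(is_training=True)  # self-play one hand
        for ts in trajectories[0]:
            agent.feed(ts)                           # store transition, train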
''' An example of learning a Deep-Q Agent on Limit Texas Hold'em
'''
import tensorflow as tf
import rlcard
from rlcard.agents.dqn_agent import DQNAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed
from rlcard.utils.logger import Logger
# Make environment
env = rlcard.make('limit-holdem')
eval_env = rlcard.make('limit-holdem')
# Set the number of iterations and how frequently we evaluate/save the plot
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 10000
episode_num = 1000000
# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 100
# The paths for saving the logs and learning curves
root_path = './experiments/limit_holdem_dqn_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
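Since this is the snippet that defines a CSV path, here is a small hedged sketch of a periodic evaluation helper producing the kind of average-payoff numbers such a file would record. It relies only on env.run, which the snippets on this page already use; rlcard's built-in tournament utilities differ across versions, so none are assumed.
def evaluate(eval_env, num):
    ''' Average the first player's payoff over num hands
        (eval_env must already have its agents set)
    '''
    total = 0.0
    for _ in range(num):
        _, payoffs = eval_env.run(is_training=False)
        total += payoffs[0]
    return total / num
Calling evaluate(eval_env, evaluate_num) every evaluate_every episodes matches the hyperparameters above.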
''' An example of learning an NFSP Agent on Dou Dizhu
'''
import tensorflow as tf
import rlcard
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed
from rlcard.utils.logger import Logger
# Make environment
env = rlcard.make('doudizhu')
eval_env = rlcard.make('doudizhu')
# Set the number of iterations and how frequently we evaluate/save the plot
evaluate_every = 500
save_plot_every = 10000
evaluate_num = 1000
episode_num = 10000000
# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 1000
# Set a global seed
set_global_seed(0)
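A hedged sketch of the multi-agent training loop such an NFSP script typically builds next. The NFSPAgent constructor changed across rlcard versions, so it is elided here; `agents` stands in for one NFSPAgent per player, already passed to env.set_agents.
# agents = [NFSPAgent(...) for _ in range(env.player_num)]  # construction elided
for episode in range(episode_num):
    trajectories, _ = env.run(is_training=True)  # one self-play game
    for i in range(env.player_num):
        for ts in trajectories[i]:
            agents[i].feed(ts)                   # feed each player's transitions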
''' An example of learning an NFSP Agent on Leduc Hold'em
'''
import tensorflow as tf
import rlcard
from rlcard.agents.nfsp_agent import NFSPAgent
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed
from rlcard.utils.logger import Logger
# Make environment
env = rlcard.make('leduc-holdem')
eval_env = rlcard.make('leduc-holdem')
# Set the number of iterations and how frequently we evaluate/save the plot
evaluate_every = 1000
save_plot_every = 10000
evaluate_num = 10000
episode_num = 10000000
# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 1000
# Set a global seed
set_global_seed(0)
''' An example of generating Dou Dizhu trajectories with a process pool
'''
import time
import multiprocessing

import rlcard
from rlcard.agents.random_agent import RandomAgent
from rlcard.utils.utils import set_global_seed

if __name__ == '__main__':
    # Timer start
    start = time.time()
    # Avoid RuntimeError when child processes are spawned (e.g. on Windows)
    multiprocessing.freeze_support()
    # Set the number of processes
    process_num = 8
    # Initialize the process pool
    pool = multiprocessing.Pool(process_num)
    # Set game and make environment
    env = rlcard.make('doudizhu')
    # Set episode_num
    episode_num = 10000
    # Set global seed
    set_global_seed(1)
    # Set up agents
    agent_num = env.game.num_players
    env.set_agents([RandomAgent(action_num=env.action_num)
                    for _ in range(agent_num)])
    # Run games
    trajectories_set = []
    for episode in range(episode_num):
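        # Hedged continuation sketch: the original snippet stops at this loop
        # header. One plausible body hands each episode to the pool through a
        # hypothetical worker function `run_game` (not shown in the snippet)
        # and keeps the asynchronous results.
        trajectories_set.append(pool.apply_async(run_game, args=(episode,)))
    # Collect the results and shut the pool down
    results = [r.get() for r in trajectories_set]
    pool.close()
    pool.join()
    print('Elapsed: {:.2f}s'.format(time.time() - start))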