agent = DQNAgent(sess,
                 scope='dqn',
                 action_num=env.action_num,
                 replay_memory_init_size=memory_init_size,
                 norm_step=norm_step,
                 state_shape=env.state_shape,
                 mlp_layers=[10, 10])
env.set_agents([agent])
eval_env.set_agents([agent])
sess.run(tf.global_variables_initializer())

# Count the number of steps
step_counter = 0

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward',
                legend='DQN on Blackjack', log_path=log_path, csv_path=csv_path)

for episode in range(episode_num // evaluate_every):
    # Generate data from the environment
    tasks = assign_task(evaluate_every, PROCESS_NUM)
    for task in tasks:
        INPUT_QUEUE.put((task, True, None, None))

    for _ in range(evaluate_every):
        trajectories = OUTPUT_QUEUE.get()

        # Feed transitions into agent memory, and train
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1
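
            # Train the agent once the replay memory and normalization steps have
            # been filled. This block is a sketch mirroring the single-process
            # examples below; the original multi-process snippet is cut off here.
            train_count = step_counter - (memory_init_size + norm_step)
            if train_count > 0:
                loss = agent.train()
                print('\rINFO - Step {}, loss: {}'.format(step_counter, loss), end='')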

def test_close_file(self):
    logger = Logger(xlabel="x", ylabel="y", legend="test",
                    log_path="./newtest/test_log.txt", csv_path="./newtest/test_csv.csv")
    logger.close_file()
    self.assertTrue(os.path.exists('./newtest/'))

def test_make_plot(self):
    logger = Logger(xlabel="x", ylabel="y", legend="test")
    for x in range(10):
        logger.add_point(x=x, y=x*x)
    self.assertEqual(9*9, logger.ys[9])
    logger.make_plot(save_path='./newtest/test.png')
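
def test_add_point(self):
    # A small companion check (illustrative sketch, not from the original file):
    # add_point should record the y values in order. Only the `ys` attribute
    # already used above is relied on here.
    logger = Logger(xlabel="x", ylabel="y", legend="test")
    for x in range(5):
        logger.add_point(x=x, y=2 * x)
    self.assertEqual(5, len(logger.ys))
    self.assertEqual(8, logger.ys[4])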

                 norm_step=norm_step,
                 state_shape=env.state_shape,
                 mlp_layers=[512, 512])

random_agent = RandomAgent(action_num=eval_env.action_num)

sess.run(tf.global_variables_initializer())

env.set_agents([agent, random_agent])
eval_env.set_agents([agent, random_agent])

# Count the number of steps
step_counter = 0

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward', legend='DQN on Limit Texas Holdem',
                log_path=log_path, csv_path=csv_path)

for episode in range(episode_num):
    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)
        step_counter += 1

        # Train the agent
        train_count = step_counter - (memory_init_size + norm_step)
        if train_count > 0:
            loss = agent.train()
            print('\rINFO - Step {}, loss: {}'.format(step_counter, loss), end='')

                  q_norm_step=norm_step,
                  q_mlp_layers=[512, 512])
agents.append(agent)

sess.run(tf.global_variables_initializer())

random_agent = RandomAgent(action_num=eval_env.action_num)

env.set_agents(agents)
eval_env.set_agents([agents[0], random_agent])

# Count the number of steps
step_counters = [0 for _ in range(env.player_num)]

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward', legend='NFSP on No-Limit Texas Holdem',
                log_path='./experiments/nolimit_holdem_nfsp_result/log.txt',
                csv_path='./experiments/nolimit_holdem_nfsp_result/performance.csv')

for episode in range(episode_num):
    # First sample a policy for the episode
    for agent in agents:
        agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for i in range(env.player_num):
        for ts in trajectories[i]:
            agents[i].feed(ts)
            step_counters[i] += 1

                 norm_step=norm_step,
                 state_shape=env.state_shape,
                 mlp_layers=[512, 512])

random_agent = RandomAgent(action_num=eval_env.action_num)

sess.run(tf.global_variables_initializer())

env.set_agents([agent, random_agent, random_agent])
eval_env.set_agents([agent, random_agent, random_agent])

# Count the number of steps
step_counter = 0

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward', legend='DQN on UNO',
                log_path=log_path, csv_path=csv_path)

for episode in range(episode_num):
    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)
        step_counter += 1

        # Train the agent
        train_count = step_counter - (memory_init_size + norm_step)
        if train_count > 0:
            loss = agent.train()
            print('\rINFO - Step {}, loss: {}'.format(step_counter, loss), end='')

                  q_norm_step=norm_step,
                  q_mlp_layers=[128, 128])
agents.append(agent)

sess.run(tf.global_variables_initializer())

random_agent = RandomAgent(action_num=eval_env.action_num)

env.set_agents(agents)
eval_env.set_agents([agents[0], random_agent])

# Count the number of steps
step_counters = [0 for _ in range(env.player_num)]

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward', legend='NFSP on Leduc Holdem',
                log_path='./experiments/leduc_holdem_nfsp_result/log.txt',
                csv_path='./experiments/leduc_holdem_nfsp_result/performance.csv')

for episode in range(episode_num):
    # First sample a policy for the episode
    for agent in agents:
        agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for i in range(env.player_num):
        for ts in trajectories[i]:
            agents[i].feed(ts)
            step_counters[i] += 1

with tf.Session() as sess:
    # Set agents
    agent = DQNAgent(sess,
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=[2],
                     mlp_layers=[10, 10])
    env.set_agents([agent])

    # Count the number of steps
    step_counter = 0

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='episode', ylabel='reward', legend='DQN on Blackjack',
                    log_path='./experiments/blackjack_dqn_result/log.txt',
                    csv_path='./experiments/blackjack_dqn_result/performance.csv')

    for episode in range(episode_num):
        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1

            # Train the agent
            if step_counter > memory_init_size + norm_step:
                agent.train()

        # Evaluate the performance
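        # A minimal evaluation sketch: the original block is cut off above. It
        # assumes an `eval_env` configured like `env`, plus `evaluate_every` and
        # `evaluate_num` hyperparameters, and that run() also returns the payoffs.
        if episode % evaluate_every == 0:
            reward = 0
            for _ in range(evaluate_num):
                _, payoffs = eval_env.run(is_training=False)
                reward += payoffs[0]

            # Record the average reward so the learning curve can be plotted
            logger.add_point(x=episode, y=float(reward) / evaluate_num)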

                 norm_step=norm_step,
                 state_shape=[6],
                 mlp_layers=[128, 128])

random_agent = RandomAgent(action_num=eval_env.action_num)

sess.run(tf.global_variables_initializer())

env.set_agents([agent, random_agent])
eval_env.set_agents([agent, random_agent])

# Count the number of steps
step_counter = 0

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward', legend='DQN on Leduc Holdem',
                log_path='./experiments/leduc_holdem_dqn_result/log.txt',
                csv_path='./experiments/leduc_holdem_dqn_result/performance.csv')

for episode in range(episode_num):
    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)
        step_counter += 1

        # Train the agent
        train_count = step_counter - (memory_init_size + norm_step)
        if train_count > 0:
            loss = agent.train()
            print('\rINFO - Step {}, loss: {}'.format(step_counter, loss), end='')

                  q_norm_step=norm_step,
                  q_mlp_layers=[512, 512])
agents.append(agent)

sess.run(tf.global_variables_initializer())

random_agent = RandomAgent(action_num=eval_env.action_num)

env.set_agents(agents)
eval_env.set_agents([agents[0], random_agent])

# Count the number of steps
step_counters = [0 for _ in range(env.player_num)]

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward', legend='NFSP on Limit Texas Holdem',
                log_path=log_path, csv_path=csv_path)

for episode in range(episode_num):
    # First sample a policy for the episode
    for agent in agents:
        agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for i in range(env.player_num):
        for ts in trajectories[i]:
            agents[i].feed(ts)
            step_counters[i] += 1
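
# Closing sketch: once training finishes, save the learning curve and release the
# log/CSV handles. make_plot and close_file are the Logger methods exercised in
# the tests above; the save path here is illustrative.
logger.make_plot(save_path='./experiments/limit_holdem_nfsp_result/curve.png')
logger.close_file()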