# Test fragment for the DQN agent (the import paths and the TestCase class
# name below are assumptions added to make the fragment self-contained).
import unittest

import numpy as np
import tensorflow as tf

from rlcard.agents.dqn_agent import DQNAgent


class TestDQNAgent(unittest.TestCase):

    def test_init(self):
        sess = tf.InteractiveSession()
        tf.Variable(0, name='global_step', trainable=False)

        agent = DQNAgent(sess=sess,
                         scope='dqn',
                         replay_memory_size=0,
                         replay_memory_init_size=0,
                         update_target_estimator_every=0,
                         discount_factor=0,
                         epsilon_start=0,
                         epsilon_end=0,
                         epsilon_decay_steps=0,
                         batch_size=0,
                         action_num=2,
                         state_shape=[1],
                         norm_step=0,
                         mlp_layers=[10, 10])

        self.assertEqual(agent.replay_memory_init_size, 0)
        self.assertEqual(agent.update_target_estimator_every, 0)

    def test_train(self):
        norm_step = 1100
        memory_init_size = 100
        step_num = 1500

        sess = tf.InteractiveSession()
        tf.Variable(0, name='global_step', trainable=False)

        agent = DQNAgent(sess=sess,
                         scope='dqn',
                         replay_memory_size=500,
                         replay_memory_init_size=memory_init_size,
                         update_target_estimator_every=100,
                         norm_step=norm_step,
                         state_shape=[2],
                         mlp_layers=[10, 10])
        sess.run(tf.global_variables_initializer())

        # eval_step returns the index of the chosen action
        predicted_action = agent.eval_step({'obs': np.random.random_sample((2,)),
                                            'legal_actions': [0, 1]})
        self.assertGreaterEqual(predicted_action, 0)
        self.assertLessEqual(predicted_action, 1)

        # Feed random transitions of the form
        # (state, action, reward, next_state, done) into the agent
        for step in range(step_num):
            ts = [{'obs': np.random.random_sample((2,)), 'legal_actions': [0, 1]},
                  np.random.randint(2),
                  0,
                  {'obs': np.random.random_sample((2,)), 'legal_actions': [0, 1]},
                  True]
            agent.feed(ts)
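To run these tests as a standalone module, the usual unittest entry point can be appended (it is not part of the original fragment):

if __name__ == '__main__':
    unittest.main()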
# Fragment of a training script for DQN on Blackjack.
evaluate_every = 100
save_plot_every = 1000
evaluate_num = 1000
episode_num = 1000000

# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 100
norm_step = 100

# Set a global seed
set_global_seed(1)

with tf.Session() as sess:
    # Set agents
    agent = DQNAgent(sess,
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=[2],
                     mlp_layers=[10, 10])
    env.set_agents([agent])

    # Count the number of steps
    step_counter = 0

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='episode', ylabel='reward',
                    legend='DQN on Blackjack',
                    log_path='./experiments/blackjack_dqn_result/log.txt',
                    csv_path='./experiments/blackjack_dqn_result/performance.csv')

    for episode in range(episode_num):

        # Generate data from the environment
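        # The original fragment breaks off here. Below is a minimal sketch of
        # how such a loop body is typically completed; env.run(), logger.log(),
        # logger.add_point(), logger.make_plot() and the figures path are
        # assumptions about the surrounding script, not shown in the fragment.
        trajectories, _ = env.run(is_training=True)

        # Feed the agent's transitions into its replay memory and train
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1

            # Periodically evaluate the agent and record a point on the curve
            if step_counter % evaluate_every == 0:
                reward = 0
                for _ in range(evaluate_num):
                    _, payoffs = env.run(is_training=False)
                    reward += payoffs[0]
                logger.log('Average reward is {}'.format(float(reward) / evaluate_num))
                logger.add_point(x=step_counter, y=float(reward) / evaluate_num)

            # Periodically redraw the learning curve (path is assumed)
            if step_counter % save_plot_every == 0:
                logger.make_plot(save_path='./experiments/blackjack_dqn_result/figures/')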
# Fragment of a training script for DQN on Mahjong.
memory_init_size = 1000
norm_step = 1000

# The paths for saving the logs and learning curves
root_path = './experiments/mahjong_dqn_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_size=20000,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 512])
    random_agent = RandomAgent(action_num=eval_env.action_num)
    sess.run(tf.global_variables_initializer())

    env.set_agents([agent, random_agent, random_agent, random_agent])
    eval_env.set_agents([agent, random_agent, random_agent, random_agent])

    # Count the number of steps
# Fragment of a training script for DQN on Limit Hold'em.
memory_init_size = 1000
norm_step = 100

# The paths for saving the logs and learning curves
root_path = './experiments/limit_holdem_dqn_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_size=int(1e5),
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 512])
    random_agent = RandomAgent(action_num=eval_env.action_num)
    sess.run(tf.global_variables_initializer())

    env.set_agents([agent, random_agent])
    eval_env.set_agents([agent, random_agent])

    # Count the number of steps
# Fragment of a DQN training script for a two-player environment.
save_plot_every = 10000
evaluate_num = 10000
episode_num = 1000000

# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 100

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_size=int(1e5),
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=[6],
                     mlp_layers=[128, 128])
    random_agent = RandomAgent(action_num=eval_env.action_num)
    sess.run(tf.global_variables_initializer())

    env.set_agents([agent, random_agent])
    eval_env.set_agents([agent, random_agent])

    # Count the number of steps
# Fragment of a training script for DQN on UNO.
memory_init_size = 1000
norm_step = 1000

# The paths for saving the logs and learning curves
root_path = './experiments/uno_dqn_result/'
log_path = root_path + 'log.txt'
csv_path = root_path + 'performance.csv'
figure_path = root_path + 'figures/'

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_size=20000,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 512])
    random_agent = RandomAgent(action_num=eval_env.action_num)
    sess.run(tf.global_variables_initializer())

    env.set_agents([agent, random_agent, random_agent])
    eval_env.set_agents([agent, random_agent, random_agent])

    # Count the number of steps
# Fragment of a DQN training script for another two-player environment.
save_plot_every = 10000
evaluate_num = 10000
episode_num = 1000000

# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 100

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_size=int(1e5),
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=[52],
                     mlp_layers=[512, 512])
    random_agent = RandomAgent(action_num=eval_env.action_num)
    sess.run(tf.global_variables_initializer())

    env.set_agents([agent, random_agent])
    eval_env.set_agents([agent, random_agent])

    # Count the number of steps
# Fragment of a DQN training script for a three-player environment.
evaluate_every = 200
save_plot_every = 5000
evaluate_num = 200
episode_num = 1000000

# Set the number of steps for collecting normalization statistics
# and the initial memory size
memory_init_size = 1000
norm_step = 100

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set agents
    agent = DQNAgent(sess,
                     action_num=env.action_num,
                     replay_memory_size=20000,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=[6, 5, 15],
                     mlp_layers=[512, 512])
    random_agent = RandomAgent(action_num=eval_env.action_num)

    env.set_agents([agent, random_agent, random_agent])
    eval_env.set_agents([agent, random_agent, random_agent])

    # Count the number of steps
    step_counter = 0

    # Init a Logger to plot the learning curve
# Fragment of the constructor of an NFSP-style agent that wraps DQNAgent as
# its inner reinforcement-learning agent.
self._batch_size = batch_size
self._sl_learning_rate = sl_learning_rate
self._anticipatory_param = anticipatory_param
self._min_buffer_size_to_learn = min_buffer_size_to_learn

self._reservoir_buffer = ReservoirBuffer(reservoir_buffer_capacity)
self._prev_timestep = None
self._prev_action = None
self.evaluate_with = evaluate_with

# Step counter to keep track of learning.
self._step_counter = 0

with tf.variable_scope(scope):
    # Inner RL agent
    self._rl_agent = DQNAgent(sess, 'dqn', q_replay_memory_size,
                              q_replay_memory_init_size,
                              q_update_target_estimator_every,
                              q_discount_factor, q_epsilon_start,
                              q_epsilon_end, q_epsilon_decay_steps,
                              q_batch_size, action_num, state_shape,
                              q_norm_step, q_mlp_layers, rl_learning_rate)

    # Build supervised model
    self._build_model()

self.sample_episode_policy()
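The parameter names referenced in this fragment imply the enclosing constructor's signature. A hedged reconstruction is sketched below; the parameter order and any defaults are assumptions inferred only from the names used above, not taken from the library.

# Sketch: constructor signature implied by the fragment above.
# Parameter order and defaults are assumptions, not the library's actual API.
def __init__(self,
             sess,
             scope,
             action_num,
             state_shape,
             q_replay_memory_size,
             q_replay_memory_init_size,
             q_update_target_estimator_every,
             q_discount_factor,
             q_epsilon_start,
             q_epsilon_end,
             q_epsilon_decay_steps,
             q_norm_step,
             q_batch_size,
             q_mlp_layers,
             rl_learning_rate,
             sl_learning_rate,
             batch_size,
             anticipatory_param,
             min_buffer_size_to_learn,
             reservoir_buffer_capacity,
             evaluate_with):
    # (body as in the fragment above)
    ...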