agent = DQNAgent(sess,
                 scope='dqn',
                 action_num=env.action_num,
                 replay_memory_init_size=memory_init_size,
                 norm_step=norm_step,
                 state_shape=env.state_shape,
                 mlp_layers=[10, 10])
env.set_agents([agent])
eval_env.set_agents([agent])
sess.run(tf.global_variables_initializer())

# Count the number of steps
step_counter = 0

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward',
                legend='DQN on Blackjack', log_path=log_path, csv_path=csv_path)

for episode in range(episode_num // evaluate_every):

    # Generate data from the environment
    tasks = assign_task(evaluate_every, PROCESS_NUM)
    for task in tasks:
        INPUT_QUEUE.put((task, True, None, None))
    for _ in range(evaluate_every):
        trajectories = OUTPUT_QUEUE.get()

        # Feed transitions into agent memory, and train
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1
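The loop above relies on worker processes behind INPUT_QUEUE and OUTPUT_QUEUE that are not shown in this snippet. A minimal sketch of such a worker, assuming each queue entry is an episode count, an is_training flag, and two unused slots, that env.run returns (trajectories, payoffs) as in rlcard, and glossing over how the TensorFlow-backed agent is shared across processes:

def data_worker(input_queue, output_queue, env, agent):
    # Hypothetical worker (not part of the original snippet): pull an episode
    # count from the queue, run that many episodes, and emit each episode's
    # trajectories so the main process can feed them into the agent.
    env.set_agents([agent])
    while True:
        task, is_training, _, _ = input_queue.get()
        for _ in range(task):
            trajectories, _ = env.run(is_training=is_training)
            output_queue.put(trajectories)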
def deal_cards(self, players):
    ''' Deal cards to players

    Args:
        players (list): list of DoudizhuPlayer objects
    '''
    # Reserve three cards for the landlord; split the rest evenly
    hand_num = (len(self.deck) - 3) // len(players)
    for index, player in enumerate(players):
        current_hand = self.deck[index*hand_num:(index+1)*hand_num]
        current_hand.sort(key=functools.cmp_to_key(doudizhu_sort_card))
        player.set_current_hand(current_hand)
        player.initial_hand = cards2str(player.current_hand)
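deal_cards sorts each hand with functools.cmp_to_key(doudizhu_sort_card). The real comparator ships with rlcard's Dou Dizhu utilities; the following is only a hypothetical sketch, assuming each Card exposes a rank string ('3'..'K', 'A', '2', or '' for jokers, whose identity sits in the suit field as in the tests below):

CARD_RANK = ['3', '4', '5', '6', '7', '8', '9', 'T',
             'J', 'Q', 'K', 'A', '2', 'BJ', 'RJ']

def doudizhu_sort_card_sketch(card_1, card_2):
    # Jokers have an empty rank, so fall back to the suit ('BJ'/'RJ')
    key_1 = CARD_RANK.index(card_1.rank or card_1.suit)
    key_2 = CARD_RANK.index(card_2.rank or card_2.suit)
    return (key_1 > key_2) - (key_1 < key_2)  # the -1/0/1 cmp_to_key expects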
def test_init(self):
    sess = tf.InteractiveSession()
    tf.Variable(0, name='global_step', trainable=False)

    agent = DQNAgent(sess=sess,
                     scope='dqn',
                     replay_memory_size=0,
                     replay_memory_init_size=0,
                     update_target_estimator_every=0,
                     discount_factor=0,
                     epsilon_start=0,
                     epsilon_end=0,
                     epsilon_decay_steps=0,
                     batch_size=0,
                     action_num=2,
                     state_shape=[1],
                     norm_step=0,
                     mlp_layers=[10, 10])

    self.assertEqual(agent.replay_memory_init_size, 0)
    self.assertEqual(agent.update_target_estimator_every, 0)

    # Tear down as in the NFSP test below, so tests don't share a graph
    sess.close()
    tf.reset_default_graph()
def test_init(self):
    sess = tf.InteractiveSession()
    tf.Variable(0, name='global_step', trainable=False)

    agent = NFSPAgent(sess=sess,
                      scope='nfsp',
                      action_num=10,
                      state_shape=[10],
                      hidden_layers_sizes=[10, 10],
                      q_mlp_layers=[10, 10])

    self.assertEqual(agent._action_num, 10)

    sess.close()
    tf.reset_default_graph()
def test_train(self):
    # CFR traverses the game tree, so the env must support step_back
    env = rlcard.make('leduc-holdem', allow_step_back=True)
    agent = CFRAgent(env)

    for _ in range(100):
        agent.train()

    state = {'obs': np.array([1., 1., 0., 0., 0., 0.]), 'legal_actions': [0, 2]}
    action = agent.eval_step(state)

    self.assertIn(action, [0, 2])
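eval_step picks an action for a single state; to evaluate the trained agent over whole games, one option is the usual rlcard rollout loop. A hedged sketch, assuming set_agents and run behave as in the DQN example above:

eval_env = rlcard.make('leduc-holdem')
eval_env.set_agents([agent, agent])  # the CFR agent plays both seats
trajectories, payoffs = eval_env.run(is_training=False)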
def test_is_single(self):
    self.assertTrue(is_single([Card('S', 'A')]))
    self.assertFalse(is_single([Card('S', 'A'), Card('BJ', '')]))

def test_is_pair(self):
    self.assertTrue(is_pair([Card('S', 'A'), Card('D', 'A')]))
    self.assertFalse(is_pair([Card('BJ', ''), Card('S', 'A'), Card('D', 'A')]))
def test_take_out_cards(self):
    cards = init_54_deck()
    remove_cards = [Card('S', 'A'), Card('BJ', '')]
    res = take_out_cards(cards, remove_cards)

    # Neither removed card should survive in the result
    flag = False
    for card in res:
        if card.get_index() == 'SA' or card.get_index() == 'BJ':
            flag = True

    self.assertFalse(flag)
    # take_out_cards mutates cards in place, so the deck shrinks by two
    self.assertEqual(len(cards), len(init_54_deck()) - 2)
def test_judge_game(self):
    players = [Player(0), Player(1)]
    players[0].in_chips = 10
    players[1].in_chips = 10

    # Test the hands are equal
    players[0].hand = Card('S', 'J')
    players[1].hand = Card('H', 'J')
    public_card = Card('S', 'Q')
    payoffs = Judger.judge_game(players, public_card)
    self.assertEqual(payoffs[0], 0)
    self.assertEqual(payoffs[1], 0)

    # Test one player gets a pair
    players[0].hand = Card('S', 'J')
    players[1].hand = Card('S', 'Q')
    public_card = Card('H', 'J')
    payoffs = Judger.judge_game(players, public_card)
    self.assertEqual(payoffs[0], 10.0)
    self.assertEqual(payoffs[1], -10.0)

    # Test neither player pairs, so the higher card wins
    players[0].hand = Card('S', 'J')
    players[1].hand = Card('S', 'Q')
    public_card = Card('H', 'K')
    payoffs = Judger.judge_game(players, public_card)
    self.assertEqual(payoffs[0], -10.0)
    self.assertEqual(payoffs[1], 10.0)
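The assertions pin down the payoff rule: a hand that pairs the public card beats a high card, ties pay nothing, and the winner collects the loser's in_chips. A hedged reconstruction of that rule (the real logic lives in rlcard's Leduc Hold'em Judger; judge_game_sketch and strength are hypothetical names):

def judge_game_sketch(players, public_card):
    RANKS = ['J', 'Q', 'K']  # Leduc Hold'em uses a six-card deck

    def strength(player):
        # Pairing the public card dominates; otherwise compare by rank
        paired = int(player.hand.rank == public_card.rank)
        return (paired, RANKS.index(player.hand.rank))

    s0, s1 = strength(players[0]), strength(players[1])
    if s0 == s1:
        return [0, 0]
    if s0 > s1:
        return [players[1].in_chips, -players[1].in_chips]
    return [-players[0].in_chips, players[0].in_chips]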