How to use rlcard - 10 common examples

To help you get started, we’ve selected a few rlcard examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github datamllab / rlcard / examples / blackjack_dqn_multi_process.py View on Github external
agent = DQNAgent(sess,
						 scope='dqn',
						 action_num=env.action_num,
						 replay_memory_init_size=memory_init_size,
						 norm_step=norm_step,
						 state_shape=env.state_shape,
						 mlp_layers=[10, 10])
		env.set_agents([agent])
		eval_env.set_agents([agent])
		sess.run(tf.global_variables_initializer())

		# Count the number of steps
		step_counter = 0

		# Init a Logger to plot the learning curve
		logger = Logger(xlabel='timestep', ylabel='reward',
						legend='DQN on Blackjack', log_path=log_path, csv_path=csv_path)

		for episode in range(episode_num // evaluate_every):

			# Generate data from the environment
			tasks = assign_task(evaluate_every, PROCESS_NUM)
			for task in tasks:
				INPUT_QUEUE.put((task, True, None, None))
			for _ in range(evaluate_every):
				trajectories = OUTPUT_QUEUE.get()

				# Feed transitions into agent memory, and train
				for ts in trajectories[0]:
					agent.feed(ts)
					step_counter += 1
github datamllab / rlcard / rlcard / games / doudizhu / dealer.py View on Github external
def deal_cards(self, players):
        ''' Hand every player an equal, sorted share of the deck

        Three cards of the deck are left out of the split; the rest is divided
        evenly, in deck order, between the players.

        Args:
            players (list): list of DoudizhuPlayer objects
        '''
        cards_per_player = (len(self.deck) - 3) // len(players)
        sort_key = functools.cmp_to_key(doudizhu_sort_card)
        start = 0
        for player in players:
            stop = start + cards_per_player
            # Consecutive, non-overlapping slices of the deck, one per player
            hand = sorted(self.deck[start:stop], key=sort_key)
            player.set_current_hand(hand)
            # Remember the starting hand in string form
            player.initial_hand = cards2str(player.current_hand)
            start = stop
github datamllab / rlcard / tests / agents / test_dqn.py View on Github external
def test_init(self):
        ''' Check that DQNAgent keeps the hyper-parameters passed to its constructor

        The session is closed and the default graph reset once the test is
        done (even on failure), so nothing created here leaks into other tests.
        '''
        sess = tf.InteractiveSession()
        try:
            tf.Variable(0, name='global_step', trainable=False)

            agent = DQNAgent(sess=sess,
                             scope='dqn',
                             replay_memory_size=0,
                             replay_memory_init_size=0,
                             update_target_estimator_every=0,
                             discount_factor=0,
                             epsilon_start=0,
                             epsilon_end=0,
                             epsilon_decay_steps=0,
                             batch_size=0,
                             action_num=2,
                             state_shape=[1],
                             norm_step=0,
                             mlp_layers=[10,10])

            self.assertEqual(agent.replay_memory_init_size, 0)
            self.assertEqual(agent.update_target_estimator_every, 0)
        finally:
            # Same cleanup the NFSP init test performs: release the session
            # and drop the variables registered in the default graph.
            sess.close()
            tf.reset_default_graph()
github datamllab / rlcard / tests / agents / test_nfsp.py View on Github external
def test_init(self):
        ''' Check that NFSPAgent records the number of actions it was built with

        Cleanup runs in a ``finally`` block so that a failing constructor or
        assertion cannot leave an open session or a dirty default graph behind
        for the tests that run afterwards.
        '''
        sess = tf.InteractiveSession()
        try:
            tf.Variable(0, name='global_step', trainable=False)

            agent = NFSPAgent(sess=sess,
                             scope='nfsp',
                             action_num=10,
                             state_shape=[10],
                             hidden_layers_sizes=[10,10],
                             q_mlp_layers=[10,10])

            self.assertEqual(agent._action_num, 10)
        finally:
            # Always release TensorFlow resources, pass or fail
            sess.close()
            tf.reset_default_graph()
github datamllab / rlcard / tests / agents / test_cfr.py View on Github external
def test_train(self):
        ''' Train a CFR agent on leduc-holdem, then check eval_step picks a legal action '''
        env = rlcard.make('leduc-holdem', allow_step_back=True)
        agent = CFRAgent(env)

        iterations = 100
        for _ in range(iterations):
            agent.train()

        # Hand-built state exposing only actions 0 and 2 as legal
        observation = np.array([1., 1., 0., 0., 0., 0.])
        state = {'obs': observation, 'legal_actions': [0,2]}
        chosen_action = agent.eval_step(state)

        self.assertIn(chosen_action, [0, 2])
github datamllab / rlcard / tests / utils / test_utils.py View on Github external
def test_is_single(self):
        ''' is_single accepts a one-card list and rejects a two-card list '''
        one_card = [Card('S', 'A')]
        self.assertTrue(is_single(one_card))

        two_cards = [Card('S', 'A'), Card('BJ', '')]
        self.assertFalse(is_single(two_cards))
github datamllab / rlcard / tests / utils / test_utils.py View on Github external
def test_is_pair(self):
        ''' is_pair accepts two cards of the same rank and rejects a three-card list '''
        same_rank = [Card('S', 'A'), Card('D', 'A')]
        self.assertTrue(is_pair(same_rank))

        three_cards = [Card('BJ', ''), Card('S', 'A'), Card('D', 'A')]
        self.assertFalse(is_pair(three_cards))
github datamllab / rlcard / tests / utils / test_utils.py View on Github external
def test_take_out_cards(self):
        ''' Take two cards out of a fresh 54-card deck

        Checks that the value returned by take_out_cards contains neither of
        the two requested cards, and that ``cards`` itself ends up two cards
        shorter than a fresh deck.
        '''
        cards = init_54_deck()
        remove_cards = [Card('S', 'A'), Card('BJ', '')]
        res = take_out_cards(cards, remove_cards)

        # get_index has to be *called*: comparing the bound method itself to
        # 'BJ' is always False, which silently disabled the joker check.
        taken_indexes = ('SA', 'BJ')
        self.assertFalse(any(card.get_index() in taken_indexes for card in res))
        self.assertEqual(len(cards), len(init_54_deck()) - 2)
github datamllab / rlcard / tests / games / test_leducholdem_game.py View on Github external
def test_judge_game(self):
        ''' Check Judger.judge_game payoffs for a tie, a pair, and a no-pair showdown

        Both players have 10 chips in the pot in every scenario.
        '''
        players = [Player(0), Player(1)]
        for player in players:
            player.in_chips = 10

        def check(first_hand, second_hand, public_card, expected):
            # Give each player a hand, judge the game, compare both payoffs
            players[0].hand = first_hand
            players[1].hand = second_hand
            payoffs = Judger.judge_game(players, public_card)
            self.assertEqual(payoffs[0], expected[0])
            self.assertEqual(payoffs[1], expected[1])

        # Both players hold a J: nobody wins anything
        check(Card('S', 'J'), Card('H', 'J'), Card('S', 'Q'), (0, 0))

        # Player 0 pairs the public J and takes player 1's chips
        check(Card('S', 'J'), Card('S', 'Q'), Card('H', 'J'), (10.0, -10.0))

        # Nobody pairs the public K: player 1 (Q) is expected to beat player 0 (J)
        check(Card('S', 'J'), Card('S', 'Q'), Card('H', 'K'), (-10.0, 10.0))
github datamllab / rlcard / tests / utils / test_utils.py View on Github external
def test_is_pair(self):
        ''' Two same-rank cards form a pair; a three-card list does not '''
        matching_cards = [Card('S', 'A'), Card('D', 'A')]
        pair_detected = is_pair(matching_cards)
        self.assertTrue(pair_detected)

        extra_card_list = [Card('BJ', ''), Card('S', 'A'), Card('D', 'A')]
        pair_detected = is_pair(extra_card_list)
        self.assertFalse(pair_detected)