How to use the rlcard.utils.logger.Logger class in rlcard

To help you get started, we’ve selected a few rlcard examples, based on popular ways the Logger class is used in public projects.

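If you only need the basic workflow, the sketch below strings together the Logger calls exercised in the snippets that follow (the constructor keywords plus add_point, make_plot, and close_file). It is a minimal, illustrative example rather than code taken from any single rlcard script; the paths and demo values are placeholders.

from rlcard.utils.logger import Logger

# Create a logger that writes a text log and a CSV of (x, y) points
logger = Logger(xlabel='timestep', ylabel='reward', legend='demo',
                log_path='./experiments/demo/log.txt',
                csv_path='./experiments/demo/performance.csv')

# Record a few points for the learning curve
for step in range(10):
    logger.add_point(x=step, y=step * 0.5)

# Render the curve to a PNG, then close the log and CSV files
logger.make_plot(save_path='./experiments/demo/fig.png')
logger.close_file()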

github datamllab / rlcard / examples / blackjack_dqn_multi_process.py
agent = DQNAgent(sess,
                 scope='dqn',
                 action_num=env.action_num,
                 replay_memory_init_size=memory_init_size,
                 norm_step=norm_step,
                 state_shape=env.state_shape,
                 mlp_layers=[10, 10])
env.set_agents([agent])
eval_env.set_agents([agent])
sess.run(tf.global_variables_initializer())

# Count the number of steps
step_counter = 0

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward',
                legend='DQN on Blackjack', log_path=log_path, csv_path=csv_path)

for episode in range(episode_num // evaluate_every):

    # Generate data from the environment
    tasks = assign_task(evaluate_every, PROCESS_NUM)
    for task in tasks:
        INPUT_QUEUE.put((task, True, None, None))
    for _ in range(evaluate_every):
        trajectories = OUTPUT_QUEUE.get()

        # Feed transitions into agent memory, and train
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1
github datamllab / rlcard / tests / utils / test_logger.py
def test_close_file(self):
    logger = Logger(xlabel="x", ylabel="y", legend="test", log_path="./newtest/test_log.txt", csv_path="./newtest/test_csv.csv")
    logger.close_file()
    self.assertTrue(os.path.exists('./newtest/'))
github datamllab / rlcard / tests / utils / test_logger.py
def test_make_plot(self):
    logger = Logger(xlabel="x", ylabel="y", legend="test")
    for x in range(10):
        logger.add_point(x=x, y=x*x)
    self.assertEqual(9*9, logger.ys[9])
    logger.make_plot(save_path='./newtest/test.png')
github datamllab / rlcard / examples / limit_holdem_dqn.py
norm_step=norm_step,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 512])

    random_agent = RandomAgent(action_num=eval_env.action_num)

    sess.run(tf.global_variables_initializer())

    env.set_agents([agent, random_agent])
    eval_env.set_agents([agent, random_agent])

    # Count the number of steps
    step_counter = 0

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='timestep', ylabel='reward', legend='DQN on Limit Texas Holdem', log_path=log_path, csv_path=csv_path)

    for episode in range(episode_num):

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1

            # Train the agent
            train_count = step_counter - (memory_init_size + norm_step)
            if train_count > 0:
                loss = agent.train()
                print('\rINFO - Step {}, loss: {}'.format(step_counter, loss), end='')
github datamllab / rlcard / examples / nolimit_holdem_nfsp.py
q_norm_step=norm_step,
                          q_mlp_layers=[512,512])
        agents.append(agent)

    sess.run(tf.global_variables_initializer())

    random_agent = RandomAgent(action_num=eval_env.action_num)

    env.set_agents(agents)
    eval_env.set_agents([agents[0], random_agent])

    # Count the number of steps
    step_counters = [0 for _ in range(env.player_num)]

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='timestep', ylabel='reward', legend='NFSP on No-Limit Texas Holdem', log_path='./experiments/nolimit_holdem_nfsp_result/log.txt', csv_path='./experiments/nolimit_holdem_nfsp_result/performance.csv')

    for episode in range(episode_num):

        # First sample a policy for the episode
        for agent in agents:
            agent.sample_episode_policy()

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for i in range(env.player_num):
            for ts in trajectories[i]:
                agents[i].feed(ts)
                step_counters[i] += 1
github datamllab / rlcard / examples / uno_dqn.py
norm_step=norm_step,
                     state_shape=env.state_shape,
                     mlp_layers=[512, 512])

    random_agent = RandomAgent(action_num=eval_env.action_num)

    sess.run(tf.global_variables_initializer())

    env.set_agents([agent, random_agent, random_agent])
    eval_env.set_agents([agent, random_agent, random_agent])

    # Count the number of steps
    step_counter = 0

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='timestep', ylabel='reward', legend='DQN on UNO', log_path=log_path, csv_path=csv_path)

    for episode in range(episode_num):

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1

            # Train the agent
            train_count = step_counter - (memory_init_size + norm_step)
            if train_count > 0:
                loss = agent.train()
                print('\rINFO - Step {}, loss: {}'.format(step_counter, loss), end='')
github datamllab / rlcard / examples / leduc_holdem_nfsp.py
q_norm_step=norm_step,
                          q_mlp_layers=[128,128])
        agents.append(agent)

    sess.run(tf.global_variables_initializer())

    random_agent = RandomAgent(action_num=eval_env.action_num)

    env.set_agents(agents)
    eval_env.set_agents([agents[0], random_agent])

    # Count the number of steps
    step_counters = [0 for _ in range(env.player_num)]

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='timestep', ylabel='reward', legend='NFSP on Leduc Holdem', log_path='./experiments/leduc_holdem_nfsp_result/log.txt', csv_path='./experiments/leduc_holdem_nfsp_result/performance.csv')

    for episode in range(episode_num):

        # First sample a policy for the episode
        for agent in agents:
            agent.sample_episode_policy()

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for i in range(env.player_num):
            for ts in trajectories[i]:
                agents[i].feed(ts)
                step_counters[i] += 1
github datamllab / rlcard / examples / blackjack_dqn.py
with tf.Session() as sess:
    # Set agents
    agent = DQNAgent(sess,
                     action_num=env.action_num,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=[2],
                     mlp_layers=[10, 10])
    env.set_agents([agent])

    # Count the number of steps
    step_counter = 0

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='episode', ylabel='reward', legend='DQN on Blackjack', log_path='./experiments/blackjack_dqn_result/log.txt', csv_path='./experiments/blackjack_dqn_result/performance.csv')

    for episode in range(episode_num):

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1

            # Train the agent
            if step_counter > memory_init_size + norm_step:
                agent.train()

        # Evaluate the performance
github datamllab / rlcard / examples / leduc_holdem_dqn.py
norm_step=norm_step,
                     state_shape=[6],
                     mlp_layers=[128, 128])

    random_agent = RandomAgent(action_num=eval_env.action_num)

    sess.run(tf.global_variables_initializer())

    env.set_agents([agent, random_agent])
    eval_env.set_agents([agent, random_agent])

    # Count the number of steps
    step_counter = 0

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='timestep', ylabel='reward', legend='DQN on Leduc Holdem', log_path='./experiments/leduc_holdem_dqn_result/log.txt', csv_path='./experiments/leduc_holdem_dqn_result/performance.csv')

    for episode in range(episode_num):

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1

            # Train the agent
            train_count = step_counter - (memory_init_size + norm_step)
            if train_count > 0:
                loss = agent.train()
                print('\rINFO - Step {}, loss: {}'.format(step_counter, loss), end='')
github datamllab / rlcard / examples / limit_holdem_nfsp.py
q_norm_step=norm_step,
                          q_mlp_layers=[512,512])
        agents.append(agent)

    sess.run(tf.global_variables_initializer())

    random_agent = RandomAgent(action_num=eval_env.action_num)

    env.set_agents(agents)
    eval_env.set_agents([agents[0], random_agent])

    # Count the number of steps
    step_counters = [0 for _ in range(env.player_num)]

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='timestep', ylabel='reward', legend='NFSP on Limit Texas Holdem', log_path=log_path, csv_path=csv_path)

    for episode in range(episode_num):

        # First sample a policy for the episode
        for agent in agents:
            agent.sample_episode_policy()

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for i in range(env.player_num):
            for ts in trajectories[i]:
                agents[i].feed(ts)
                step_counters[i] += 1
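The training snippets above are cut off before the evaluation step that actually feeds points into the Logger. Based on the calls shown in the test snippets, a typical ending for these loops looks roughly like the sketch below; evaluate_every, evaluate_num, and figure_path are assumed names rather than values copied from the truncated examples.

# Hedged sketch of the evaluation/logging step the truncated loops lead up to;
# evaluate_every, evaluate_num, and figure_path are assumptions.
if episode % evaluate_every == 0:
    reward = 0
    for _ in range(evaluate_num):
        _, payoffs = eval_env.run(is_training=False)
        reward += payoffs[0]
    logger.add_point(x=step_counter, y=reward / evaluate_num)  # or step_counters[0] in the NFSP scripts

# After training, save the learning curve and close the log/CSV files
logger.make_plot(save_path=figure_path + 'fig.png')
logger.close_file()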