How to use the parl.utils.tensorboard.add_scalar function in parl

To help you get started, we've selected a few parl examples that show popular ways parl.utils.tensorboard.add_scalar is used in public projects.

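In its simplest form, add_scalar(tag, value, step) records one scalar data point per call, and points sharing a tag are drawn as a single curve in TensorBoard. The sketch below is illustrative only: it assumes the "from parl.utils import tensorboard" import used in the project examples that follow, and the tag name, random values, and log-directory placeholder are made up for demonstration.

import numpy as np
from parl.utils import logger, tensorboard

# Log a synthetic "reward" curve: one scalar per step under the tag 'demo/reward'.
# The tag, the values, and the loop are placeholders for a real training loop.
for step in range(1, 101):
    reward = float(np.random.randn())
    tensorboard.add_scalar('demo/reward', reward, step)

# Inspect the curve by pointing TensorBoard at the directory PARL logs to, e.g.:
#   tensorboard --logdir <your_log_dir>
logger.info('finished writing 100 scalar points')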

github PaddlePaddle / PARL / examples / DQN / train.py View on Github external
    # fill the replay memory before training starts
    with tqdm(
            total=MEMORY_WARMUP_SIZE, desc='[Replay Memory Warm Up]') as pbar:
        while rpm.size() < MEMORY_WARMUP_SIZE:
            total_reward, steps, _ = run_train_episode(env, agent, rpm)
            pbar.update(steps)

    # train
    test_flag = 0
    pbar = tqdm(total=args.train_total_steps)
    total_steps = 0
    max_reward = None
    while total_steps < args.train_total_steps:
        # start epoch
        total_reward, steps, loss = run_train_episode(env, agent, rpm)
        total_steps += steps
        pbar.set_description('[train]exploration:{}'.format(agent.exploration))
        tensorboard.add_scalar('dqn/score', total_reward, total_steps)
        tensorboard.add_scalar('dqn/loss', loss,
                               total_steps)  # mean of total loss
        tensorboard.add_scalar('dqn/exploration', agent.exploration,
                               total_steps)
        pbar.update(steps)

        if total_steps // args.test_every_steps >= test_flag:
            while total_steps // args.test_every_steps >= test_flag:
                test_flag += 1
            pbar.write("testing")
            eval_rewards = []
            for _ in tqdm(range(3), desc='eval agent'):
                eval_reward = run_evaluate_episode(test_env, agent)
                eval_rewards.append(eval_reward)
            logger.info(
                "eval_agent done, (steps, eval_reward): ({}, {})".format(
                    total_steps, np.mean(eval_rewards)))
            eval_test = np.mean(eval_rewards)
            tensorboard.add_scalar('dqn/eval', eval_test, total_steps)

    pbar.close()
github PaddlePaddle / PARL / examples / ES / train.py View on Github external
    def log_metrics(self, metrics):
        # log every non-None metric as its own scalar curve against sample_total_steps
        logger.info(metrics)
        for k, v in metrics.items():
            if v is not None:
                tensorboard.add_scalar(k, v, self.sample_total_steps)
github PaddlePaddle / PARL / examples / NeurIPS2019-Learn-to-Move-Challenge / train.py View on Github external
        # update the replay memory and logged statistics under the memory lock
        with self.memory_lock:
            self.total_steps += n
            self.add_episode_rpm(episode_rpm)

            if actor_state.ident % 3 == 2:  # trajectory without noise
                self.env_reward_stat.add(episode_env_reward)
                self.shaping_reward_stat.add(episode_shaping_reward)
                self.max_env_reward = max(self.max_env_reward,
                                          episode_env_reward)

                if self.env_reward_stat.count > 500:
                    tensorboard.add_scalar('recent_env_reward',
                                           self.env_reward_stat.mean,
                                           self.total_steps)
                    tensorboard.add_scalar('recent_shaping_reward',
                                           self.shaping_reward_stat.mean,
                                           self.total_steps)
                if self.critic_loss_stat.count > 500:
                    tensorboard.add_scalar('recent_critic_loss',
                                           self.critic_loss_stat.mean,
                                           self.total_steps)
                tensorboard.add_scalar('episode_length', n, self.total_steps)
                tensorboard.add_scalar('max_env_reward', self.max_env_reward,
                                       self.total_steps)
                tensorboard.add_scalar('ready_actor_num',
                                       self.ready_actor_queue.qsize(),
                                       self.total_steps)
                tensorboard.add_scalar('episode_time', episode_time,
                                       self.total_steps)

            self.noiselevel = self.noiselevel * NOISE_DECAY