How to use parl - 10 common examples

To help you get started, we’ve selected a few parl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github PaddlePaddle / PARL / examples / PPO / train.py View on Github external
test_flag = 0
    total_steps = 0
    while total_steps < args.train_total_steps:
        trajectories = collect_trajectories(
            env, agent, scaler, episodes=args.episodes_per_batch)
        total_steps += sum([t['obs'].shape[0] for t in trajectories])
        total_train_rewards = sum([np.sum(t['rewards']) for t in trajectories])

        train_obs, train_actions, train_advantages, train_discount_sum_rewards = build_train_data(
            trajectories, agent)

        policy_loss, kl = agent.policy_learn(train_obs, train_actions,
                                             train_advantages)
        value_loss = agent.value_learn(train_obs, train_discount_sum_rewards)

        logger.info(
            'Steps {}, Train reward: {}, Policy loss: {}, KL: {}, Value loss: {}'
            .format(total_steps, total_train_rewards / args.episodes_per_batch,
                    policy_loss, kl, value_loss))
        if total_steps // args.test_every_steps >= test_flag:
            while total_steps // args.test_every_steps >= test_flag:
                test_flag += 1
            eval_reward = run_evaluate_episode(env, agent, scaler)
            logger.info('Steps {}, Evaluate reward: {}'.format(
                total_steps, eval_reward))
github PaddlePaddle / PARL / examples / DQN / train.py View on Github external
pbar.update(steps)

    # train
    test_flag = 0
    pbar = tqdm(total=args.train_total_steps)
    total_steps = 0
    max_reward = None
    while total_steps < args.train_total_steps:
        # start epoch
        total_reward, steps, loss = run_train_episode(env, agent, rpm)
        total_steps += steps
        pbar.set_description('[train]exploration:{}'.format(agent.exploration))
        tensorboard.add_scalar('dqn/score', total_reward, total_steps)
        tensorboard.add_scalar('dqn/loss', loss,
                               total_steps)  # mean of total loss
        tensorboard.add_scalar('dqn/exploration', agent.exploration,
                               total_steps)
        pbar.update(steps)

        if total_steps // args.test_every_steps >= test_flag:
            while total_steps // args.test_every_steps >= test_flag:
                test_flag += 1
            pbar.write("testing")
            eval_rewards = []
            for _ in tqdm(range(3), desc='eval agent'):
                eval_reward = run_evaluate_episode(test_env, agent)
                eval_rewards.append(eval_reward)
            logger.info(
                "eval_agent done, (steps, eval_reward): ({}, {})".format(
                    total_steps, np.mean(eval_rewards)))
            eval_test = np.mean(eval_rewards)
            tensorboard.add_scalar('dqn/eval', eval_test, total_steps)
github PaddlePaddle / PARL / examples / DQN / train.py View on Github external
total_steps)
        pbar.update(steps)

        if total_steps // args.test_every_steps >= test_flag:
            while total_steps // args.test_every_steps >= test_flag:
                test_flag += 1
            pbar.write("testing")
            eval_rewards = []
            for _ in tqdm(range(3), desc='eval agent'):
                eval_reward = run_evaluate_episode(test_env, agent)
                eval_rewards.append(eval_reward)
            logger.info(
                "eval_agent done, (steps, eval_reward): ({}, {})".format(
                    total_steps, np.mean(eval_rewards)))
            eval_test = np.mean(eval_rewards)
            tensorboard.add_scalar('dqn/eval', eval_test, total_steps)

    pbar.close()
github PaddlePaddle / PARL / examples / DQN / train.py View on Github external
total=MEMORY_WARMUP_SIZE, desc='[Replay Memory Warm Up]') as pbar:
        while rpm.size() < MEMORY_WARMUP_SIZE:
            total_reward, steps, _ = run_train_episode(env, agent, rpm)
            pbar.update(steps)

    # train
    test_flag = 0
    pbar = tqdm(total=args.train_total_steps)
    total_steps = 0
    max_reward = None
    while total_steps < args.train_total_steps:
        # start epoch
        total_reward, steps, loss = run_train_episode(env, agent, rpm)
        total_steps += steps
        pbar.set_description('[train]exploration:{}'.format(agent.exploration))
        tensorboard.add_scalar('dqn/score', total_reward, total_steps)
        tensorboard.add_scalar('dqn/loss', loss,
                               total_steps)  # mean of total loss
        tensorboard.add_scalar('dqn/exploration', agent.exploration,
                               total_steps)
        pbar.update(steps)

        if total_steps // args.test_every_steps >= test_flag:
            while total_steps // args.test_every_steps >= test_flag:
                test_flag += 1
            pbar.write("testing")
            eval_rewards = []
            for _ in tqdm(range(3), desc='eval agent'):
                eval_reward = run_evaluate_episode(test_env, agent)
                eval_rewards.append(eval_reward)
            logger.info(
                "eval_agent done, (steps, eval_reward): ({}, {})".format(
github PaddlePaddle / PARL / examples / NeurIPS2019-Learn-to-Move-Challenge / train.py View on Github external
def _new_ready_actor(self):
        """ 

        The actor is ready to start new episode,
        but blocking until training thread call actor_ready_event.set()
        """
        actor_ready_event = threading.Event()
        self.ready_actor_queue.put(actor_ready_event)
        logger.info(
            "[new_avaliabe_actor] approximate size of ready actors:{}".format(
                self.ready_actor_queue.qsize()))
        actor_ready_event.wait()
github PaddlePaddle / PARL / parl / remote / job.py View on Github external
for file, code in pyfiles['python_files'].items():
                file = os.path.join(envdir, file)
                with open(file, 'wb') as code_file:
                    code_file.write(code)

            # save other files to current directory
            for file, content in pyfiles['other_files'].items():
                # create directory (i.e. ./rom_files/)
                if '/' in file:
                    try:
                        os.makedirs(os.path.join(*file.rsplit('/')[:-1]))
                    except OSError as e:
                        pass
                with open(file, 'wb') as f:
                    f.write(content)
            logger.info('[job] reply')
            reply_socket.send_multipart([remote_constants.NORMAL_TAG])
            return envdir
        else:
            logger.error("NotImplementedError:{}, received tag:{}".format(
                job_address, ))
            raise NotImplementedError
github PaddlePaddle / PARL / parl / utils / machine_info.py View on Github external
Returns:
        gpu_count: int
    """

    gpu_count = 0

    env_cuda_devices = os.environ.get('CUDA_VISIBLE_DEVICES', None)
    if env_cuda_devices is not None:
        assert isinstance(env_cuda_devices, str)
        try:
            if not env_cuda_devices:
                return 0
            gpu_count = len(
                [x for x in env_cuda_devices.split(',') if int(x) >= 0])
            logger.info(
                'CUDA_VISIBLE_DEVICES found gpu count: {}'.format(gpu_count))
        except:
            logger.info('Cannot find available GPU devices, using CPU now.')
            gpu_count = 0
    else:
        try:
            gpu_count = str(subprocess.check_output(["nvidia-smi",
                                                     "-L"])).count('UUID')
            logger.info('nvidia-smi -L found gpu count: {}'.format(gpu_count))
        except:
            logger.info('Cannot find available GPU devices, using CPU now.')
            gpu_count = 0
    return gpu_count
github PaddlePaddle / PARL / parl / remote / job.py View on Github external
file = os.path.join(envdir, file)
                with open(file, 'wb') as code_file:
                    code_file.write(code)

            # save other files to current directory
            for file, content in pyfiles['other_files'].items():
                # create directory (i.e. ./rom_files/)
                if '/' in file:
                    try:
                        os.makedirs(os.path.join(*file.rsplit('/')[:-1]))
                    except OSError as e:
                        pass
                with open(file, 'wb') as f:
                    f.write(content)
            logger.info('[job] reply')
            reply_socket.send_multipart([remote_constants.NORMAL_TAG])
            return envdir
        else:
            logger.error("NotImplementedError:{}, received tag:{}".format(
                job_address, ))
            raise NotImplementedError
github PaddlePaddle / PARL / parl / remote / client.py View on Github external
self.submit_job_socket = self.ctx.socket(zmq.REQ)
        self.submit_job_socket.linger = 0
        self.submit_job_socket.setsockopt(
            zmq.RCVTIMEO, remote_constants.HEARTBEAT_TIMEOUT_S * 1000)
        self.submit_job_socket.connect("tcp://{}".format(master_address))
        self.start_time = time.time()
        thread = threading.Thread(target=self._reply_heartbeat)
        thread.setDaemon(True)
        thread.start()
        self.heartbeat_socket_initialized.wait()

        # check if the master is connected properly
        try:
            self.submit_job_socket.send_multipart([
                remote_constants.CLIENT_CONNECT_TAG,
                to_byte(self.heartbeat_master_address),
                to_byte(socket.gethostname())
            ])
            _ = self.submit_job_socket.recv_multipart()
        except zmq.error.Again as e:
            logger.warning("[Client] Can not connect to the master, please "
                           "check if master is started and ensure the input "
                           "address {} is correct.".format(master_address))
            self.master_is_alive = False
            raise Exception("Client can not connect to the master, please "
                            "check if master is started and ensure the input "
                            "address {} is correct.".format(master_address))
github PaddlePaddle / PARL / parl / remote / job.py View on Github external
logger.error(
                        "Memory used by this job exceeds {}. This job will exist."
                        .format(self.max_memory))
                    time.sleep(5)
                    socket.close(0)
                    os._exit(1)
            except zmq.error.Again as e:
                logger.warning(
                    "[Job] Cannot connect to the client. This job will exit and inform the worker."
                )
                break
        socket.close(0)
        with self.lock:
            self.kill_job_socket.send_multipart(
                [remote_constants.KILLJOB_TAG,
                 to_byte(self.job_address)])
            try:
                _ = self.kill_job_socket.recv_multipart()
            except zmq.error.Again as e:
                pass
        logger.warning("[Job]lost connection with the client, will exit")
        os._exit(1)