How to use the parl.utils.logger.warning function in parl

To help you get started, we’ve selected a few parl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github PaddlePaddle / PARL / parl / remote / job.py View on Github external
os._exit(1)
            except zmq.error.Again as e:
                logger.warning(
                    "[Job] Cannot connect to the client. This job will exit and inform the worker."
                )
                break
        socket.close(0)
        with self.lock:
            self.kill_job_socket.send_multipart(
                [remote_constants.KILLJOB_TAG,
                 to_byte(self.job_address)])
            try:
                _ = self.kill_job_socket.recv_multipart()
            except zmq.error.Again as e:
                pass
        logger.warning("[Job]lost connection with the client, will exit")
        os._exit(1)
github PaddlePaddle / PARL / parl / remote / job.py View on Github external
def _reply_worker_heartbeat(self, socket):
    """Answer heartbeat messages arriving on ``socket`` until it fails.

    Every message received is answered with a single ``HEARTBEAT_TAG``
    frame; the content of the incoming message is ignored. As soon as a
    receive or send raises ``zmq.error.Again`` a warning is logged, the
    socket is closed and the process is terminated with exit status 1.
    This method therefore never returns to its caller.
    """
    worker_reachable = True
    while worker_reachable:
        try:
            # Receive first, then reply: either call may raise Again.
            socket.recv_multipart()
            socket.send_multipart([remote_constants.HEARTBEAT_TAG])
        except zmq.error.Again:
            lost_worker_msg = "[Job] Cannot connect to the worker{}. ".format(
                self.worker_address) + "Job will quit."
            logger.warning(lost_worker_msg)
            worker_reachable = False
    socket.close(0)
    # Hard exit of the whole process, bypassing normal interpreter cleanup.
    os._exit(1)
github PaddlePaddle / PARL / parl / remote / client.py View on Github external
self.start_time = time.time()
        thread = threading.Thread(target=self._reply_heartbeat)
        thread.setDaemon(True)
        thread.start()
        self.heartbeat_socket_initialized.wait()

        # check if the master is connected properly
        try:
            self.submit_job_socket.send_multipart([
                remote_constants.CLIENT_CONNECT_TAG,
                to_byte(self.heartbeat_master_address),
                to_byte(socket.gethostname())
            ])
            _ = self.submit_job_socket.recv_multipart()
        except zmq.error.Again as e:
            logger.warning("[Client] Can not connect to the master, please "
                           "check if master is started and ensure the input "
                           "address {} is correct.".format(master_address))
            self.master_is_alive = False
            raise Exception("Client can not connect to the master, please "
                            "check if master is started and ensure the input "
                            "address {} is correct.".format(master_address))
github PaddlePaddle / PARL / parl / remote / client.py View on Github external
self.heartbeat_master_address = "{}:{}".format(get_ip_address(),
                                                       heartbeat_master_port)
        self.heartbeat_socket_initialized.set()
        while self.client_is_alive and self.master_is_alive:
            try:
                message = socket.recv_multipart()
                elapsed_time = datetime.timedelta(
                    seconds=int(time.time() - self.start_time))
                socket.send_multipart([
                    remote_constants.HEARTBEAT_TAG,
                    to_byte(self.executable_path),
                    to_byte(str(self.actor_num)),
                    to_byte(str(elapsed_time))
                ])
            except zmq.error.Again as e:
                logger.warning("[Client] Cannot connect to the master."
                               "Please check if it is still alive.")
                self.master_is_alive = False
        socket.close(0)
        logger.warning("Client exit replying heartbeat for master.")
github PaddlePaddle / PARL / parl / remote / remote_decorator.py View on Github external
def request_cpu_resource(self, global_client, max_memory):
    """Ask ``global_client`` for a job address, making up to 300 attempts.

    Each attempt calls ``global_client.submit_job(max_memory)``. The first
    result that is not ``None`` is returned immediately. On every 30th
    remaining-attempt count (300, 270, ..., 30) a warning with the number
    of attempts left is logged.

    NOTE(review): no delay between attempts is visible in this method;
    presumably ``submit_job`` itself paces the retries -- confirm.

    Returns:
        The value returned by ``submit_job``, or ``None`` when all 300
        attempts returned ``None``.
    """
    for attempts_left in range(300, 0, -1):
        address = global_client.submit_job(max_memory)
        if address is not None:
            return address
        if attempts_left % 30 == 0:
            logger.warning(
                "No vacant cpu resources at the moment, "
                "will try {} times later.".format(attempts_left))
    return None
github PaddlePaddle / PARL / parl / remote / worker.py View on Github external
def _kill_job(self, job_address):
    """Retire ``job_address`` and report a replacement job.

    The job is first removed from ``self.worker_status``. If that removal
    does not report success, nothing else happens. Otherwise jobs are
    taken from ``self.job_buffer`` until one is alive after being added to
    ``self.worker_status``; that job is then pickled and sent over
    ``self.request_master_socket`` with ``NEW_JOB_TAG`` together with the
    address of the removed job.

    ``self.lock`` is held through a ``with`` statement so it is released
    even when pickling, sending or receiving raises; with a bare
    ``acquire()``/``release()`` pair an exception would leave the lock
    held forever.

    Args:
        job_address: address of the job to remove from the worker status.
    """
    success = self.worker_status.remove_job(job_address)
    if success:
        while True:
            initialized_job = self.job_buffer.get()
            initialized_job.worker_address = self.master_heartbeat_address
            if initialized_job.is_alive:
                self.worker_status.add_job(initialized_job)
                # The job may have died while it was being registered;
                # undo the registration and try the next buffered job.
                if not initialized_job.is_alive:
                    self.worker_status.remove_job(
                        initialized_job.job_address)
                    continue
            else:
                logger.warning(
                    "[Worker] a dead job found. The job buffer will not accept this one."
                )
            if initialized_job.is_alive:
                break

        # Context manager guarantees the lock is released on any exception.
        with self.lock:
            self.request_master_socket.send_multipart([
                remote_constants.NEW_JOB_TAG,
                cloudpickle.dumps(initialized_job),
                to_byte(job_address)
            ])
            # The reply content is not used; the receive only completes
            # the request/reply exchange.
            self.request_master_socket.recv_multipart()
github PaddlePaddle / PARL / parl / utils / tensorboard.py View on Github external
def call(*args, **kwargs):
    """Invoke the writer method named ``func_name`` and flush the writer.

    The module-level ``_writer`` is created on the first call. If the
    logger has no directory at that point, one is set through
    ``logger.auto_set_dir(action='d')`` and a warning naming it is
    logged. The writer is then built with the directory reported by
    ``logger.get_dir()``. All positional and keyword arguments are passed
    through unchanged; the method's return value is discarded.
    """
    global _writer
    if _writer is None:
        current_dir = logger.get_dir()
        if current_dir is None:
            current_dir = logger.auto_set_dir(action='d')
            logger.warning(
                "[tensorboard] logdir is None, will save tensorboard files to {}"
                .format(current_dir))
        # The directory is re-read from the logger rather than reusing
        # the local value, exactly as the writer was constructed before.
        _writer = SummaryWriter(logdir=logger.get_dir())
    getattr(_writer, func_name)(*args, **kwargs)
    _writer.flush()
github PaddlePaddle / PARL / parl / remote / master.py View on Github external
3. A new client connects to the master node.
        4. A connected client submits a job after a remote object is created.
        """
        self.client_socket.linger = 0
        self.client_socket.setsockopt(
            zmq.RCVTIMEO, remote_constants.HEARTBEAT_RCVTIMEO_S * 1000)

        while self.master_is_alive:
            try:
                self._receive_message()
                pass
            except zmq.error.Again as e:
                #detect whether `self.master_is_alive` is True periodically
                pass

        logger.warning("[Master] Exit master.")
github PaddlePaddle / PARL / parl / remote / job.py View on Github external
raise DeserializeError

                    else:
                        traceback_str = str(traceback.format_exc())
                        logger.error("traceback:\n{}".format(traceback_str))
                        reply_socket.send_multipart([
                            remote_constants.EXCEPTION_TAG,
                            to_byte(error_str + "\ntraceback:\n" +
                                    traceback_str)
                        ])
                        break

            # receive DELETE_TAG from actor, and stop replying worker heartbeat
            elif tag == remote_constants.KILLJOB_TAG:
                reply_socket.send_multipart([remote_constants.NORMAL_TAG])
                logger.warning("An actor exits and this job {} will exit.".
                               format(job_address))
                break
            else:
                logger.error(
                    "The job receives an unknown message: {}".format(message))
                raise NotImplementedError