How to use the distributed.metrics.time function in distributed

To help you get started, we've selected a few distributed.metrics.time examples, based on popular ways it is used in public projects.

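The snippets below all follow the same basic pattern: call time() before and after a piece of work and compare the two timestamps. A minimal sketch of that pattern, using a stand-in workload rather than anything from the dask codebase:

from distributed.metrics import time

# time() returns the current time in seconds as a float, like time.time().
start = time()
total = sum(i * i for i in range(1_000_000))  # stand-in workload
elapsed = time() - start
print(f"computed {total} in {elapsed:.3f}s")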

github dask / distributed / distributed / worker.py
def trigger_profile(self):
        """
        Get a frame from all actively computing threads

        Merge these frames into existing profile counts
        """
        if not self.active_threads:  # hope that this is thread-atomic?
            return
        start = time()
        with self.active_threads_lock:
            active_threads = self.active_threads.copy()
        frames = sys._current_frames()
        frames = {ident: frames[ident] for ident in active_threads}
        llframes = {}
        if self.low_level_profiler:
            llframes = {ident: profile.ll_get_stack(ident) for ident in active_threads}
        for ident, frame in frames.items():
            if frame is not None:
                key = key_split(active_threads[ident])
                llframe = llframes.get(ident)

                state = profile.process(
                    frame, True, self.profile_recent, stop="distributed/worker.py"
                )
                profile.llprocess(llframe, None, state)

github dask / distributed / distributed / profile.py
def get_profile(history, recent=None, start=None, stop=None, key=None):
    """ Collect profile information from a sequence of profile states

    Parameters
    ----------
    history: Sequence[Tuple[time, Dict]]
        A list or deque of profile states
    recent: dict
        The most recent accumulating state
    start: time
    stop: time
    """
    now = time()
    if start is None:
        istart = 0
    else:
        istart = bisect.bisect_left(history, (start,))

    if stop is None:
        istop = None
    else:
        istop = bisect.bisect_right(history, (stop,)) + 1
        if istop >= len(history):
            istop = None  # include end

    if istart == 0 and istop is None:
        history = list(history)
    else:
        iistop = len(history) if istop is None else istop

github dask / distributed / distributed / scheduler.py
        DELAY = 0.1
        try:
            import psutil
            proc = psutil.Process()
            last = time()

            while self.status != 'closed':
                yield gen.sleep(DELAY)
                while not self.rprocessing:
                    yield gen.sleep(DELAY)
                last = time()

                for w, processing in list(self.processing.items()):
                    while proc.cpu_percent() > 50:
                        yield gen.sleep(DELAY)
                        last = time()

                    if w not in self.workers or not processing:
                        continue

                    self._reevaluate_occupancy_worker(w)

                    duration = time() - last
                    if duration > 0.005:  # 5ms since last release
                        yield gen.sleep(duration * 5)  # 25ms gap
                        last = time()
        except Exception:
            logger.error("Error in reevaluate occupancy", exc_info=True)
            raise
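
The loop above uses time() to throttle itself: when a pass takes more than 5 ms, it sleeps for five times that long so the scheduler's event loop is not monopolized. A minimal asyncio sketch of that adaptive back-off (throttled_map and its arguments are illustrative names; the original uses Tornado's gen):

import asyncio

from distributed.metrics import time

async def throttled_map(work, items, factor=5, min_duration=0.005):
    # Apply work() to each item, pausing whenever a pass was slow so the
    # event loop gets time back in proportion to what we consumed.
    last = time()
    for item in items:
        work(item)
        duration = time() - last
        if duration > min_duration:
            await asyncio.sleep(duration * factor)
        last = time()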

github dask / distributed / distributed / worker.py
async def heartbeat(self):
        if not self.heartbeat_active:
            self.heartbeat_active = True
            logger.debug("Heartbeat: %s" % self.address)
            try:
                start = time()
                response = await retry_operation(
                    self.scheduler.heartbeat_worker,
                    address=self.contact_address,
                    now=time(),
                    metrics=await self.get_metrics(),
                )
                end = time()
                middle = (start + end) / 2

                self._update_latency(end - start)

                if response["status"] == "missing":
                    for i in range(10):
                        if self.status != Status.running:
                            break
                        else:
                            await asyncio.sleep(0.05)
                    else:
                        await self._register_with_scheduler()
                    return
                self.scheduler_delay = response["time"] - middle
                self.periodic_callbacks["heartbeat"].callback_time = (
                    response["heartbeat-interval"] * 1000
                )
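
The heartbeat above uses time() both to measure round-trip latency and to estimate clock offset from the scheduler: the scheduler's timestamp is compared with the midpoint of the local send/receive window. A minimal sketch of that idea (estimate_offset and remote_call are illustrative names, not part of the worker API):

from distributed.metrics import time

def estimate_offset(remote_call):
    # remote_call() should return the peer's notion of time(); here it is a
    # hypothetical stand-in for an RPC such as scheduler.heartbeat_worker.
    start = time()
    remote_now = remote_call()
    end = time()
    latency = end - start
    midpoint = (start + end) / 2
    # A positive offset means the peer's clock runs ahead of ours.
    return latency, remote_now - midpoint

latency, offset = estimate_offset(time)  # against the local clock, offset ~ 0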

github dask / distributed / distributed / client.py
                if w.scheduler.address == self.scheduler.address:
                    direct = True

        if local_worker:  # running within task
            local_worker.update_data(data=data, report=False)

            yield self.scheduler.update_data(
                who_has={key: [local_worker.address] for key in data},
                nbytes=valmap(sizeof, data),
                client=self.id)

        else:
            data2 = valmap(to_serialize, data)
            if direct:
                ncores = None
                start = time()
                while not ncores:
                    if ncores is not None:
                        yield gen.sleep(0.1)
                    if time() > start + timeout:
                        raise gen.TimeoutError("No valid workers found")
                    ncores = yield self.scheduler.ncores(workers=workers)
                if not ncores:
                    raise ValueError("No valid workers")

                _, who_has, nbytes = yield scatter_to_workers(ncores, data2,
                                                              report=False,
                                                              rpc=self.rpc)

                yield self.scheduler.update_data(who_has=who_has,
                                                 nbytes=nbytes,
                                                 client=self.id)
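
The scatter path above polls the scheduler until workers appear, using time() to enforce a timeout. A minimal asyncio sketch of that poll-with-deadline pattern (wait_for_workers and get_workers are hypothetical names; the original uses Tornado's gen):

import asyncio

from distributed.metrics import time

async def wait_for_workers(get_workers, timeout=5.0):
    # Poll until get_workers() returns something truthy or the deadline passes.
    deadline = time() + timeout
    workers = await get_workers()
    while not workers:
        if time() > deadline:
            raise asyncio.TimeoutError("No valid workers found")
        await asyncio.sleep(0.1)
        workers = await get_workers()
    return workers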

github dask / distributed / distributed / worker.py
def _maybe_deserialize_task(self, key):
        if not isinstance(self.tasks[key], SerializedTask):
            return self.tasks[key]
        try:
            start = time()
            function, args, kwargs = _deserialize(*self.tasks[key])
            stop = time()

            if stop - start > 0.010:
                self.startstops[key].append(
                    {"action": "deserialize", "start": start, "stop": stop}
                )
            return function, args, kwargs
        except Exception as e:
            logger.warning("Could not deserialize task", exc_info=True)
            emsg = error_message(e)
            emsg["key"] = key
            emsg["op"] = "task-erred"
            self.batched_stream.send(emsg)
            self.log.append((key, "deserialize-error"))
            raise
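
The method above times deserialization with time() and records the start/stop pair only when it exceeds a 10 ms threshold. A minimal sketch of that threshold-gated timing, where startstops and timed are illustrative names standing in for the worker's bookkeeping:

from collections import defaultdict

from distributed.metrics import time

startstops = defaultdict(list)  # key -> recorded {action, start, stop} dicts

def timed(key, action, func, *args, threshold=0.010, **kwargs):
    # Run func and record the interval only if it took longer than threshold.
    start = time()
    result = func(*args, **kwargs)
    stop = time()
    if stop - start > threshold:
        startstops[key].append({"action": action, "start": start, "stop": stop})
    return result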

github dask / distributed / distributed / worker.py
def put_key_in_memory(self, key, value, transition=True):
        if key in self.data:
            return

        if key in self.actors:
            self.actors[key] = value

        else:
            start = time()
            self.data[key] = value
            stop = time()
            if stop - start > 0.020:
                self.startstops[key].append(
                    {"action": "disk-write", "start": start, "stop": stop}
                )

        if key not in self.nbytes:
            self.nbytes[key] = sizeof(value)

        self.types[key] = type(value)

        for dep in self.dependents.get(key, ()):
            if dep in self.waiting_for_data:
                if key in self.waiting_for_data[dep]:
                    self.waiting_for_data[dep].remove(key)
                if not self.waiting_for_data[dep]:
                    self.transition(dep, "ready")

github dask / distributed / distributed / semaphore.py
def elapsed(self):
        return time() - self.started_at
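
The same one-liner generalizes to a small stopwatch helper; a minimal sketch that assumes nothing beyond the import:

from distributed.metrics import time

class Stopwatch:
    """Record a start timestamp and report seconds elapsed since then."""

    def __init__(self):
        self.started_at = time()

    def elapsed(self):
        return time() - self.started_at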

github dask / distributed / distributed / profile.py
def _watch(thread_id, log, interval="20ms", cycle="2s", omit=None, stop=lambda: False):
    interval = parse_timedelta(interval)
    cycle = parse_timedelta(cycle)

    recent = create()
    last = time()

    while not stop():
        if time() > last + cycle:
            log.append((time(), recent))
            recent = create()
            last = time()
        try:
            frame = sys._current_frames()[thread_id]
        except KeyError:
            return

        process(frame, None, recent, omit=omit)
        sleep(interval)
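
The watcher above samples on a short interval and rolls its accumulated state into the log once per cycle, with time() marking the cycle boundary. A minimal sketch of that sample-and-roll loop (sample_loop and sample are illustrative names):

from time import sleep

from distributed.metrics import time

def sample_loop(sample, log, interval=0.02, cycle=2.0, stop=lambda: False):
    # Collect sample() every `interval` seconds; every `cycle` seconds,
    # append the batch with a timestamp to `log` and start a new batch.
    recent = []
    last = time()
    while not stop():
        if time() > last + cycle:
            log.append((time(), recent))
            recent = []
            last = time()
        recent.append(sample())
        sleep(interval)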

github dask / distributed / distributed / core.py
            from .counter import Digest

            self.digests = defaultdict(partial(Digest, loop=self.io_loop))

        from .counter import Counter

        self.counters = defaultdict(partial(Counter, loop=self.io_loop))
        self.events = defaultdict(lambda: deque(maxlen=10000))
        self.event_counts = defaultdict(lambda: 0)

        self.periodic_callbacks = dict()

        pc = PeriodicCallback(self.monitor.update, 500)
        self.periodic_callbacks["monitor"] = pc

        self._last_tick = time()
        measure_tick_interval = parse_timedelta(
            dask.config.get("distributed.admin.tick.interval"), default="ms"
        )
        pc = PeriodicCallback(self._measure_tick, measure_tick_interval * 1000)
        self.periodic_callbacks["tick"] = pc

        self.thread_id = 0

        def set_thread_ident():
            self.thread_id = threading.get_ident()

        self.io_loop.add_callback(set_thread_ident)
        self._startup_lock = asyncio.Lock()
        self.status = Status.undefined

        self.rpc = ConnectionPool(