How to use distributed - 10 common examples

To help you get started, we’ve selected a few examples of the `distributed` package, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dask / dask-yarn / dask_yarn / cli / dask_yarn_scheduler.py View on Github external
enable_proctitle_on_current()
    enable_proctitle_on_children()

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    addr = uri_from_host_port('', None, 0)

    loop = IOLoop.current()

    services = {}
    bokeh = False
    with ignoring(ImportError):
        from distributed.bokeh.scheduler import BokehScheduler
        services[('bokeh', 0)] = (BokehScheduler, {})
        bokeh = True

    scheduler = Scheduler(loop=loop, services=services)
    scheduler.start(addr)

    install_signal_handlers(loop)

    app_client.kv['dask.scheduler'] = scheduler.address.encode()

    if bokeh:
        bokeh_port = scheduler.services['bokeh'].port
        bokeh_host = urlparse(scheduler.address).hostname
        bokeh_address = 'http://%s:%d' % (bokeh_host, bokeh_port)
github dask / distributed / distributed / diagnostics / task_stream.py View on Github external
def bisect(target, left, right):
            if left == right:
                return left

            mid = (left + right) // 2
            value = max(
                startstop["stop"] for startstop in self.buffer[mid]["startstops"]
            )

            if value < target:
                return bisect(target, mid + 1, right)
            else:
                return bisect(target, left, mid)

        if isinstance(start, str):
            start = time() - parse_timedelta(start)
        if start is not None:
            start = bisect(start, 0, len(self.buffer))

        if isinstance(stop, str):
            stop = time() - parse_timedelta(stop)
        if stop is not None:
            stop = bisect(stop, 0, len(self.buffer))

        if count is not None:
            if start is None and stop is None:
                stop = len(self.buffer)
                start = stop - count
            elif start is None and stop is not None:
                start = stop - count
            elif start is not None and stop is None:
                stop = start + count
github dask / distributed / distributed / comm / tcp.py View on Github external
def set_tcp_timeout(comm):
    """
    Set kernel-level TCP timeout on the stream.
    """
    if comm.closed():
        return

    timeout = dask.config.get("distributed.comm.timeouts.tcp")
    timeout = int(parse_timedelta(timeout, default="seconds"))

    sock = comm.socket

    # Default (unsettable) value on Windows
    # https://msdn.microsoft.com/en-us/library/windows/desktop/dd877220(v=vs.85).aspx
    nprobes = 10
    assert timeout >= nprobes + 1, "Timeout too low"

    idle = max(2, timeout // 4)
    interval = max(1, (timeout - idle) // nprobes)
    idle = timeout - interval * nprobes
    assert idle > 0

    try:
        if sys.platform.startswith("win"):
            logger.debug("Setting TCP keepalive: idle=%d, interval=%d", idle, interval)
github OryJonay / anytime-gridsearch / AnyTimeGridSearchCV / grids / tests.py View on Github external
def test_dask_cv_single(self):
        """Cross-validate decision trees on a local Dask cluster.

        A single default tree is scored first; the cluster is then scaled up
        and one tree per ``min_samples_leaf`` value is scored, with the mean
        score of every run checked. The cluster and the client are closed
        when the test finishes, whether it passes or fails.
        """
        iris = load_iris()
        # Context managers shut down the client and the cluster even when an
        # assertion fails, so nothing leaks into later tests.
        with LocalCluster(1) as test_cluster, Client(test_cluster) as test_client:
            reg = tree.DecisionTreeClassifier()
            cv_score = test_client.submit(cross_val_score, reg, iris.data, iris.target)
            self.assertGreater(cv_score.result().mean(), 0)
            test_cluster.scale_up(4)
            # The key must be formatted with ``i``; a constant key makes every
            # future overwrite the previous one, leaving a single entry to
            # gather. ``min_samples_leaf`` has to be at least 1, so the sweep
            # runs over 1..5 rather than 0..4.
            _cv_results = {
                'reg_%i' % i: test_client.submit(
                    cross_val_score,
                    tree.DecisionTreeClassifier(min_samples_leaf=i),
                    iris.data,
                    iris.target,
                )
                for i in range(1, 6)
            }
            cv_results = test_client.gather(list(_cv_results.values()))
            for cv_result in cv_results:
                self.assertGreaterEqual(cv_result.mean(), 0)
github OryJonay / anytime-gridsearch / AnyTimeGridSearchCV / grids / tests.py View on Github external
def test_grids_list_post(self):
        """Create a grid through the REST API, fit it, and list it back.

        Posts a new ``DecisionTreeClassifier`` grid and expects HTTP 201, then
        fits an ``ATGridSearchCV`` bound to the returned uuid against the live
        test server. Finally the grid list endpoint must answer HTTP 200 with
        exactly one entry.
        """
        iris = load_iris()
        client = DjangoClient()
        response = client.post(reverse('grids_list'), data={'classifier':'DecisionTreeClassifier'})
        self.assertEqual(201, response.status_code)
        # Own the cluster explicitly so it is closed when the test ends. It is
        # kept open until after the final assertions in case the search still
        # needs its workers once ``fit`` has returned.
        with LocalCluster() as cluster:
            gs = ATGridSearchCV(tree.DecisionTreeClassifier,{'criterion':['gini','entropy'],
                                                             'max_depth':range(1,6),
                                                             'max_features':['auto','log2']},
                                client_kwargs={'address':cluster},
                                uuid=response.data.get('uuid',''),
                                webserver_url=self.live_server_url)
            gs.fit(iris.data, iris.target)
            response = client.get(reverse('grids_list'))
            self.assertEqual(200,response.status_code)
            self.assertEqual(1, len(response.data))
github dask / dask-ml / tests / model_selection / test_successive_halving.py View on Github external
    @gen_cluster(client=True)
    def _test_sha_max_iter(c, s, a, b):
        model = SGDClassifier(tol=1e-3)
        params = {"alpha": np.logspace(-3, 0, num=1000)}
        search = SuccessiveHalvingSearchCV(
            model, params, n_initial_parameters=n, n_initial_iter=r
        )

        X, y = make_classification()
        yield search.fit(X, y, classes=np.unique(y))

        calls = set(search.cv_results_["partial_fit_calls"]) - {1}
        assert min(calls) == r

        # One model trained to completion
        assert (
            search.cv_results_["partial_fit_calls"] == max(calls)
github dask / dask-ml / tests / model_selection / test_hyperband.py View on Github external
@gen_cluster(client=True, timeout=5000)
def test_min_max_iter(c, s, a, b):
    """Hyperband fitted with ``max_iter=1`` must report a positive best score."""
    # Values of max_iter below 1 are covered in test_incremental.py.
    param_space = {"value": scipy.stats.uniform(0, 1)}
    features, labels = make_classification(n_samples=10, n_features=4, chunks=10)

    searcher = HyperbandSearchCV(ConstantFunction(), param_space, max_iter=1)
    yield searcher.fit(features, labels)
    assert searcher.best_score_ > 0
github dask / dask-ml / tests / model_selection / test_incremental.py View on Github external
@gen_cluster(client=True)
def test_search_patience_infeasible_tol(c, s, a, b):
    """Fit with ``tol=-10`` and check the history reaches ``max_iter`` calls.

    After fitting, the largest ``partial_fit_calls`` value recorded in
    ``history_`` must equal the ``max_iter`` passed to the search.
    """
    features, labels = make_classification(
        n_samples=100, n_features=5, chunks=(10, 5)
    )

    param_space = {"value": check_random_state(42).rand(1000)}
    estimator = ConstantFunction()

    budget = 10
    searcher = IncrementalSearchCV(
        estimator,
        param_space,
        max_iter=budget,
        patience=3,
        tol=-10,
        decay_rate=0,
    )
    yield searcher.fit(features, labels, classes=[0, 1])

    history = pd.DataFrame(searcher.history_)
    assert history.partial_fit_calls.max() == budget
github dask / dask-ml / tests / model_selection / test_successive_halving.py View on Github external
@gen_cluster(client=True)
def test_search_patience_infeasible_tol(c, s, a, b):
    """Fit successive halving with ``patience=2`` and a NaN tolerance."""
    features, labels = make_classification(n_samples=100, n_features=5)

    param_space = {"value": np.random.RandomState(42).rand(1000)}
    estimator = ConstantFunction()

    # Search settings gathered in one place; passed through as keyword arguments.
    halving_options = dict(
        patience=2,
        tol=np.nan,
        n_initial_parameters=20,
        n_initial_iter=4,
        max_iter=1000,
    )
    searcher = SuccessiveHalvingSearchCV(estimator, param_space, **halving_options)
    yield searcher.fit(features, labels, classes=[0, 1])
github dask / dask-ml / tests / model_selection / test_incremental.py View on Github external
@gen_cluster(client=True)
def test_min_max_iter(c, s, a, b):
    """Fitting an incremental search with ``max_iter=0`` must raise ValueError."""
    features, labels = make_classification(
        n_samples=100, n_features=5, chunks=(10, 5)
    )
    param_space = {"alpha": np.logspace(-3, 0)}
    searcher = IncrementalSearchCV(SGDClassifier(), param_space, max_iter=0)

    expected_message = "max_iter < 1 is not supported"
    with pytest.raises(ValueError, match=expected_message):
        yield searcher.fit(features, labels, classes=[0, 1])