How to use the distributed.utils_test.gen_cluster function in distributed

To help you get started, we've selected a few examples of distributed.utils_test.gen_cluster, based on popular ways it is used in public projects.

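gen_cluster is a decorator from distributed.utils_test that runs the decorated test coroutine against a temporary in-process cluster: a scheduler plus, by default, two workers. With client=True it also passes a connected Client as the first argument. A minimal sketch of the pattern (the test body here is illustrative, not taken from the examples below):

from distributed.utils_test import gen_cluster

@gen_cluster(client=True)
async def test_submit_roundtrip(c, s, a, b):
    # c: Client, s: Scheduler, a and b: the two default Workers
    future = c.submit(sum, [1, 2, 3])
    result = await future
    assert result == 6

Note that most of the snippets below use the older Tornado-style generator form (a plain def that awaits with yield), while the pint example uses async def and await; recent versions of distributed expect the async def form.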

dask/dask-ml: tests/model_selection/test_successive_halving.py (view on GitHub)
    @gen_cluster(client=True)
    def _test_sha_max_iter(c, s, a, b):
        # n and r are defined in the enclosing test function in the source file
        model = SGDClassifier(tol=1e-3)
        params = {"alpha": np.logspace(-3, 0, num=1000)}
        search = SuccessiveHalvingSearchCV(
            model, params, n_initial_parameters=n, n_initial_iter=r
        )

        X, y = make_classification()
        yield search.fit(X, y, classes=np.unique(y))

        calls = set(search.cv_results_["partial_fit_calls"]) - {1}
        assert min(calls) == r

        # One model trained to completion
        assert (
            search.cv_results_["partial_fit_calls"] == max(calls)
        ).sum() == 1

dask/dask-ml: tests/model_selection/test_hyperband.py (view on GitHub)
@gen_cluster(client=True, timeout=5000)
def test_min_max_iter(c, s, a, b):
    # This test makes sure Hyperband works with max_iter=1.
    # Tests for max_iter < 1 are in test_incremental.py.
    values = scipy.stats.uniform(0, 1)
    X, y = make_classification(n_samples=10, n_features=4, chunks=10)

    max_iter = 1
    h = HyperbandSearchCV(ConstantFunction(), {"value": values}, max_iter=max_iter)
    yield h.fit(X, y)
    assert h.best_score_ > 0

dask/dask-ml: tests/model_selection/test_incremental.py (view on GitHub)
@gen_cluster(client=True)
def test_search_patience_infeasible_tol(c, s, a, b):
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))

    rng = check_random_state(42)
    params = {"value": rng.rand(1000)}
    model = ConstantFunction()

    max_iter = 10
    score_increase = -10
    search = IncrementalSearchCV(
        model, params, max_iter=max_iter, patience=3, tol=score_increase, decay_rate=0
    )
    yield search.fit(X, y, classes=[0, 1])

    hist = pd.DataFrame(search.history_)
    assert hist.partial_fit_calls.max() == max_iter

dask/dask-ml: tests/model_selection/test_successive_halving.py (view on GitHub)
@gen_cluster(client=True)
def test_search_patience_infeasible_tol(c, s, a, b):
    X, y = make_classification(n_samples=100, n_features=5)

    params = {"value": np.random.RandomState(42).rand(1000)}
    model = ConstantFunction()

    search = SuccessiveHalvingSearchCV(
        model,
        params,
        patience=2,
        tol=np.nan,
        n_initial_parameters=20,
        n_initial_iter=4,
        max_iter=1000,
    )
    yield search.fit(X, y, classes=[0, 1])

dask/dask-ml: tests/model_selection/test_incremental.py (view on GitHub)
@gen_cluster(client=True)
def test_min_max_iter(c, s, a, b):
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    est = SGDClassifier()
    params = {"alpha": np.logspace(-3, 0)}
    search = IncrementalSearchCV(est, params, max_iter=0)
    with pytest.raises(ValueError, match="max_iter < 1 is not supported"):
        yield search.fit(X, y, classes=[0, 1])

dask/dask-ml: tests/model_selection/test_incremental.py (view on GitHub)
@gen_cluster(client=True)
def test_gridsearch(c, s, a, b):
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))

    model = SGDClassifier(tol=1e-3)

    params = {"alpha": np.logspace(-2, 10, 3), "l1_ratio": np.linspace(0.01, 1, 2)}

    search = IncrementalSearchCV(model, params, n_initial_parameters="grid")
    yield search.fit(X, y, classes=[0, 1])

    assert {frozenset(d["params"].items()) for d in search.history_} == {
        frozenset(d.items()) for d in ParameterGrid(params)
    }

dask/dask-ml: tests/model_selection/test_incremental.py (view on GitHub)
@gen_cluster(client=True)
def test_transform(c, s, a, b):
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    model = MiniBatchKMeans(random_state=0)
    params = {"n_clusters": [3, 4, 5], "n_init": [1, 2]}
    search = IncrementalSearchCV(model, params, n_initial_parameters="grid")
    yield search.fit(X, y)
    X_, = yield c.compute([X])  # gather X back to a concrete in-memory array
    result = search.transform(X_)
    assert result.shape == (100, search.best_estimator_.n_clusters)

hgrecco/pint: pint/testsuite/test_dask.py (view on GitHub)
@gen_cluster(client=True, timeout=None)
async def test_async(c, s, a, b):
    """Test asynchronous operations."""
    da = dask.array.arange(0, 25, chunks=5, dtype=float).reshape((5, 5))
    q = ureg.Quantity(da, units_)  # ureg and units_ are module-level in pint's test suite

    x = q + ureg.Quantity(5, units_)
    y = x.persist()
    assert str(y)

    assert dask.is_dask_collection(y)
    assert len(x.__dask_graph__()) > len(y.__dask_graph__())

    assert not futures_of(x)
    assert futures_of(y)

    future = c.compute(y)
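
In this async style, the computed value would then typically be retrieved with result = await future.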

dask/dask-ml: tests/model_selection/test_hyperband.py (view on GitHub)
@gen_cluster(client=True, timeout=5000)
def test_same_random_state_same_params(c, s, a, b):
    # This test makes sure parameters are sampled correctly when a random
    # state is specified, i.e. that the random state is *correctly* passed
    # to the successive halvings spawned by Hyperband.
    seed = 0
    values = scipy.stats.uniform(0, 1)
    h = HyperbandSearchCV(
        ConstantFunction(), {"value": values}, random_state=seed, max_iter=9
    )

    # Make a class for passive random sampling
    passive = IncrementalSearchCV(
        ConstantFunction(),
        {"value": values},

dask/dask-ml: tests/model_selection/test_hyperband.py (view on GitHub)
@gen_cluster(client=True, timeout=5000)
def test_correct_params(c, s, a, b):
    # Makes sure that Hyperband has the correct parameters.

    # Implemented because Hyperband wraps SHA. Again, this makes sure that
    # parameters are correctly passed to SHA (there was a case where the
    # max_iter= flag was not passed to SuccessiveHalvingSearchCV when it
    # should have been).
    est = ConstantFunction()
    X, y = make_classification(n_samples=10, n_features=4, chunks=10)
    params = {"value": np.linspace(0, 1)}
    search = HyperbandSearchCV(est, params, max_iter=9)

    base = {
        "estimator",
        "estimator__value",
        "estimator__sleep",
        "parameters",