How to use the dask.array module in dask

To help you get started, we've selected a few dask.array examples based on popular ways it is used in public projects.
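
Before the project examples, here is a minimal sketch of the core pattern they all rely on: build a chunked array, compose lazy operations, then trigger execution. The shape and chunk sizes below are arbitrary choices for illustration.

import dask.array as da

# A 10000x10000 array of ones, split into 1000x1000 chunks.
x = da.ones((10000, 10000), chunks=(1000, 1000))

# Operations only build a task graph; nothing runs yet.
total = (x + x.T).sum()

# .compute() executes the graph, in parallel where possible.
print(total.compute())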


Example from dask/dask-ml: tests/test_incremental.py
def test_incremental_basic(scheduler, dataframes):
    # Create observations that we know linear models can recover
    n, d = 100, 3
    rng = da.random.RandomState(42)
    X = rng.normal(size=(n, d), chunks=30)
    coef_star = rng.uniform(size=d, chunks=d)
    y = da.sign(X.dot(coef_star))
    y = (y + 1) / 2
    if dataframes:
        X = dd.from_array(X)
        y = dd.from_array(y)

    with scheduler() as (s, [_, _]):
        est1 = SGDClassifier(random_state=0, tol=1e-3, average=True)
        est2 = clone(est1)

        clf = Incremental(est1, random_state=0)
        result = clf.fit(X, y, classes=[0, 1])
        assert result is clf
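
The dd.from_array calls above convert the dask arrays into dask DataFrames so the same estimator API works on both container types; dask also provides dd.from_dask_array for exactly this conversion. A minimal sketch (the column names are illustrative):

import dask.array as da
import dask.dataframe as dd

# A chunked random array, then the same data as a dask DataFrame.
X = da.random.random((100, 3), chunks=(25, 3))
df = dd.from_dask_array(X, columns=['f0', 'f1', 'f2'])
print(df.head())
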
Example from dask/dask-ml: tests/preprocessing/test_data.py
def test_basic(self, output_distribution):
    rs = da.random.RandomState(0)
    a = dpp.QuantileTransformer(output_distribution=output_distribution)
    b = spp.QuantileTransformer(output_distribution=output_distribution)

    X = rs.uniform(size=(1000, 3), chunks=50)
    a.fit(X)
    b.fit(X)
    assert_estimator_equal(a, b, atol=0.02)

    # set the quantiles, so that from here out, we're exact
    a.quantiles_ = b.quantiles_
    assert_eq_ar(a.transform(X), b.transform(X), atol=1e-7)
    assert_eq_ar(X, a.inverse_transform(a.transform(X)))
Example from dask/dask-ml: dask_ml/preprocessing/label.py
def fit(self, y):
    y = self._check_array(y)

    if isinstance(y, da.Array):
        classes_ = _encode_dask_array(y)
        self.classes_ = classes_.compute()
        self.dtype_ = None
    elif _is_categorical(y):
        self.classes_ = _encode_categorical(y)
        self.dtype_ = y.dtype
    else:
        self.dtype_ = None
        return super(LabelEncoder, self).fit(y)

    return self
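
When y is a dask array, the encoder above builds the class labels lazily and then materializes them with .compute(). Outside of dask-ml, plain dask.array expresses the same idea through da.unique. A minimal sketch (not the library's internal _encode_dask_array):

import numpy as np
import dask.array as da

y = da.from_array(np.array([2, 0, 1, 2, 0]), chunks=2)
classes = da.unique(y).compute()   # lazily finds sorted unique labels, then materializes
print(classes)                     # [0 1 2]
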
Example from dask/dask-ml: dask_ml/wrappers.py
    inputs (NumPy array, pandas dataframe, scipy sparse matrix), the
    regular return value is returned.

    If the underlying estimator does not have a ``predict_proba``
    method, then an ``AttributeError`` is raised.

    Parameters
    ----------
    X : array or dataframe

    Returns
    -------
    y : array-like
    """
    self._check_method("predict_log_proba")
    return da.log(self.predict_proba(X))
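
The da.log above is applied lazily and elementwise to the probability array, so nothing is materialized until the caller computes. The same behavior in isolation (a minimal sketch with made-up probabilities):

import numpy as np
import dask.array as da

proba = da.from_array(np.array([[0.9, 0.1], [0.25, 0.75]]), chunks=(1, 2))
log_proba = da.log(proba)      # builds a lazy elementwise task; no work yet
print(log_proba.compute())
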
Example from ceos-seo/data_cube_utilities: data_cube_utilities/dc_water_classifier.py
    # Extract dataset bands needed for calculations
    blue = dataset_in.blue
    green = dataset_in.green
    red = dataset_in.red
    nir = dataset_in.nir
    swir1 = dataset_in.swir1
    swir2 = dataset_in.swir2

    classified = _run_regression(blue.data, green.data, red.data,
                                 nir.data, swir1.data, swir2.data)

    # Fill an array of the same shape and type as `classified` with the no-data value
    classified_clean = classified - classified + no_data

    if isinstance(classified_clean, np.ndarray):
        classified_clean = np.where(clean_mask, classified, classified_clean)
    elif isinstance(classified_clean, dask.array.core.Array):
        classified_clean = dask.array.where(clean_mask, classified, classified_clean)
    
    # Create xarray of data
    x_coords = dataset_in[x_coord]
    y_coords = dataset_in[y_coord]

    time = None
    coords = None
    dims = None

    if mosaic:
        coords = [y_coords, x_coords]
        dims = [y_coord, x_coord]
    else:
        time_coords = dataset_in[time_coord]
        coords = [time_coords, y_coords, x_coords]
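
The dask.array.where branch above keeps the mask-and-fill step lazy: it selects classified values where clean_mask is True and the no-data fill elsewhere. The same pattern in isolation (a minimal sketch with made-up values):

import numpy as np
import dask.array as da

data = da.from_array(np.array([1.0, 2.0, 3.0, 4.0]), chunks=2)
clean = da.from_array(np.array([True, False, True, False]), chunks=2)
no_data = -9999.0

cleaned = da.where(clean, data, no_data)   # elementwise select, still lazy
print(cleaned.compute())                   # [1.0, -9999.0, 3.0, -9999.0]
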
Example from mrocklin/slides: images/array.py
import dask.array as da

kwargs = {'bgcolor': '#00000000',
          'rankdir': 'BT',
          'node_attr': {'color': 'white',
                        'fontcolor': '#FFFFFF',
                        'penwidth': '3'},
          'edge_attr': {'color': 'white', 'penwidth': '3'}}

x = da.ones((15,), chunks=(5,))
x.visualize('array-1d.svg', **kwargs)
x.sum().visualize('array-1d-sum.svg', **kwargs)

x = da.ones((15, 15), chunks=(5, 5))

x.sum(axis=1).visualize('array-sum.svg', **kwargs)
(x + x.T).visualize('array-xxT.svg', **kwargs)
(x.dot(x.T + 1)).visualize('array-xdotxT.svg', **kwargs)
(x.dot(x.T + 1) - x.mean()).visualize('array-xdotxT-mean.svg', **kwargs)
(x.dot(x.T + 1) - x.mean()).std().visualize('array-xdotxT-mean-std.svg', **kwargs)
Example from bluesky/databroker: intake_bluesky/intake_bluesky/core.py
def event_page(self, doc):

    @dask.delayed
    def delayed_fill(event_page, key):
        self.fill_event_page(event_page, include=key)
        return numpy.asarray(event_page['data'][key])

    descriptor = self._descriptor_cache[doc['descriptor']]
    needs_filling = {key for key, val in descriptor['data_keys'].items()
                     if 'external' in val}
    filled_doc = copy.deepcopy(doc)

    for key in needs_filling:
        shape = extract_shape(descriptor, key)
        dtype = extract_dtype(descriptor, key)
        filled_doc['data'][key] = array.from_delayed(
            delayed_fill(filled_doc, key), shape=shape, dtype=dtype)
    return filled_doc
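
array.from_delayed above (this module imports dask.array as array) turns a single delayed result into a one-chunk dask array, which requires declaring shape and dtype up front. A minimal standalone sketch, with a made-up loader standing in for the real fill step:

import dask
import dask.array as da
import numpy as np

@dask.delayed
def load_block():
    # Hypothetical stand-in for expensive I/O or filling logic.
    return np.ones((2, 3))

# Shape and dtype must be declared, since the data does not exist yet.
arr = da.from_delayed(load_block(), shape=(2, 3), dtype=float)
print(arr.compute())
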
Example from dask/dask: dask/dataframe/core.py
lines = [str(type(self))]

if len(self.columns) == 0:
    lines.append('Index: 0 entries')
    lines.append('Empty %s' % type(self).__name__)
    put_lines(buf, lines)
    return

# Group and execute the required computations
computations = {}
if verbose:
    computations.update({'index': self.index, 'count': self.count()})
if memory_usage:
    computations.update({'memory_usage': self.map_partitions(M.memory_usage, index=True)})
computations = dict(zip(computations.keys(), da.compute(*computations.values())))

column_template = "{0:<%d} {1}" % (self.columns.str.len().max() + 5)

if verbose:
    index = computations['index']
    counts = computations['count']
    lines.append(index.summary())
    column_template = column_template.format('{0}', '{1} non-null {2}')
    column_info = [column_template.format(*x) for x in zip(self.columns, counts, self.dtypes)]
else:
    column_info = [column_template.format(*x) for x in zip(self.columns, self.dtypes)]

lines.append('Data columns (total {} columns):'.format(len(self.columns)))
lines.extend(column_info)
dtype_counts = ['%s(%d)' % k for k in sorted(self.dtypes.value_counts().iteritems(), key=str)]
lines.append('dtypes: {}'.format(', '.join(dtype_counts)))
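
The single da.compute(*computations.values()) call above evaluates every pending computation in one graph execution rather than one at a time, letting them share intermediate work. In isolation (a minimal sketch):

import dask.array as da

x = da.ones((1000,), chunks=100)
total, mean = da.compute(x.sum(), x.mean())   # one pass computes both results
print(total, mean)                            # 1000.0 1.0
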
Example from SciTools/iris: lib/iris/analysis/__init__.py
def inner_stat(array, axis=-1, mdtol=None, **kwargs):
    # Call the statistic to get the basic result (missing-data tolerant).
    dask_result = dask_stats_function(array, axis=axis, **kwargs)
    if mdtol is None or mdtol >= 1.0:
        result = dask_result
    else:
        # Build a lazy computation to compare the fraction of missing
        # input points at each output point to the 'mdtol' threshold.
        point_mask_counts = da.sum(da.ma.getmaskarray(array), axis=axis)
        points_per_calc = array.size / dask_result.size
        masked_point_fractions = point_mask_counts / points_per_calc
        boolean_mask = masked_point_fractions > mdtol
        # Return an mdtol-masked version of the basic result.
        result = da.ma.masked_array(
            da.ma.getdata(dask_result), boolean_mask
        )
    return result
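
To make the mdtol logic concrete: with four input points per output point and a threshold of 0.5, an output point is masked only when more than half of its inputs were missing. A minimal sketch using the same da.ma helpers (the data and threshold are made up):

import numpy as np
import dask.array as da

data = np.ma.masked_array(
    np.arange(8.0).reshape(2, 4),
    mask=[[True, True, True, False],     # 3 of 4 inputs masked: exceeds mdtol
          [True, False, False, False]],  # 1 of 4 inputs masked: kept
)
arr = da.from_array(data, chunks=(1, 4))

result = da.mean(arr, axis=-1)
fraction = da.sum(da.ma.getmaskarray(arr), axis=-1) / arr.shape[-1]
masked = da.ma.masked_array(da.ma.getdata(result), fraction > 0.5)
print(masked.compute())   # first output point masked, second kept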