How to use the dask.array.Array function in dask

To help you get started, we’ve selected a few dask examples based on popular ways it is used in public projects.

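All of the snippets below revolve around the low-level constructor da.Array(dask_graph, name, chunks, dtype=...), which wraps a pre-built task graph in an array interface. As a minimal, hedged sketch of that constructor on its own (the graph, names, and shapes here are illustrative, not taken from any of the projects below):

import numpy as np
import dask.array as da

# Hand-built graph: keys are (name, block_row, block_col); values may be
# concrete arrays or task tuples.
dsk = {
    ("x", 0, 0): np.ones((2, 3)),
    ("x", 1, 0): np.zeros((2, 3)),
}

# chunks lists the block sizes along each axis: two row-blocks of 2 and
# one column-block of 3, giving an overall shape of (4, 3).
arr = da.Array(dsk, "x", chunks=((2, 2), (3,)), dtype=float)
assert arr.shape == (4, 3)
print(arr.compute())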

github corteva / rioxarray / test / integration / test_integration__io.py
    def test_no_mftime(self):
        # rasterio can accept "filename" arguments that are actually urls,
        # including paths to remote files.
        # In issue #1816, we found that these caused dask to break, because
        # the modification time was used to determine the dask token. This
        # test ensures we can still chunk such files when reading with
        # rasterio.
        with create_tmp_geotiff(
            8, 10, 3, transform_args=[1, 2, 0.5, 2.0], crs="+proj=latlong"
        ) as (tmp_file, expected):
            with mock.patch("os.path.getmtime", side_effect=OSError):
                with xr.open_rasterio(tmp_file, chunks=(1, 2, 2)) as actual:
                    assert isinstance(actual.data, da.Array)
                    assert_allclose(actual, expected)
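A hedged standalone version of the assertion this test makes: any xarray object built or opened with chunks= should be backed by a da.Array (the array below is illustrative):

import dask.array as da
import xarray as xr

arr = xr.DataArray(da.zeros((3, 4), chunks=(1, 2)))
assert isinstance(arr.data, da.Array)  # chunked, therefore dask-backed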
github dmlc / xgboost / demo / dask / sklearn_cpu_training.py
    # `m`, `partition_size` and `X` are defined just above in the demo,
    # e.g. X = da.random.random((m, n), partition_size)
    y = da.random.random(m, partition_size)

    regressor = xgboost.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
    regressor.set_params(tree_method='hist')
    # assigning client here is optional
    regressor.client = client

    regressor.fit(X, y, eval_set=[(X, y)])
    prediction = regressor.predict(X)

    bst = regressor.get_booster()
    history = regressor.evals_result()

    print('Evaluation history:', history)
    # returned prediction is always a dask array.
    assert isinstance(prediction, da.Array)
    return bst                  # returning the trained model
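This function is typically driven by a local cluster. A minimal sketch of that scaffolding, assuming the snippet's enclosing function is named main(client) as in the upstream demo (the cluster size is an assumption):

from dask.distributed import Client, LocalCluster

if __name__ == "__main__":
    # Spin up a throwaway local cluster and hand its client to main().
    with LocalCluster(n_workers=2, threads_per_worker=1) as cluster:
        with Client(cluster) as client:
            main(client)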
github opendatacube / datacube-core / datacube / api.py
def _get_array(storage_units, var_name, dimensions, dim_props):
    """
    Create an xarray.DataArray
    :return xarray.DataArray
    """
    dsk_id = str(uuid.uuid1())  # unique name for the requested dask graph
    dsk = _get_dask_for_storage_units(storage_units, var_name, dimensions, dim_props['dim_vals'], dsk_id)
    _fill_in_dask_blanks(dsk, storage_units, var_name, dimensions, dim_props, dsk_id)

    dtype = storage_units[0].variables[var_name].dtype
    chunks = tuple(tuple(dim_props['sus_size'][dim]) for dim in dimensions)
    dask_array = da.Array(dsk, dsk_id, chunks, dtype=dtype)
    coords = [(dim, dim_props['coord_labels'][dim]) for dim in dimensions]
    xarray_data_array = xarray.DataArray(dask_array, coords=coords)
    return xarray_data_array
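The same graph-to-da.Array-to-xarray.DataArray pipeline in isolation, with a toy one-dimensional graph (all names and values here are illustrative):

import numpy as np
import xarray as xr
import dask.array as da

dsk = {("t", 0): np.arange(3), ("t", 1): np.arange(3, 6)}
dask_array = da.Array(dsk, "t", chunks=((3, 3),), dtype=int)

# Attach coordinate labels, exactly as _get_array does above.
labelled = xr.DataArray(dask_array, coords=[("time", np.arange(6))])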
github hyperspy / hyperspy / hyperspy / misc / slicing.py
    def _slicer(self, slices, isNavigation=None, out=None):
        array_slices = self._get_array_slices(slices, isNavigation)
        new_data = self.data[array_slices]
        if new_data.size == 1 and new_data.dtype is np.dtype('O'):
            # ``dArray`` is ``dask.array.Array``, imported in this module
            # under that alias.
            if isinstance(new_data[0], (np.ndarray, dArray)):
                return self.__class__(new_data[0]).transpose(navigation_axes=0)
            else:
                return new_data[0]

        if out is None:
            _obj = self._deepcopy_with_new_data(new_data,
                                                copy_variance=True)
            _to_remove = []
            for slice_, axis in zip(array_slices, _obj.axes_manager._axes):
                if (isinstance(slice_, slice) or
                        len(self.axes_manager._axes) < 2):
                    axis._slice_me(slice_)
                else:
                    _to_remove.append(axis.index_in_axes_manager)
            for _ind in reversed(sorted(_to_remove)):
                _obj._remove_axis(_ind)
github cortex-lab / phy / phy / io / traces.py
    # ``File`` opens the HDF5-based .kwd file; ``Array`` below is
    # ``dask.array.Array``.
    with File(filename, mode='r') as f:
        rec_names = sorted([name for name in f['/recordings']])
        shapes = [f['/recordings/{}/data'.format(name)].shape
                  for name in rec_names]

    # Create the dask graph for all recordings from the .kwd file.
    dask = {('data', idx, 0): (_read_recording, filename, rec_name)
            for (idx, rec_name) in enumerate(rec_names)}

    # Make sure all recordings have the same number of channels.
    n_cols = shapes[0][1]
    assert all(shape[1] == n_cols for shape in shapes)

    # Create the dask Array.
    chunks = (tuple(shape[0] for shape in shapes), (n_cols,))
    return Array(dask, 'data', chunks)
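A sketch of the same layout with two fake "recordings", showing how uneven row-chunks concatenate one block per recording along the first axis (the reader function and sizes are made up):

import numpy as np
import dask.array as da

def _read(n_rows):
    # Stand-in for _read_recording(): one block per recording.
    return np.zeros((n_rows, 4))

dsk = {("data", 0, 0): (_read, 10),
       ("data", 1, 0): (_read, 6)}
traces = da.Array(dsk, "data", chunks=((10, 6), (4,)), dtype=float)
assert traces.shape == (16, 4)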
github hyperspy / hyperspy / hyperspy / _signals / eels.py
        def substract_from_offset(value, signals):
            if isinstance(value, da.Array):
                value = value.compute()
            for signal in signals:
                signal.axes_manager[-1].offset -= value
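The compute-before-use pattern above in isolation: a zero-dimensional dask array is materialised into a plain scalar before it touches eager state (the reduction is illustrative):

import dask.array as da

value = da.arange(10).mean()
if isinstance(value, da.Array):
    value = value.compute()  # now a NumPy scalar, safe for in-place math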
github Chilipp / psyplot / psyplot / gdal_store.py
                # (tail of a nested ``load(iband)`` helper that reads one
                # raster band and masks its no-data value)
                try:
                    a[a == no_data] = a.dtype.type(nan)
                except ValueError:
                    pass
            return a
        ds = self.ds
        dims = ['lat', 'lon']
        chunks = ((ds.RasterYSize,), (ds.RasterXSize,))
        shape = (ds.RasterYSize, ds.RasterXSize)
        variables = OrderedDict()
        for iband in range(1, ds.RasterCount+1):
            band = ds.GetRasterBand(iband)
            dt = dtype(gdal_array.codes[band.DataType])
            if with_dask:
                dsk = {('x', 0, 0): (load, iband)}
                arr = Array(dsk, 'x', chunks, shape=shape, dtype=dt)
            else:
                arr = load(iband)
            attrs = band.GetMetadata_Dict()
            try:
                dt.type(nan)
                attrs['_FillValue'] = nan
            except ValueError:
                no_data = band.GetNoDataValue()
                attrs.update({'_FillValue': no_data} if no_data else {})
            variables['Band%i' % iband] = Variable(dims, arr, attrs)
        variables['lat'], variables['lon'] = self._load_GeoTransform()
        return FrozenOrderedDict(variables)
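The single-chunk idiom from this store, reduced to its essentials: defer an arbitrary loader behind a one-block da.Array (the loader and shape are stand-ins):

import numpy as np
import dask.array as da

def load():
    return np.random.rand(4, 5)  # stand-in for band.ReadAsArray()

shape = (4, 5)
dsk = {("x", 0, 0): (load,)}  # one task, one block
arr = da.Array(dsk, "x", chunks=((shape[0],), (shape[1],)),
               shape=shape, dtype=float)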
github ska-sa / montblanc / montblanc / impl / rime / tensorflow / dataset.py
        if not np.sum(time_chunks[uts]) == rs.stop - rs.start:
            sum_chunks = np.sum(time_chunks[uts])
            raise ValueError("Sum of time_chunks[%d:%d] '%d' "
                            "does not match the number of rows '%d' "
                            "in the row[%d:%d]" %
                                (uts.start, uts.stop, sum_chunks,
                                rs.stop-rs.start,
                                rs.start, rs.stop))

    # Chunks for 'utime', 'antenna' and 'uvw' dimensions
    chunks = (tuple(utime_groups),
                (xds.dims["antenna"],),
                (xds.dims["(u,v,w)"],))

    # Create dask array and assign it to the dataset
    dask_array = da.Array(dsk, name, chunks, xds.uvw.dtype)
    dims = ("utime", "antenna", "(u,v,w)")
    return xds.assign(antenna_uvw=xr.DataArray(dask_array, dims=dims))
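The invariant that the ValueError above guards is simply that each dimension's chunk sizes sum to that dimension's length; a quick illustration with made-up numbers:

chunks = ((10, 10, 5), (7,), (3,))     # (utime, antenna, (u,v,w))
shape = tuple(sum(c) for c in chunks)  # -> (25, 7, 3)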
github hyperspy / hyperspy / hyperspy / _signals / signal1d.py
        lazy, the result is computed in memory because it depends on the
        current state of the axes that could change later on in the workflow.

        Raises
        ------
        SignalDimensionError
            If the signal dimension is not 1.
        """
        if show_progressbar is None:
            show_progressbar = preferences.General.show_progressbar
        self._check_signal_dimension_equals_one()
        ip = number_of_interpolation_points + 1
        axis = self.axes_manager.signal_axes[0]
        self._check_navigation_mask(mask)
        # we compute for now
        if isinstance(start, da.Array):
            start = start.compute()
        if isinstance(end, da.Array):
            end = end.compute()
        i1, i2 = axis._get_index(start), axis._get_index(end)
        if reference_indices is None:
            reference_indices = self.axes_manager.indices
        ref = self.inav[reference_indices].data[i1:i2]

        if interpolate is True:
            ref = interpolate1D(ip, ref)
        iterating_kwargs = ()
        if mask is not None:
            iterating_kwargs += (('mask', mask),)
        shift_signal = self._map_iterate(
            _estimate_shift1D,
            iterating_kwargs=iterating_kwargs,