How to use the dask.array.from_delayed function in dask

To help you get started, we’ve selected a few dask.array.from_delayed examples based on popular ways the function is used in public projects.
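
For context before the project snippets: dask.array.from_delayed(value, shape, dtype) wraps a single dask.delayed object as a lazy, single-chunk dask array. Here is a minimal, self-contained sketch (the names are illustrative, not taken from any project below):

import dask
import dask.array as da
import numpy as np

@dask.delayed
def load_block():
    # Stand-in for any expensive or I/O-bound computation.
    return np.arange(6, dtype="f8").reshape(2, 3)

# shape and dtype must be declared up front, because nothing is
# computed until the array is actually used.
arr = da.from_delayed(load_block(), shape=(2, 3), dtype="f8")
print(arr.compute())  # [[0. 1. 2.] [3. 4. 5.]]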


github bluesky / databroker / intake_bluesky / intake_bluesky / core.py
def event(self, doc):

        @dask.delayed
        def delayed_fill(event, key):
            self.fill_event(event, include=key)
            return numpy.asarray(event['data'][key])

        descriptor = self._descriptor_cache[doc['descriptor']]
        needs_filling = {key for key, val in descriptor['data_keys'].items()
                         if 'external' in val}
        filled_doc = copy.deepcopy(doc)

        for key in needs_filling:
            shape = extract_shape(descriptor, key)
            dtype = extract_dtype(descriptor, key)
            filled_doc['data'][key] = array.from_delayed(
                delayed_fill(filled_doc, key), shape=shape, dtype=dtype)
        return filled_doc
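
Note how the snippet pulls shape and dtype from the descriptor metadata rather than from the data itself, so nothing is read until compute time. A minimal sketch of the same idea (the descriptor dict and the fill helper here are hypothetical):

import dask
import dask.array as da
import numpy as np

@dask.delayed
def fill(key):
    # Hypothetical loader standing in for delayed_fill.
    return np.zeros((480, 640), dtype="u2")

# Shape and dtype come from metadata, not from the data itself.
descriptor = {"image": {"shape": (480, 640), "dtype": "u2"}}
meta = descriptor["image"]
lazy = da.from_delayed(fill("image"), shape=meta["shape"], dtype=meta["dtype"])
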
github dask / dask-ml / dask_ml / cluster / spectral.py
else:
            X_keep = X[keep]

        X_rest = X[rest]

        A, B = embed(X_keep, X_rest, n_components, metric, params)
        _log_array(logger, A, "A")
        _log_array(logger, B, "B")

        # now the approximation of C
        a = A.sum(0)  # (l,)
        b1 = B.sum(1)  # (l,)
        b2 = B.sum(0)  # (m,)

        # TODO: I think we have some unnecessary delayed wrapping of A here.
        A_inv = da.from_delayed(delayed(pinv)(A), A.shape, A.dtype)

        inner = A_inv.dot(b1)
        d1_si = 1 / da.sqrt(a + b1)

        d2_si = 1 / da.sqrt(b2 + B.T.dot(inner))  # (m,), dask array

        # d1, d2 are diagonal, so we can avoid large matrix multiplies
        # Equivalent to diag(d1_si) @ A @ diag(d1_si)
        A2 = d1_si.reshape(-1, 1) * A * d1_si.reshape(1, -1)  # (n, n)
        _log_array(logger, A2, "A2")
        # A2 = A2.rechunk(A2.shape)
        # Equivalent to diag(d1_si) @ B @ diag(d2_si)
        B2 = da.multiply(da.multiply(d1_si.reshape(-1, 1), B), d2_si.reshape(1, -1))
        _log_array(logger, B2, "B2")

        U_A, S_A, V_A = delayed(svd, pure=True, nout=3)(A2)
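
The last line uses nout=3 so the delayed svd call can be unpacked into three separate delayed objects, each of which can then be wrapped individually. A standalone sketch of that pattern, using scipy.linalg.svd for illustration:

import dask
import dask.array as da
import numpy as np
from scipy.linalg import svd

A2 = np.random.rand(5, 5)

# nout=3 tells dask that svd returns a 3-tuple, so the result
# can be unpacked into three delayed objects before computing.
U_d, S_d, V_d = dask.delayed(svd, pure=True, nout=3)(A2)
U = da.from_delayed(U_d, shape=(5, 5), dtype="f8")
S = da.from_delayed(S_d, shape=(5,), dtype="f8")
V = da.from_delayed(V_d, shape=(5, 5), dtype="f8")
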
github AllenCellModeling / aicsimageio / aicsimageio / readers / lif_reader.py
current_dim_begin_index + curr_dim_index
                for current_dim_begin_index, curr_dim_index in zip(begin_indicies, i)
            )

            # Zip the dims with the read indices
            this_chunk_read_dims = dict(
                zip(blocked_dimension_order, this_chunk_read_indicies)
            )

            # Remove the dimensions that we want to chunk by from the read dims
            for d in chunk_by_dims:
                if d in this_chunk_read_dims:
                    this_chunk_read_dims.pop(d)

            # Add delayed array to lazy arrays at index
            lazy_arrays[i] = da.from_delayed(
                delayed(LifReader._imread)(
                    img, offsets, read_lengths, lif.xml_root, this_chunk_read_dims
                ),
                shape=sample_chunk_shape,
                dtype=sample.dtype,
            )

        # Convert the numpy array of lazy readers into a dask array and fill the inner
        # most empty dimensions with chunks
        merged = da.block(lazy_arrays.tolist())

        # Because we have set certain dimensions to be chunked and others not
        # we will need to transpose back to original dimension ordering
        # Example being, if the original dimension ordering was "SZYX" and we want to
        # chunk by "S", "Y", and "X" we created an array with dimensions ordering "ZSYX"
        transpose_indices = []
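
Here each chunk becomes its own lazy array, and da.block stitches the grid together. A self-contained sketch of that assembly step (read_tile is a hypothetical stand-in for LifReader._imread):

import dask
import dask.array as da
import numpy as np

@dask.delayed
def read_tile(r, c):
    # Hypothetical tile reader; every tile is a 2x2 block.
    return np.full((2, 2), r * 10 + c, dtype=np.uint8)

# Wrap each tile with a known shape/dtype, then let da.block
# stitch the grid into one lazy array.
grid = [
    [da.from_delayed(read_tile(r, c), shape=(2, 2), dtype=np.uint8) for c in range(2)]
    for r in range(2)
]
mosaic = da.block(grid)  # lazy 4x4 array
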
github pytroll / satpy / satpy / readers / eps_l1b.py
def get_full_angles(self):
        """Get the interpolated lons/lats."""
        if (self.sun_azi is not None and self.sun_zen is not None and
                self.sat_azi is not None and self.sat_zen is not None):
            return self.sun_azi, self.sun_zen, self.sat_azi, self.sat_zen

        self.sun_azi, self.sun_zen, self.sat_azi, self.sat_zen = self._get_full_angles()
        self.sun_azi = da.from_delayed(self.sun_azi, dtype=self["ANGULAR_RELATIONS"].dtype,
                                       shape=(self.scanlines, self.pixels))
        self.sun_zen = da.from_delayed(self.sun_zen, dtype=self["ANGULAR_RELATIONS"].dtype,
                                       shape=(self.scanlines, self.pixels))
        self.sat_azi = da.from_delayed(self.sat_azi, dtype=self["ANGULAR_RELATIONS"].dtype,
                                       shape=(self.scanlines, self.pixels))
        self.sat_zen = da.from_delayed(self.sat_zen, dtype=self["ANGULAR_RELATIONS"].dtype,
                                       shape=(self.scanlines, self.pixels))
        return self.sun_azi, self.sun_zen, self.sat_azi, self.sat_zen
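
All four angle datasets are delayed results wrapped with the same declared (scanlines, pixels) shape. A minimal sketch of one delayed call feeding several from_delayed wraps (the interpolation function is hypothetical, with nout assumed):

import dask
import dask.array as da
import numpy as np

@dask.delayed(nout=2)
def interpolate_angles():
    # Hypothetical stand-in returning two angle grids at once.
    azi = np.zeros((10, 20))
    zen = np.ones((10, 20))
    return azi, zen

azi_d, zen_d = interpolate_angles()
sun_azi = da.from_delayed(azi_d, shape=(10, 20), dtype="f8")
sun_zen = da.from_delayed(zen_d, shape=(10, 20), dtype="f8")
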
github dask / dask-ml / dask_ml / feature_extraction / text.py
def transform(self, raw_X):
        msg = "'X' should be a 1-dimensional array with length 'num_samples'."

        if not dask.is_dask_collection(raw_X):
            return self._hasher(**self.get_params()).transform(raw_X)

        if isinstance(raw_X, db.Bag):
            bag2 = raw_X.map_partitions(self._transformer)
            objs = bag2.to_delayed()
            arrs = [
                da.from_delayed(obj, (np.nan, self.n_features), self.dtype)
                for obj in objs
            ]
            result = da.concatenate(arrs, axis=0)
        elif isinstance(raw_X, dd.Series):
            result = raw_X.map_partitions(self._transformer)
        elif isinstance(raw_X, da.Array):
            # dask.Array
            chunks = ((np.nan,) * raw_X.numblocks[0], (self.n_features,))
            if raw_X.ndim == 1:
                result = raw_X.map_blocks(
                    self._transformer, dtype="f8", chunks=chunks, new_axis=1
                )
            else:
                raise ValueError(msg)
        else:
            raise ValueError(msg)
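
Note the (np.nan, self.n_features) shape in the Bag branch: np.nan marks a chunk length that is unknown until compute time, which from_delayed accepts. In miniature:

import dask
import dask.array as da
import numpy as np

@dask.delayed
def variable_rows():
    # The row count is only known once this actually runs.
    return np.ones((4, 3))

# np.nan declares an unknown chunk length along the first axis.
arr = da.from_delayed(variable_rows(), shape=(np.nan, 3), dtype="f8")
print(arr.compute().shape)  # (4, 3)
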
github SciTools / iris / lib / iris / fileformats / pp.py
# First ensure the mask array is boolean (not int).
                mask = mask.astype(bool)
                result = ma.masked_all(mask.shape, dtype=dtype)
                # Apply the fill-value from the proxy object.
                # Note: 'values' is just 'proxy' in a dask wrapper.  This arg
                # must be a dask type so that 'delayed' can recognise it, but
                # that provides no access to the underlying fill value.
                result.fill_value = proxy.mdi
                n_values = np.sum(mask)
                if n_values > 0:
                    # Note: data field can have excess values, but not fewer.
                    result[mask] = values[:n_values]
                return result

            delayed_result = calc_array(mask_field_array, delayed_valid_values)
            lazy_result_array = da.from_delayed(
                delayed_result, shape=block_shape, dtype=dtype
            )
            field.data = lazy_result_array
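
from_delayed also works when the delayed function returns a masked array, as it does here; only the shape and dtype need to be known in advance. A small sketch:

import dask
import dask.array as da
import numpy.ma as ma

@dask.delayed
def masked_block():
    out = ma.masked_all((3,), dtype="f8")  # start fully masked
    out[:2] = [1.0, 2.0]                   # fill the valid values
    return out

lazy = da.from_delayed(masked_block(), shape=(3,), dtype="f8")
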
github snarkai / Hub / hub / collections / dataset / core.py
#                 for objs in persisted.values()
            #             }
            #         )
            #         == 1
            #     ), "All numpy arrays returned from call should have same len"
            lens = {
                key: [dask.delayed(len)(objs[j]) for j in range(batch_count)]
                for key, objs in persisted.items()
            }
            lens, keys = _dict_to_tuple(lens)
            lens = client.gather(client.compute(lens))
            lens = _tuple_to_dict(lens, keys)
            for key, objs in persisted.items():
                arr = dask.array.concatenate(
                    [
                        dask.array.from_delayed(
                            obj,
                            dtype=self._tensors[key].dtype,
                            shape=(lens[key][i],) + tuple(self._tensors[key].shape[1:]),
                        )
                        for i, obj in enumerate(objs)
                    ]
                )
                if collected[key] is None:
                    collected[key] = arr
                else:
                    collected[key] = dask.array.concatenate([collected[key], arr])
            # tasks = [obj for key, objs in persisted.items() for obj in objs]
            tasks = []

            for key in list(collected.keys()):
                c = collected[key]
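
Because from_delayed needs concrete shapes, the snippet computes the batch lengths first (here via client.compute and client.gather) and only then wraps each batch. The same idea with the local scheduler (make_batch is hypothetical):

import dask
import dask.array as da
import numpy as np

@dask.delayed
def make_batch(n):
    return np.arange(n, dtype="i8")

batches = [make_batch(3), make_batch(5)]

# Compute the lengths first, so each from_delayed gets a real shape.
lens = dask.compute(*[dask.delayed(len)(b) for b in batches])
parts = [
    da.from_delayed(b, shape=(n,), dtype="i8")
    for b, n in zip(batches, lens)
]
arr = da.concatenate(parts)  # lazy array of length 8
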
github ESGF / esgf-compute-wps / compute / compute_tasks / compute_tasks / managers / input_manager.py
def from_delayed(fm, uri, var, chunk_shape):
    size = var.shape[0]

    step = chunk_shape[0]

    logger.info('Building input from delayed functions size %r step %r', size, step)

    chunks = [slice(x, min(size, x+step), 1) for x in range(0, size, step)]

    logger.info('Generated %r chunks', len(chunks))

    delayed = [dask.delayed(retrieve_chunk)(uri, var.id, {'time': x}, fm.cert_data) for x in chunks]

    arrays = [da.from_delayed(x, new_shape(var.shape, y), var.dtype) for x, y in zip(delayed, chunks)]

    return da.concatenate(arrays, axis=0)
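
The helper slices the leading axis into fixed-size steps, builds one delayed read per slice, and concatenates the wrapped chunks. A self-contained sketch of the same pattern (read_rows is a hypothetical stand-in for retrieve_chunk):

import dask
import dask.array as da
import numpy as np

def read_rows(sl):
    # Hypothetical reader for rows sl.start..sl.stop of a (10, 8) dataset.
    return np.ones((sl.stop - sl.start, 8))

size, step = 10, 4
slices = [slice(x, min(size, x + step)) for x in range(0, size, step)]
parts = [
    da.from_delayed(dask.delayed(read_rows)(s), shape=(s.stop - s.start, 8), dtype="f8")
    for s in slices
]
arr = da.concatenate(parts, axis=0)  # lazy (10, 8) array
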
github simpeg / simpeg / SimPEG / potential_fields / magnetics / simulation.py
def Jtvec(self, m, v, f=None):

        if self.coordinate_system == 'cartesian':
            dmudm = self.chiMap.deriv(m)
        else:
            dmudm = self.dSdm * self.chiMap.deriv(m)

        if self.modelType == 'amplitude':

            dfdm_v = dask.delayed(csr.dot)(v, self.dfdm)

            vec = da.from_delayed(dfdm_v, dtype=float, shape=[self.dfdm.shape[0]])

            if getattr(self, '_Mxyz', None) is not None:

                jtvec = da.dot(vec.astype(np.float32), self.G)

                Jtvec = dask.delayed(csr.dot)(jtvec, self.Mxyz)

            else:
                Jtvec = da.dot(vec.astype(np.float32), self.G)

        else:

            Jtvec = da.dot(v.astype(np.float32), self.G)

        dmudm_v = dask.delayed(csr.dot)(Jtvec, dmudm)
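
Sparse matrix products are not native dask.array operations, which is why the snippet wraps csr.dot in delayed and declares the dense result shape by hand. A small sketch of that trick with scipy.sparse:

import dask
import dask.array as da
import numpy as np
import scipy.sparse as sp

M = sp.random(4, 6, density=0.5, format="csr")
v = np.ones(6)

# Delay the sparse matvec, then declare the dense (4,) result shape.
prod = dask.delayed(M.dot)(v)
vec = da.from_delayed(prod, shape=(4,), dtype=float)
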
github jlevy44 / PathFlowAI / pathflowai / utils.py
	img = openslide.open_slide(svs_file)
	if type(img) is openslide.OpenSlide:
		gen = deepzoom.DeepZoomGenerator(
			img, tile_size=tile_size, overlap=overlap, limit_bounds=True)
		max_level = len(gen.level_dimensions) - 1
		n_tiles_x, n_tiles_y = gen.level_tiles[max_level]

		@dask.delayed(pure=True)
		def get_tile(level, column, row):
			tile = gen.get_tile(level, (column, row))
			return np.array(tile).transpose((1, 0, 2))

		sample_tile_shape = get_tile(max_level, 0, 0).shape.compute()
		rows = range(n_tiles_y - (0 if not remove_last else 1))
		cols = range(n_tiles_x - (0 if not remove_last else 1))
		arr = da.concatenate([da.concatenate([da.from_delayed(get_tile(max_level, col, row), sample_tile_shape, np.uint8) for row in rows],
											 allow_unknown_chunksizes=allow_unknown_chunksizes, axis=1) for col in cols], allow_unknown_chunksizes=allow_unknown_chunksizes)
		if transpose:
			arr=arr.transpose([1, 0, 2])
		return arr
	else:  # img is instance of openslide.ImageSlide
		return dask_image.imread.imread(svs_file)
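
One detail worth copying from this snippet: it computes a single sample tile up front (get_tile(...).shape.compute()) to learn the shape that every from_delayed call must declare, since attribute access on a delayed object is itself delayed. In miniature:

import dask
import dask.array as da
import numpy as np

@dask.delayed(pure=True)
def get_tile(i):
    # Hypothetical tile loader; all tiles share one shape.
    return np.zeros((4, 4, 3), dtype=np.uint8)

# One cheap compute() on a sample tile yields the shape for every wrap.
sample_shape = get_tile(0).shape.compute()
tiles = [da.from_delayed(get_tile(i), sample_shape, np.uint8) for i in range(3)]
strip = da.concatenate(tiles, axis=1)  # lazy (4, 12, 3) array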