How to use the dask.array.concatenate function in dask

To help you get started, we've selected a few dask.array.concatenate examples based on popular ways it is used in public projects.
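
Before diving into the project snippets, here is a minimal, self-contained example (array contents and chunk sizes are arbitrary) showing what da.concatenate does: it joins a sequence of dask arrays along an existing axis, lazily.

import numpy as np
import dask.array as da

x = da.from_array(np.arange(6).reshape(2, 3), chunks=(1, 3))
y = da.from_array(np.arange(6, 12).reshape(2, 3), chunks=(1, 3))

rows = da.concatenate([x, y], axis=0)  # shape (4, 3)
cols = da.concatenate([x, y], axis=1)  # shape (2, 6)
print(rows.compute())

Like most dask operations, da.concatenate only records the operation in the task graph; nothing is computed until .compute() (or an equivalent trigger) is called.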


github pyxem / pyxem / pyxem / utils / io_utils.py
"""
    # Determine dimensions of raw frame data
    a_type = a.dtype
    a_shape = a.shape

    len_a_shape = len(a_shape)
    img_axes = len_a_shape-2, len_a_shape-1
    a_half = int(a_shape[img_axes[0]] / 2), int(a_shape[img_axes[1]] / 2)
    # Define 3 pixel wide cross of zeros to pad raw data
    z_array = da.zeros((a_shape[0],a_shape[1],3), dtype = a_type)
    z_array2 = da.zeros((a_shape[0],3,a_shape[img_axes[1]]+3), dtype = a_type)
    # Insert blank cross into raw data

    b = da.concatenate((a[:,:,:a_half[1]],z_array, a[:,:,a_half[1]:]), axis = -1)

    b = da.concatenate((b[:, :a_half[0],:], z_array2, b[:,a_half[0]:,:]), axis = -2)

    return b
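
The slicing-plus-concatenate pattern above is a general way to insert a band of constant values into a lazy array without materializing it. A reduced sketch of the same idea (shapes are hypothetical):

import dask.array as da

a = da.ones((4, 10), chunks=(4, 5))
gap = da.zeros((4, 3), dtype=a.dtype)
padded = da.concatenate([a[:, :5], gap, a[:, 5:]], axis=1)  # insert 3 zero columns at column 5
assert padded.shape == (4, 13)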
github intake / intake / intake / dask.py
            return dd.from_pandas(data, chunksize=chunksize, **kwargs)
        elif source.container == 'list':
            return db.from_sequence(source.read(), **kwargs)
        else:
            raise ValueError('Unknown container type: %s' % source.container)
    else:
        # Strategy depends on source properties
        if source.get_chunks_supported:
            chunks = source.get_chunks(chunksize)
            futures = client.map(read_func, chunks)
        else:
            futures = client.map(read_func, [source])

        if source.container == 'ndarray':
            array_parts = [da.from_delayed(f, shape=c.shape, dtype=c.dtype) for f, c in zip(futures, chunks)]
            return da.concatenate(array_parts, axis=0)
        elif source.container == 'dataframe':
            return dd.from_delayed(futures)
        elif source.container == 'list':
            return db.from_delayed(futures)
        else:
            raise ValueError('Unknown container type: %s' % source.container)
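
The core recipe here, wrapping each future or delayed object with da.from_delayed and stitching the pieces together with da.concatenate, works whenever the shape and dtype of every piece are known up front. A minimal sketch with dask.delayed in place of distributed futures (the loader and shapes are hypothetical):

import numpy as np
import dask
import dask.array as da

@dask.delayed
def load_chunk(i):
    # stand-in for reading one chunk from disk or a service
    return np.full((2, 3), i, dtype='f8')

parts = [da.from_delayed(load_chunk(i), shape=(2, 3), dtype='f8') for i in range(4)]
arr = da.concatenate(parts, axis=0)  # shape (8, 3)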
github kwikteam / phy-contrib / phycontrib / csicsvari / traces.py
def _concat_dask(arrs):
    """NOTE: dask is currently not used because of severe performance issues"""
    from dask.array import concatenate, from_array
    return concatenate([from_array(arr, chunks=arr.shape) for arr in arrs],
                       axis=0)
github pytroll / satpy / satpy / readers / seviri_l2_bufr.py
            msgCount = 0
            while True:
                bufr = ec.codes_bufr_new_from_file(fh)
                if bufr is None:
                    break

                ec.codes_set(bufr, 'unpack', 1)

                # if it is the first message, initialise our final array
                if msgCount == 0:
                    arr = da.from_array(ec.codes_get_array(
                        bufr, key, float), chunks=CHUNK_SIZE)
                else:
                    tmpArr = da.from_array(ec.codes_get_array(
                        bufr, key, float), chunks=CHUNK_SIZE)
                    arr = da.concatenate((arr, tmpArr))

                msgCount += 1
                ec.codes_release(bufr)

        if arr.size == 1:
            arr = arr[0]

        return arr
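
One caveat with this loop: calling da.concatenate once per message grows the task graph incrementally. When there are many pieces, it is usually cheaper to collect them in a list and concatenate once. A sketch of the alternative (message decoding is mocked with plain numpy arrays):

import numpy as np
import dask.array as da

messages = [np.arange(3, dtype=float) for _ in range(5)]  # stand-ins for decoded BUFR arrays
parts = [da.from_array(m, chunks=3) for m in messages]
arr = da.concatenate(parts)  # one concatenate instead of one per message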
github dask / dask-ml / dask_ml / metrics / pairwise.py
    XD = X.to_delayed().flatten().tolist()
    func = delayed(metrics.pairwise_distances_argmin_min, pure=True, nout=2)
    blocks = [func(x, Y, metric=metric, metric_kwargs=metric_kwargs) for x in XD]
    argmins, mins = zip(*blocks)

    argmins = [
        da.from_delayed(block, (chunksize,), np.int64)
        for block, chunksize in zip(argmins, X.chunks[0])
    ]
    # Scikit-learn seems to always use float64
    mins = [
        da.from_delayed(block, (chunksize,), "f8")
        for block, chunksize in zip(mins, X.chunks[0])
    ]
    argmins = da.concatenate(argmins)
    mins = da.concatenate(mins)
    return argmins, mins
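
The same recipe extends to delayed functions with multiple outputs: declare nout, unzip the per-block results, wrap each stream with da.from_delayed using the chunk sizes from X.chunks[0], and concatenate. A condensed sketch with a hypothetical block function in place of scikit-learn's:

import numpy as np
import dask.array as da
from dask import delayed

def argmin_min_block(x):
    return x.argmin(axis=1), x.min(axis=1)

X = da.random.random((6, 4), chunks=(3, 4))
func = delayed(argmin_min_block, pure=True, nout=2)
blocks = [func(b) for b in X.to_delayed().flatten().tolist()]
argmins, mins = zip(*blocks)
argmins = da.concatenate([da.from_delayed(a, (c,), np.int64)
                          for a, c in zip(argmins, X.chunks[0])])
mins = da.concatenate([da.from_delayed(m, (c,), 'f8')
                       for m, c in zip(mins, X.chunks[0])])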
github hyperspy / hyperspy / hyperspy / _signals / signal1d.py
                thelist.extend([tapered, therest])
                dc = da.concatenate(thelist, axis=-1)
            else:
                dc[..., offset:channels + offset] *= (
                    np.hanning(2 * channels)[:channels])
                dc[..., :offset] *= 0.
        if side == 'right' or side == 'both':
            rl = None if offset == 0 else -offset
            if self._lazy:
                therest = dc[..., :-channels - offset]
                tapered = dc[..., -channels - offset:rl]
                tapered *= np.hanning(2 * channels)[-channels:]
                thelist = [therest, tapered]
                if offset != 0:
                    thelist.append(zeros)
                dc = da.concatenate(thelist, axis=-1)
            else:
                dc[..., -channels - offset:rl] *= (
                    np.hanning(2 * channels)[-channels:])
                if offset != 0:
                    dc[..., -offset:] *= 0.

        if self._lazy:
            self.data = dc
        self.events.data_changed.trigger(obj=self)
        return channels
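
In the lazy branches above, the taper is applied by slicing the array, scaling the pieces, and reassembling them with da.concatenate, because dask arrays historically did not support the in-place slice assignment used in the NumPy branches. A reduced sketch of the left-side taper (sizes are hypothetical):

import numpy as np
import dask.array as da

dc = da.ones((2, 100), chunks=(2, 50))
channels, offset = 10, 5
zeros = da.zeros((2, offset), dtype=dc.dtype)
tapered = dc[..., offset:channels + offset] * np.hanning(2 * channels)[:channels]
therest = dc[..., channels + offset:]
dc = da.concatenate([zeros, tapered, therest], axis=-1)  # shape is unchanged: (2, 100)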
github ESGF / esgf-compute-wps / compute / compute_tasks / compute_tasks / cdat.py
    shape = list(dataarray.shape)

    logger.info('Chunk index %s size %s, data array shape %s', index, size, shape)

    chunk_slices = [slice(x, min(x+chunk_step, size)) for x in range(0, size, chunk_step)]

    chunk_shapes = [update_shape(shape.copy(), index, x.stop-x.start) for x in chunk_slices]

    logger.info('Split variable into %s chunks', len(chunk_slices))

    delayed = [dask.delayed(get_protected_data)(url, var_name, cert, time=x) for x in chunk_slices]

    dask_da = [da.from_delayed(y, shape=x, dtype=dataarray.dtype) for x, y in zip(chunk_shapes, delayed)]

    concat = da.concatenate(dask_da, axis=0)

    logger.info('Created dask array shape %s', concat.shape)

    return concat
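
This is the manual version of chunking along one axis: compute slice boundaries, derive each piece's shape, build one delayed read per slice, wrap the results with da.from_delayed, and concatenate along the split axis. A compact sketch (the reader function and sizes are hypothetical):

import numpy as np
import dask
import dask.array as da

def read_slice(t):
    # stand-in for a remote read restricted to one time slice
    return np.zeros((t.stop - t.start, 4, 4))

size, step = 10, 4
slices = [slice(i, min(i + step, size)) for i in range(0, size, step)]
parts = [da.from_delayed(dask.delayed(read_slice)(s), shape=(s.stop - s.start, 4, 4), dtype='f8')
         for s in slices]
concat = da.concatenate(parts, axis=0)  # shape (10, 4, 4)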
github darothen / xbpch / xbpch / core.py
def _concat(self, *args, **kwargs):
        if self._dask:
            return da.concatenate(*args, **kwargs)
        else:
            return np.concatenate(*args, **kwargs)
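
Because da.concatenate mirrors np.concatenate's signature, a thin dispatcher like this lets one call site handle both eager and lazy data. Hypothetical usage:

import numpy as np
import dask.array as da

parts_np = [np.ones((2, 2)), np.zeros((2, 2))]
parts_da = [da.from_array(p, chunks=2) for p in parts_np]

np.concatenate(parts_np, axis=0)  # eager: returns a numpy array at once
da.concatenate(parts_da, axis=0)  # lazy: returns an unevaluated dask array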
github xgcm / xgcm / xgcm / models / mitgcm / mds_store.py
(slice(nside*n, nside*(n+1)),))
            data_face = ar[face_slice]
            face_arrays.append(data_face)

    # We can't concatenate using numpy (hcat etc.) because it makes a copy,
    # presumably loading the memmaps into memory.
    # Using dask gets around this.
    # But what if we want different chunks, or already chunked the data
    # upstream? Doesn't seem like this is ideal
    # TODO: Refactor handling of dask arrays and chunking
    #return np.concatenate(face_arrays, axis=jdim)
    # the dask version doesn't work because of this:
    # https://github.com/dask/dask/issues/1645
    face_arrays_dask = [da.from_array(fa, chunks=fa.shape)
                        for fa in face_arrays]
    concat = da.concatenate(face_arrays_dask, axis=jdim)
    return concat
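
Wrapping each memmap with da.from_array before concatenating records only the task graph, so the data stays on disk until compute time, whereas np.concatenate would copy every memmap into memory immediately. A minimal sketch (file name and shapes are hypothetical):

import numpy as np
import dask.array as da

mm = np.memmap('faces.dat', dtype='f4', mode='w+', shape=(4, 8))
faces = [mm[0:2], mm[2:4]]  # views into the memmap, still backed by disk
faces_dask = [da.from_array(fa, chunks=fa.shape) for fa in faces]
concat = da.concatenate(faces_dask, axis=0)  # no copy until .compute()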
github nmileva / starfm4py / src / starfm4py.py
def da_stack(folder, shape):
    # `path` and `windowSize` are expected to be defined at module level
    da_list = []
    full_path = path + folder
    max_blocks = shape[0] // windowSize + 1

    for block in range(1, max_blocks + 1):
        for row in range(0, windowSize):
            name = str(block) + 'r' + str(row)
            full_name = full_path + name + '.zarr'
            try:
                da_array = da.from_zarr(full_name)
                da_list.append(da_array)
            except Exception:
                continue

    return da.rechunk(da.concatenate(da_list, axis=0), chunks=(shape[1], windowSize**2))
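
Since concatenation preserves the chunking of its inputs, following da.concatenate with a rechunk is a common way to give downstream code the chunk layout it expects. The equivalent minimal form (sizes are hypothetical):

import dask.array as da

parts = [da.ones((5, 9), chunks=(5, 9)) for _ in range(3)]
stacked = da.concatenate(parts, axis=0)  # chunks follow the inputs: three (5, 9) blocks
stacked = stacked.rechunk((15, 9))       # one block, as downstream code expects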