How to use the tiledb.SparseArray class in tiledb

To help you get started, we’ve selected a few tiledb examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mars-project / mars / mars / tensor / expressions / datastore / totiledb.py View on Github external
def totiledb(uri, x, ctx=None, key=None, timestamp=None):
    """Build the operand that stores tensor ``x`` into the TileDB array at ``uri``.

    If an array can already be opened at ``uri``, it is validated against ``x``
    via ``check_tiledb_array_with_tensor`` and then closed again. If opening
    raises ``tiledb.TileDBError``, the array is treated as not existing yet;
    nothing is created here (see the inline comment in the handler).

    :param uri: location of the TileDB array, forwarded as ``tiledb_uri``.
    :param x: data to store; converted with ``astensor``.
    :param ctx: optional TileDB context. When ``None``, a default ``tiledb.Ctx()``
        is used for the existence check and no config is forwarded to the operand;
        otherwise ``ctx.config().dict()`` is forwarded as ``tiledb_config``.
    :param key: forwarded to the array constructor and as ``tiledb_key``.
    :param timestamp: forwarded to the array constructor and as ``tiledb_timestamp``.
    :return: the result of calling the ``TensorTileDBDataStore`` operand on ``x``.
    """
    import tiledb

    x = astensor(x)
    raw_ctx = ctx
    if raw_ctx is None:
        ctx = tiledb.Ctx()

    tiledb_array_type = tiledb.SparseArray if x.issparse() else tiledb.DenseArray
    try:
        # Open through a context manager so the array handle is always released
        # after validation, instead of staying open until garbage collection.
        with tiledb_array_type(uri=uri, key=key, timestamp=timestamp, ctx=ctx) as tiledb_array:
            # if already created, we will check the shape and dtype
            check_tiledb_array_with_tensor(x, tiledb_array)
    except tiledb.TileDBError:
        # not exist, as we don't know the tile,
        # we will create the tiledb array in the tile of tensor
        pass

    # Only forward a config when the caller supplied a context explicitly.
    tiledb_config = None if raw_ctx is None else raw_ctx.config().dict()
    op = TensorTileDBDataStore(tiledb_config=tiledb_config, tiledb_uri=uri,
                               tiledb_key=key, tiledb_timestamp=timestamp,
                               dtype=x.dtype, sparse=x.issparse())
    return op(x)
github TileDB-Inc / TileDB-Py / examples / reading_sparse_layouts.py View on Github external
def read_array(order):
    """Print the non-empty domain and the cells of a fixed slice of the array.

    Opens the sparse array named by the module-level ``array_name`` in read
    mode, queries attribute ``"a"`` together with coordinates using the given
    layout ``order``, and prints one line per returned cell.

    :param order: layout passed through to ``A.query(order=...)``.
    """
    with tiledb.SparseArray(array_name, mode='r') as sparse_arr:
        # Report the non-empty domain first.
        print("Non-empty domain: {}".format(sparse_arr.nonempty_domain()))

        # A plain slice uses the default (row-major) order, so an explicit
        # query object is needed to request a different layout.
        layout_query = sparse_arr.query(attrs=["a"], order=order, coords=True)
        # Per the original example: only rows 1, 2 and cols 2, 3, 4.
        result = layout_query[1:3, 2:5]
        values, cell_coords = result["a"], result["coords"]

        for idx in range(cell_coords.shape[0]):
            cell_text = str(cell_coords[idx])
            value_text = str(values[idx])
            print("Cell {} has data {}".format(cell_text, value_text))
github mars-project / mars / mars / tensor / datastore / totiledb.py View on Github external
def totiledb(uri, x, ctx=None, key=None, timestamp=None):
    """Build the operand that stores tensor ``x`` into the TileDB array at ``uri``.

    If an array can already be opened at ``uri``, it is validated against ``x``
    via ``check_tiledb_array_with_tensor`` and then closed again. If opening
    raises ``tiledb.TileDBError``, the array is treated as not existing yet;
    nothing is created here (see the inline comment in the handler).

    :param uri: location of the TileDB array, forwarded as ``tiledb_uri``.
    :param x: data to store; converted with ``astensor``.
    :param ctx: optional TileDB context. When ``None``, a default ``tiledb.Ctx()``
        is used for the existence check and no config is forwarded to the operand;
        otherwise ``ctx.config().dict()`` is forwarded as ``tiledb_config``.
    :param key: forwarded to the array constructor and as ``tiledb_key``.
    :param timestamp: forwarded to the array constructor and as ``tiledb_timestamp``.
    :return: the result of calling the ``TensorTileDBDataStore`` operand on ``x``.
    """
    import tiledb

    x = astensor(x)
    raw_ctx = ctx
    if raw_ctx is None:
        ctx = tiledb.Ctx()

    tiledb_array_type = tiledb.SparseArray if x.issparse() else tiledb.DenseArray
    try:
        # Open through a context manager so the array handle is always released
        # after validation, instead of staying open until garbage collection.
        with tiledb_array_type(uri=uri, key=key, timestamp=timestamp, ctx=ctx) as tiledb_array:
            # if already created, we will check the shape and dtype
            check_tiledb_array_with_tensor(x, tiledb_array)
    except tiledb.TileDBError:
        # not exist, as we don't know the tile,
        # we will create the tiledb array in the tile of tensor
        pass

    # Only forward a config when the caller supplied a context explicitly.
    tiledb_config = None if raw_ctx is None else raw_ctx.config().dict()
    op = TensorTileDBDataStore(tiledb_config=tiledb_config, tiledb_uri=uri,
                               tiledb_key=key, tiledb_timestamp=timestamp,
                               dtype=x.dtype, sparse=x.issparse())
    return op(x)
github mars-project / mars / mars / tensor / datasource / from_tiledb.py View on Github external
key = op.tiledb_key
        timestamp = op.tiledb_timestamp

        slcs = []
        for axis in range(chunk.ndim):
            axis_offset = axis_offsets[axis]
            axis_length = chunk.shape[axis]
            slcs.append(slice(axis_offset, axis_offset + axis_length))

        if not op.sparse:
            # read dense array from tiledb
            with tiledb.DenseArray(uri=uri, ctx=tiledb_ctx, key=key, timestamp=timestamp) as tiledb_arr:
                ctx[chunk.key] = tiledb_arr[tuple(slcs)]
        else:
            # read sparse array from tiledb
            with tiledb.SparseArray(uri=uri, ctx=tiledb_ctx, key=key, timestamp=timestamp) as tiledb_arr:
                if tiledb_arr.ndim > 2:
                    raise NotImplementedError(
                        'Does not support to read array with more than 2 dimensions')

                data = tiledb_arr[tuple(slcs)]
                coords = data['coords']

                value = data[tiledb_arr.attr(0).name]
                if tiledb_arr.ndim == 2:
                    # 2-d
                    ij = tuple(coords[tiledb_arr.domain.dim(k).name] - axis_offsets[k]
                               for k in range(tiledb_arr.ndim))
                    spmatrix = sps.coo_matrix((value, ij), shape=chunk.shape)
                    ctx[chunk.key] = SparseNDArray(spmatrix)
                else:
                    # 1-d
github TileDB-Inc / TileDB-Py / examples / libtiledb / tiledb_array_schema.py View on Github external
ctx = tiledb.Ctx()

    # create dimensions
    d1 = tiledb.Dim(ctx, "", domain=(1, 1000), tile=10, dtype="uint64")
    d2 = tiledb.Dim(ctx, "d2", domain=(101, 10000), tile=100, dtype="uint64")

    # create domain
    domain = tiledb.Domain(ctx, d1, d2)

    # create attributes
    a1 = tiledb.Attr(ctx, "", dtype="int32,int32,int32")
    a2 = tiledb.Attr(ctx, "a2", compressor=("gzip", -1), dtype="float32")

    # create sparse array with schema
    schema = tiledb.SparseArray(ctx, "sparse_array_schema",
                                domain=domain, attrs=(a1, a2),
                                capacity=10,
                                tile_order='row-major',
                                cell_order='col-major',
                                coords_compressor=('zstd', 4),
                                offsets_compressor=('blosc-lz', 5))
    schema.dump()

    # Print from schema
    print("From schema properties:")
    print("- Array type: ", "sparse" if schema.sparse else "dense")
    print("- Cell order: ", schema.cell_order)
    print("- Tile order: ", schema.tile_order)
    print("- Capacity: ", schema.capacity)
    print("- Coordinates compressor: ", schema.coords_compressor)
    print("- Offsets compressor: ", schema.offsets_compressor)
github mars-project / mars / mars / tensor / datastore / totiledb.py View on Github external
# dense
            to_store = np.ascontiguousarray(ctx[op.input.key])
            slcs = []
            for axis in range(chunk.ndim):
                axis_offset = int(axis_offsets[axis])
                axis_length = int(op.input.shape[axis])
                slcs.append(slice(axis_offset, axis_offset + axis_length))
            with tiledb.DenseArray(uri=uri, ctx=tiledb_ctx, mode='w',
                                   key=key, timestamp=timestamp) as arr:
                arr[tuple(slcs)] = to_store
            ctx[chunk.key] = np.empty((0,) * chunk.ndim, dtype=chunk.dtype)
        else:
            # sparse
            to_store = ctx[op.input.key].spmatrix.tocoo()
            if to_store.nnz > 0:
                with tiledb.SparseArray(uri=uri, ctx=tiledb_ctx, mode='w',
                                        key=key, timestamp=timestamp) as arr:
                    if chunk.ndim == 1:
                        vec = to_store.col if to_store.shape[0] == 1 else to_store.row
                        vec += axis_offsets[0]
                        arr[vec] = to_store.data
                    else:
                        i, j = to_store.row + axis_offsets[0], to_store.col + axis_offsets[1]
                        arr[i, j] = to_store.data
            ctx[chunk.key] = SparseNDArray(sps.csr_matrix((0, 0), dtype=chunk.dtype),
                                           shape=chunk.shape)
github mars-project / mars / mars / tensor / execution / datasource.py View on Github external
key = chunk.op.tiledb_key
    timestamp = chunk.op.tiledb_timestamp

    slcs = []
    for axis in range(chunk.ndim):
        axis_offset = axis_offsets[axis]
        axis_length = chunk.shape[axis]
        slcs.append(slice(axis_offset, axis_offset + axis_length))

    if not chunk.issparse():
        # read dense array from tiledb
        with tiledb.DenseArray(uri=uri, ctx=tiledb_ctx, key=key, timestamp=timestamp) as tiledb_arr:
            ctx[chunk.key] = tiledb_arr[tuple(slcs)]
    else:
        # read sparse array from tiledb
        with tiledb.SparseArray(uri=uri, ctx=tiledb_ctx, key=key, timestamp=timestamp) as tiledb_arr:
            if tiledb_arr.ndim > 2:
                raise NotImplementedError(
                    'Does not support to read array with more than 2 dimensions')

            data = tiledb_arr[tuple(slcs)]
            coords = data['coords']
            value = data[tiledb_arr.attr(0).name]
            if tiledb_arr.ndim == 2:
                # 2-d
                ij = tuple(coords[tiledb_arr.domain.dim(k).name] - axis_offsets[k]
                           for k in range(tiledb_arr.ndim))
                spmatrix = sps.coo_matrix((value, ij), shape=chunk.shape)
                ctx[chunk.key] = SparseNDArray(spmatrix)
            else:
                # 1-d
                ij = xp.zeros(coords.shape), \