How to use the blosc.SHUFFLE constant in blosc

To help you get started, we’ve selected a few blosc examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tensorwerk / hangar-py / src / hangar / remote / chunks.py View on Github external
def tensorChunkedIterator(buf, uncomp_nbytes, itemsize, pb2_request, *, err=None):
    """Compress a buffer and yield it piecewise inside a request message.

    ``buf`` is rewound, and the view returned by ``buf.getbuffer()`` is
    compressed with blosc (``blosclz``, level 5, byte shuffle, with
    ``typesize=itemsize``).  A single message is created by calling
    ``pb2_request`` with the compressed length, the caller-supplied
    ``uncomp_nbytes`` and ``err``.

    The generator then walks the pieces produced by ``chunk_bytes`` and, for
    each one, stores it in the message's ``raw_data`` attribute and yields the
    message.  The *same* message object is yielded every time, so each yielded
    value is only valid until the generator is advanced again.
    """
    buf.seek(0)
    # The buffer view is passed straight into compress() so that no reference
    # to it outlives the call.
    compressed = blosc.compress(
        buf.getbuffer(),
        typesize=itemsize,
        clevel=5,
        shuffle=blosc.SHUFFLE,
        cname='blosclz',
    )

    message = pb2_request(
        comp_nbytes=len(compressed), uncomp_nbytes=uncomp_nbytes, error=err)
    for piece in chunk_bytes(compressed):
        message.raw_data = piece
        yield message
github dblalock / bolt / experiments / python / amm_main.py View on Github external
def _blosc_compress(buff, elem_sz=8, compressor='zstd', shuffle=blosc.SHUFFLE):
    """Compress ``buff`` by delegating to ``blosc.compress()``.

    Params:
        buff: the data handed to blosc.compress()
        elem_sz: int, size in bytes of each element in buff (eg, 4 for fp32);
            forwarded as ``typesize``
        compressor: codec name, forwarded as ``cname``; one of
            ['blosclz', 'lz4', 'lz4hc', 'snappy', 'zlib', 'zstd']
        shuffle: one of [blosc.SHUFFLE, blosc.BITSHUFFLE, blosc.NOSHUFFLE]
    Returns:
        compressed buffer as bytes object; the inverse operation is
        blosc.decompress(compressed_buff)
    """
    options = dict(typesize=elem_sz, cname=compressor, shuffle=shuffle)
    return blosc.compress(buff, **options)
github Blosc / python-blosc / blosc / toplevel.py View on Github external
def _check_shuffle(shuffle):
    """Validate a ``shuffle`` argument.

    Raises ValueError when ``shuffle`` is not one of blosc.NOSHUFFLE,
    blosc.SHUFFLE or blosc.BITSHUFFLE, or when blosc.BITSHUFFLE is requested
    while ``blosc.blosclib_version`` compares lower than 1.8.0.  Returns None
    otherwise.
    """
    known_filters = (blosc.NOSHUFFLE, blosc.SHUFFLE, blosc.BITSHUFFLE)
    if shuffle not in known_filters:
        raise ValueError(
            "shuffle can only be one of NOSHUFFLE, SHUFFLE and BITSHUFFLE.")

    if shuffle == blosc.BITSHUFFLE:
        # The library version is only inspected when BITSHUFFLE is requested.
        installed = LooseVersion(blosc.blosclib_version)
        required = LooseVersion("1.8.0")
        if installed < required:
            raise ValueError(
                "You need C-Blosc 1.8.0 or higher for using BITSHUFFLE.")
github Blosc / python-blosc / bench / threadpool.py View on Github external
from __future__ import print_function
import numpy as np
import time
import blosc
from multiprocessing.pool import ThreadPool

# Benchmark parameters.
# NOTE(review): nRuns, m and N are not used in the visible part of the file;
# presumably repetitions, number of frames and frame edge length -- confirm.
nRuns = 5
dtype='int64'
m = 48
N = 2048
# Size in MiB (bytes / 2**20) of m * N * N items of `dtype`.
MegaBytes = m * N * N * np.dtype(dtype).itemsize / 2**20
# Default for the `pool_threads` argument of compressStack().
maxThreads = blosc.nthreads

# NOTE(review): BLOCKSIZE is not referenced in the visible code -- confirm use.
BLOCKSIZE = 2**18
# Compression settings passed to blosc.compress_ptr() in compressSlice().
CLEVEL = 4
SHUFFLE = blosc.SHUFFLE
COMPRESSOR = 'zstd'

def compressSlice( args ):
    """
    Compress one slice and store the result in a shared list.

    args = (numpy array address, array_size, item_size, bytesList, bytesIndex)

    The first three entries are forwarded to blosc.compress_ptr() together
    with the module-level CLEVEL, SHUFFLE and COMPRESSOR settings; the
    compressed result is written to bytesList[bytesIndex].
    """
    address, array_size, item_size = args[0], args[1], args[2]
    compressed = blosc.compress_ptr(
        address, array_size, item_size,
        clevel=CLEVEL, shuffle=SHUFFLE, cname=COMPRESSOR)
    # The destination is looked up only after compression has finished.
    args[3][args[4]] = compressed

def decompressSlice( J, list_bytes ):
    """Placeholder: ignores both arguments, does nothing and returns None."""
    return None

def compressStack( imageStack, blosc_threads = 1, pool_threads=maxThreads ):
    """
    Does frame compression using a ThreadPool to distribute the load.
github Blosc / python-blosc / bench / compress_ptr.py View on Github external
# Baseline: time a plain memory copy of N*8 bytes from in_ into a freshly
# filled array, for comparison with the blosc timings below.
# NOTE(review): N*8 presumably assumes 8-byte elements -- confirm against the
# place where in_ and N are defined (outside this snippet).
# cause page faults here
out_ = np.full(in_.size, fill_value=0, dtype=in_.dtype)
t0 = time.time()
#out_ = np.copy(in_)
# NOTE(review): this rebinds out_ to whatever ctypes.memmove() returns, so the
# destination array is no longer reachable through this name afterwards.
out_ = ctypes.memmove(out_.__array_interface__['data'][0],
                      in_.__array_interface__['data'][0], N*8)
tcpy = time.time() - t0
print("  *** ctypes.memmove() *** Time for memcpy():\t%.3f s\t(%.2f GB/s)" % (
    tcpy, (N*8 / tcpy) / 2**30))

# NOTE(review): the message says "threads" but the value printed is
# blosc.ncores.
print("\nTimes for compressing/decompressing with clevel=%d and %d threads" % (
    clevel, blosc.ncores))
# For every (array, label) pair, every compressor reported by blosc and every
# shuffle filter: time compress_ptr(), time decompress_ptr() into a fresh
# array, check the round trip, and print timings, throughput and ratio.
for (in_, label) in arrays:
    print("\n*** %s ***" % label)
    for cname in blosc.compressor_list():
        # NOTE(review): `filter` shadows the Python builtin of the same name.
        for filter in [blosc.NOSHUFFLE, blosc.SHUFFLE, blosc.BITSHUFFLE]:
            t0 = time.time()
            c = blosc.compress_ptr(in_.__array_interface__['data'][0],
                                   in_.size, in_.dtype.itemsize,
                                   clevel=clevel, shuffle=filter, cname=cname)
            tc = time.time() - t0
            # cause page faults here
            out = np.full(in_.size, fill_value=0, dtype=in_.dtype)
            t0 = time.time()
            blosc.decompress_ptr(c, out.__array_interface__['data'][0])
            td = time.time() - t0
            # Round-trip check: decompressed data must equal the input.
            assert((in_ == out).all())
            print("  *** %-8s, %-10s *** %6.3f s (%.2f GB/s) / %5.3f s (%.2f GB/s)" % (
                cname, blosc.filters[filter], tc, ((N*8 / tc) / 2**30), td, ((N*8 / td) / 2**30)), end='')
            # Ratio of the assumed uncompressed size (N*8 bytes) to len(c).
            print("\tCompr. ratio: %5.1fx" % (N*8. / len(c)))
github Blosc / python-blosc / blosc / toplevel.py View on Github external
def compress_ptr(address, items, typesize=8, clevel=9, shuffle=blosc.SHUFFLE,
                 cname='blosclz'):
    """compress_ptr(address, items[, typesize=8, clevel=9, shuffle=blosc.SHUFFLE, cname='blosclz']])

    Compress the data at address with given items and typesize.

    Parameters
    ----------
    address : int or long
        the pointer to the data to be compressed
    items : int
        The number of items (of typesize) to be compressed.
    typesize : int
        The data type size.
    clevel : int (optional)
        The compression level from 0 (no compression) to 9
        (maximum compression).  The default is 9.
github tensorwerk / hangar-py / src / hangar / records / parsing.py View on Github external
-------
    DigestAndBytes
        Two tuple containing ``digest`` and ``raw`` compressed binary encoded
        serialization of commit spec
    """
    spec_dict = {
        'commit_time': commit_time,
        'commit_message': commit_message,
        'commit_user': commit_user,
        'commit_email': commit_email,
    }

    db_spec_val = json.dumps(spec_dict, separators=(',', ':')).encode()
    digest = _hash_func(db_spec_val)
    comp_raw = blosc.compress(
        db_spec_val, typesize=8, clevel=9, shuffle=blosc.SHUFFLE, cname='zlib')
    return DigestAndBytes(digest=digest, raw=comp_raw)