How to use the zstandard.ZstdCompressionDict class in zstandard

To help you get started, we’ve selected a few zstandard examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github indygreg / python-zstandard / tests / test_decompressor.py View on Github external
def test_simple(self):
        """Round-trip a content-only dictionary chain of compressed frames."""
        original = [
            b"foo" * 64,
            b"foobar" * 64,
            b"baz" * 64,
            b"foobaz" * 64,
            b"foobarbaz" * 64,
        ]

        # First frame is compressed plainly; every later frame uses the
        # previous *uncompressed* chunk as its dictionary.
        chunks = [zstd.ZstdCompressor().compress(original[0])]
        for previous, current in zip(original, original[1:]):
            dict_data = zstd.ZstdCompressionDict(previous)
            compressor = zstd.ZstdCompressor(dict_data=dict_data)
            chunks.append(compressor.compress(current))

        # Decompressing the chain prefix of length i yields original[i - 1].
        for end in range(1, len(original)):
            dctx = zstd.ZstdDecompressor()
            result = dctx.decompress_content_dict_chain(chunks[0:end])
            self.assertEqual(result, original[end - 1])
github indygreg / python-zstandard / tests / test_train_dictionary.py View on Github external
def test_bad_mode(self):
        """An unknown dict_type value must raise ValueError."""
        message = "invalid dictionary load mode"
        with self.assertRaisesRegex(ValueError, message):
            zstd.ZstdCompressionDict(b"foo", dict_type=42)
github indygreg / python-zstandard / tests / test_decompressor_fuzzing.py View on Github external
def test_data_equivalence(self, original, threads, use_dict):
        """Multi-frame compress/decompress round-trips the input buffers."""
        # Optionally share the first input as a dictionary on both sides.
        extra = {}
        if use_dict:
            extra["dict_data"] = zstd.ZstdCompressionDict(original[0])

        cctx = zstd.ZstdCompressor(
            level=1, write_content_size=True, write_checksum=True, **extra
        )

        # The multi-buffer API is an optional extension of the bindings.
        if not hasattr(cctx, "multi_compress_to_buffer"):
            self.skipTest("multi_compress_to_buffer not available")

        compressed = cctx.multi_compress_to_buffer(original, threads=-1)

        dctx = zstd.ZstdDecompressor(**extra)
        decompressed = dctx.multi_decompress_to_buffer(compressed)

        self.assertEqual(len(decompressed), len(original))
        for expected, frame in zip(original, decompressed):
            self.assertEqual(frame.tobytes(), expected)
github indygreg / python-zstandard / bench.py View on Github external
def decompress_content_dict_read_to_iter(chunks, opts):
    # Benchmark: decompress a chain of frames where each frame after the
    # first uses the previous decompressed payload as its dictionary.
    previous = b"".join(zstd.ZstdDecompressor(**opts).read_to_iter(chunks[0]))

    for frame in chunks[1:]:
        dctx = zstd.ZstdDecompressor(
            dict_data=zstd.ZstdCompressionDict(previous), **opts
        )
        previous = b"".join(dctx.read_to_iter(frame))
github indygreg / python-zstandard / bench.py View on Github external
def compress_content_dict_read_to_iter(chunks, zparams, use_size=False):
    # Benchmark: first chunk is compressed plainly; the iterator must be
    # drained for the work to actually happen.
    first_ctx = zstd.ZstdCompressor(compression_params=zparams)
    first_size = len(chunks[0]) if use_size else -1
    for _ in first_ctx.read_to_iter(chunks[0], size=first_size):
        pass

    # Each subsequent chunk is compressed with the prior chunk as dictionary.
    for previous, current in zip(chunks, chunks[1:]):
        ctx = zstd.ZstdCompressor(
            dict_data=zstd.ZstdCompressionDict(previous),
            compression_params=zparams,
        )
        current_size = len(current) if use_size else -1
        for _ in ctx.read_to_iter(current, size=current_size):
            pass
github indygreg / python-zstandard / bench.py View on Github external
print(
            "stream compressed size (l=%d): %d (%.2f%%)"
            % (zparams.compression_level, compressed_size, ratio)
        )

    if args.content_dict:
        compressed_content_dict = []
        ratios = []
        # First chunk is compressed like normal.
        c = zstd.ZstdCompressor(compression_params=zparams).compress(chunks[0])
        compressed_content_dict.append(c)
        ratios.append(float(len(c)) / float(len(chunks[0])))

        # Subsequent chunks use previous chunk as a dict.
        for i, chunk in enumerate(chunks[1:]):
            d = zstd.ZstdCompressionDict(chunks[i])
            zctx = zstd.ZstdCompressor(dict_data=d, compression_params=zparams)
            c = zctx.compress(chunk)
            compressed_content_dict.append(c)
            ratios.append(float(len(c)) / float(len(chunk)))

        compressed_size = sum(map(len, compressed_content_dict))
        ratio = float(compressed_size) / float(orig_size) * 100.0
        bad_count = sum(1 for r in ratios if r >= 1.00)
        good_ratio = 100.0 - (float(bad_count) / float(len(chunks)) * 100.0)
        print(
            "content dict compressed size (l=%d): %d (%.2f%%); smaller: %.2f%%"
            % (zparams.compression_level, compressed_size, ratio, good_ratio)
        )

    print("")
github elemental-lf / benji / src / benji / transform / zstd.py View on Github external
def __init__(self, *, config: Config, name: str, module_configuration: ConfigDict) -> None:
        """Initialize the zstd transform.

        Reads the compression ``level`` (validated against
        ``zstandard.MAX_COMPRESSION_LEVEL``) and an optional ``dictDataFile``
        whose contents become a precomputed compression dictionary.
        """
        super().__init__(config=config, name=name, module_configuration=module_configuration)

        # Fix: this was annotated ``str`` although ``types=int`` guarantees
        # an integer compression level.
        self.level: int = Config.get_from_dict(module_configuration,
                                               'level',
                                               types=int,
                                               check_func=lambda v: v >= 1 and v <= zstandard.MAX_COMPRESSION_LEVEL,
                                               check_message='Option level must be between 1 and {} (inclusive)'.format(
                                                   zstandard.MAX_COMPRESSION_LEVEL))

        # Optional shared dictionary; None means plain (dictionary-less) zstd.
        self._dict_data = None
        dict_data_file: str = Config.get_from_dict(module_configuration, 'dictDataFile', None, types=str)
        if dict_data_file:
            with open(dict_data_file, 'rb') as f:
                dict_data_content = f.read()
            self._dict_data = zstandard.ZstdCompressionDict(dict_data_content, dict_type=zstandard.DICT_TYPE_FULLDICT)
            # Precomputing avoids re-processing the dictionary for every
            # compression context created later.
            self._dict_data.precompute_compress(self.level)

        # Per-thread storage (presumably for compressor/decompressor reuse
        # elsewhere in the class — not visible in this chunk).
        self._local = threading.local()
github opendatacube / odc-tools / libs / dscache / odc / dscache / _jsoncache.py View on Github external
db_info = db.open_db(b'info', create=True)

    with db.begin(db_info, write=True) as tr:
        tr.put(b'version', FORMAT_VERSION)

        if zdict is not None:
            tr.put(b'zdict', zdict)

    dbs = SimpleNamespace(main=db,
                          info=db_info,
                          groups=db.open_db(b'groups', create=True),
                          ds=db.open_db(b'ds', create=True),
                          udata=db.open_db(b'udata', create=True))

    comp_params = {'dict_data': zstandard.ZstdCompressionDict(zdict)} if zdict else {}

    comp = zstandard.ZstdCompressor(level=complevel, **comp_params)
    decomp = zstandard.ZstdDecompressor(**comp_params)

    state = SimpleNamespace(dbs=dbs,
                            comp=comp,
                            decomp=decomp)

    return JsonBlobCache(state)
github indygreg / python-zstandard / bench.py View on Github external
def compress_content_dict_compressobj(chunks, zparams, use_size=False):
    # Benchmark: first chunk is compressed plainly via the compressobj API.
    cobj = zstd.ZstdCompressor(compression_params=zparams).compressobj(
        size=len(chunks[0]) if use_size else -1
    )
    cobj.compress(chunks[0])
    cobj.flush()

    # Remaining chunks each use the previous chunk as their dictionary.
    for previous, current in zip(chunks, chunks[1:]):
        ctx = zstd.ZstdCompressor(
            dict_data=zstd.ZstdCompressionDict(previous),
            compression_params=zparams,
        )
        cobj = ctx.compressobj(len(current) if use_size else -1)
        cobj.compress(current)
        cobj.flush()