How to use the zstandard.train_dictionary function in zstandard

To help you get started, we’ve selected a few zstandard.train_dictionary examples, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github indygreg / python-zstandard / tests / test_decompressor.py View on Github external
def test_dict(self):
        """Round-trip samples through dictionary-based batch decompression.

        Trains a dictionary, compresses every sample with it, then checks
        that ``multi_decompress_to_buffer`` reproduces the samples.
        """
        dictionary = zstd.train_dictionary(16384, generate_samples(), k=64, d=16)

        compressor = zstd.ZstdCompressor(dict_data=dictionary, level=1)
        frames = list(map(compressor.compress, generate_samples()))

        decompressor = zstd.ZstdDecompressor(dict_data=dictionary)

        # The batch API may be missing on this decompressor; skip, don't fail.
        if not hasattr(decompressor, "multi_decompress_to_buffer"):
            self.skipTest("multi_decompress_to_buffer not available")

        segments = decompressor.multi_decompress_to_buffer(frames)
        round_tripped = [segment.tobytes() for segment in segments]

        self.assertEqual(round_tripped, generate_samples())
github indygreg / python-zstandard / tests / test_compressor.py View on Github external
def test_no_dict_id(self):
        """Check that ``write_dict_id=False`` leaves the dictionary ID out.

        The same payload is compressed with and without the dictionary ID
        written; the frame sizes and the parsed ``dict_id`` values are
        then compared.
        """
        # 128 repetitions of the same three training samples, in order.
        samples = [b"foo" * 64, b"bar" * 64, b"foobar" * 64] * 128

        trained = zstd.train_dictionary(1024, samples)
        payload = b"foobarfoobar"

        id_writer = zstd.ZstdCompressor(level=1, dict_data=trained)
        with_dict_id = id_writer.compress(payload)

        id_omitter = zstd.ZstdCompressor(
            level=1, dict_data=trained, write_dict_id=False
        )
        no_dict_id = id_omitter.compress(payload)

        # Omitting the ID is expected to save exactly four bytes.
        self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)

        no_params = zstd.get_frame_parameters(no_dict_id)
        with_params = zstd.get_frame_parameters(with_dict_id)

        # A frame without a recorded dictionary reports an ID of 0.
        self.assertEqual(no_params.dict_id, 0)
        self.assertEqual(with_params.dict_id, 1880053135)
github indygreg / python-zstandard / tests / test_train_dictionary.py View on Github external
def test_basic(self):
        """Train a dictionary with explicit ``k``/``d`` and inspect the result."""
        trained = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
        self.assertIsInstance(trained.dict_id(), int_type)

        # Serialized dictionaries begin with the zstd dictionary magic number
        # (0xEC30A437, stored little-endian).
        header = trained.as_bytes()[:4]
        self.assertEqual(header, b"\x37\xa4\x30\xec")

        # The parameters passed to training are exposed on the result.
        for name, expected in (("k", 64), ("d", 16)):
            self.assertEqual(getattr(trained, name), expected)
github indygreg / python-zstandard / tests / test_train_dictionary.py View on Github external
def test_bad_args(self):
        """Reject sample arguments that are text rather than a list of bytes."""
        # A bare text string in place of the samples collection: TypeError.
        self.assertRaises(TypeError, zstd.train_dictionary, 8192, u"foo")

        # A list whose element is text: ValueError.
        self.assertRaises(ValueError, zstd.train_dictionary, 8192, [u"foo"])
github indygreg / python-zstandard / tests / test_train_dictionary.py View on Github external
def test_optimize(self):
        """Train with ``threads=-1`` and ``steps=1`` while fixing only ``d``.

        ``k`` is not passed, so the value reported on the result is the one
        selected during training.
        """
        trained = zstd.train_dictionary(
            8192,
            generate_samples(),
            threads=-1,
            steps=1,
            d=16,
        )

        # This varies by platform.
        acceptable_k = (50, 2000)
        self.assertIn(trained.k, acceptable_k)
        self.assertEqual(trained.d, 16)
github indygreg / python-zstandard / tests / test_train_dictionary.py View on Github external
def test_bad_precompute_compress(self):
        """``precompute_compress`` needs exactly one of level / compression params."""
        trained = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)

        neither_given = "must specify one of level or "
        both_given = "must only specify one of level or "

        # No argument at all.
        with self.assertRaisesRegex(ValueError, neither_given):
            trained.precompute_compress()

        # Both arguments at once.
        with self.assertRaisesRegex(ValueError, both_given):
            params = zstd.CompressionParameters()
            trained.precompute_compress(level=3, compression_params=params)
github indygreg / python-zstandard / tests / test_train_dictionary.py View on Github external
def test_no_args(self):
        """Calling ``train_dictionary`` with no arguments raises TypeError."""
        self.assertRaises(TypeError, zstd.train_dictionary)
github indygreg / python-zstandard / tests / test_decompressor.py View on Github external
def test_dictionary_multiple(self):
        """Reuse one dictionary-backed decompressor across several frames.

        Trains a dictionary, compresses three different inputs with a single
        compressor, then decompresses each frame with a single decompressor
        and checks that every input is recovered.
        """
        # 128 repetitions of the same three training samples, in order.
        samples = [b"foo" * 64, b"bar" * 64, b"foobar" * 64] * 128

        d = zstd.train_dictionary(8192, samples)

        sources = (b"foobar" * 8192, b"foo" * 8192, b"bar" * 8192)
        cctx = zstd.ZstdCompressor(level=1, dict_data=d)
        compressed = [cctx.compress(source) for source in sources]

        dctx = zstd.ZstdDecompressor(dict_data=d)
        # Pair each frame with its source directly instead of indexing both
        # sequences by position.
        for frame, source in zip(compressed, sources):
            self.assertEqual(dctx.decompress(frame), source)
github indygreg / python-zstandard / bench.py View on Github external
else:
            training_chunks = chunks

        train_args = {
            "level": args.level,
        }

        if args.cover_k:
            train_args["k"] = args.cover_k
        if args.cover_d:
            train_args["d"] = args.cover_d

        # Always use all available threads in optimize mode.
        train_args["threads"] = -1

        dict_data = zstd.train_dictionary(args.dict_size, training_chunks, **train_args)
        print(
            "trained dictionary of size %d (wanted %d) (l=%d)"
            % (len(dict_data), args.dict_size, args.level)
        )

    if args.zlib and args.discrete:
        compressed_discrete_zlib = []
        ratios = []
        for chunk in chunks:
            c = zlib.compress(chunk, args.zlib_level)
            compressed_discrete_zlib.append(c)
            ratios.append(float(len(c)) / float(len(chunk)))

        compressed_size = sum(map(len, compressed_discrete_zlib))
        ratio = float(compressed_size) / float(orig_size) * 100.0
        bad_count = sum(1 for r in ratios if r >= 1.00)