Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_dict(self):
    """Round-trip the generated samples through dictionary batch decompression.

    A dictionary is trained on the samples, every sample is compressed with
    it, and the frames are decompressed in one ``multi_decompress_to_buffer``
    call. The test is skipped when the decompressor lacks that method.
    """
    dictionary = zstd.train_dictionary(16384, generate_samples(), k=64, d=16)

    compressor = zstd.ZstdCompressor(dict_data=dictionary, level=1)
    frames = list(map(compressor.compress, generate_samples()))

    decompressor = zstd.ZstdDecompressor(dict_data=dictionary)
    if not hasattr(decompressor, "multi_decompress_to_buffer"):
        self.skipTest("multi_decompress_to_buffer not available")

    buffers = decompressor.multi_decompress_to_buffer(frames)
    decoded = [segment.tobytes() for segment in buffers]
    self.assertEqual(decoded, generate_samples())
def test_no_dict_id(self):
    """Check that ``write_dict_id=False`` leaves the dictionary ID out of the frame.

    The same input is compressed twice with the same trained dictionary,
    once with default settings and once with ``write_dict_id=False``, and
    the resulting frame sizes and frame parameters are compared.
    """
    # 128 repetitions of the three base samples, in interleaved order.
    samples = [b"foo" * 64, b"bar" * 64, b"foobar" * 64] * 128

    d = zstd.train_dictionary(1024, samples)

    cctx = zstd.ZstdCompressor(level=1, dict_data=d)
    with_dict_id = cctx.compress(b"foobarfoobar")

    cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
    no_dict_id = cctx.compress(b"foobarfoobar")

    # The frame that records the dictionary ID is exactly 4 bytes longer.
    self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)

    no_params = zstd.get_frame_parameters(no_dict_id)
    with_params = zstd.get_frame_parameters(with_dict_id)
    self.assertEqual(no_params.dict_id, 0)
    # The ID stored in the frame must be the ID of the dictionary used.
    self.assertEqual(with_params.dict_id, d.dict_id())
    # Pinned value for this exact training input.
    self.assertEqual(with_params.dict_id, 1880053135)
def test_basic(self):
    """Train a dictionary with explicit ``k``/``d`` and inspect the result.

    Checks the type of the dictionary ID, the first four bytes of the
    serialized dictionary, and that ``k`` and ``d`` are reported back
    unchanged.
    """
    trained = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
    self.assertIsInstance(trained.dict_id(), int_type)

    # Leading four bytes of the serialized dictionary (its magic header).
    header = trained.as_bytes()[:4]
    self.assertEqual(header, b"\x37\xa4\x30\xec")

    self.assertEqual(trained.k, 64)
    self.assertEqual(trained.d, 16)
def test_bad_args(self):
    """Verify that text-string samples are rejected by ``train_dictionary``."""
    # A bare text string as the samples argument raises TypeError.
    self.assertRaises(TypeError, zstd.train_dictionary, 8192, u"foo")
    # A list whose element is a text string raises ValueError instead.
    self.assertRaises(ValueError, zstd.train_dictionary, 8192, [u"foo"])
def test_optimize(self):
    """Train with ``steps``/``threads`` set and check the reported ``k`` and ``d``."""
    options = {"threads": -1, "steps": 1, "d": 16}
    trained = zstd.train_dictionary(8192, generate_samples(), **options)

    # This varies by platform.
    self.assertIn(trained.k, (50, 2000))
    self.assertEqual(trained.d, 16)
def test_bad_precompute_compress(self):
    """Check argument validation in ``precompute_compress``.

    Calling it with neither ``level`` nor ``compression_params``, or with
    both at once, must raise ``ValueError`` with the matching message.
    """
    trained = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)

    missing_msg = "must specify one of level or "
    with self.assertRaisesRegex(ValueError, missing_msg):
        trained.precompute_compress()

    both_msg = "must only specify one of level or "
    with self.assertRaisesRegex(ValueError, both_msg):
        trained.precompute_compress(
            level=3,
            compression_params=zstd.CompressionParameters(),
        )
def test_no_args(self):
    """Calling ``train_dictionary`` with no arguments must raise ``TypeError``."""
    self.assertRaises(TypeError, zstd.train_dictionary)
def test_dictionary_multiple(self):
    """Reuse one trained dictionary for several compress/decompress round trips.

    A single compressor and a single decompressor, both bound to the same
    dictionary, process three different inputs; each decompressed result
    must equal its original source.
    """
    # 128 repetitions of the three base samples, in interleaved order.
    samples = [b"foo" * 64, b"bar" * 64, b"foobar" * 64] * 128
    d = zstd.train_dictionary(8192, samples)

    sources = (b"foobar" * 8192, b"foo" * 8192, b"bar" * 8192)

    # Compress everything first so the compressor is reused across inputs
    # before any decompression happens.
    cctx = zstd.ZstdCompressor(level=1, dict_data=d)
    compressed = [cctx.compress(source) for source in sources]

    dctx = zstd.ZstdDecompressor(dict_data=d)
    for source, frame in zip(sources, compressed):
        self.assertEqual(dctx.decompress(frame), source)
# Fallback branch: train on every chunk.
# NOTE(review): the `if` this `else` pairs with is not visible in this excerpt.
else:
training_chunks = chunks
# Keyword arguments forwarded to zstd.train_dictionary(); level is always set.
train_args = {
"level": args.level,
}
# k and d are forwarded only when the corresponding option is truthy.
if args.cover_k:
train_args["k"] = args.cover_k
if args.cover_d:
train_args["d"] = args.cover_d
# Always use all available threads in optimize mode.
train_args["threads"] = -1
dict_data = zstd.train_dictionary(args.dict_size, training_chunks, **train_args)
# Report the size actually produced next to the requested size and the level.
print(
"trained dictionary of size %d (wanted %d) (l=%d)"
% (len(dict_data), args.dict_size, args.level)
)
# Runs only when both the zlib and discrete options are truthy.
if args.zlib and args.discrete:
compressed_discrete_zlib = []
ratios = []
# Compress each chunk on its own and record compressed/original size for it.
# NOTE(review): an empty chunk would divide by zero below — confirm chunks
# are never empty.
for chunk in chunks:
c = zlib.compress(chunk, args.zlib_level)
compressed_discrete_zlib.append(c)
ratios.append(float(len(c)) / float(len(chunk)))
# Sum of all compressed sizes, expressed as a percentage of orig_size.
compressed_size = sum(map(len, compressed_discrete_zlib))
ratio = float(compressed_size) / float(orig_size) * 100.0
# Number of chunks whose zlib output is at least as large as the input.
bad_count = sum(1 for r in ratios if r >= 1.00)