ValueError, "chunk 0 is too small to contain a zstd frame"
):
dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
with self.assertRaisesRegex(ValueError, "chunk 0 is not a valid zstd frame"):
dctx.decompress_content_dict_chain([b"foo" * 8])
no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64)
with self.assertRaisesRegex(
ValueError, "chunk 0 missing content size in frame"
):
dctx.decompress_content_dict_chain([no_size])
# Corrupt first frame.
frame = zstd.ZstdCompressor().compress(b"foo" * 64)
frame = frame[0:12] + frame[15:]
with self.assertRaisesRegex(
zstd.ZstdError, "chunk 0 did not decompress full frame"
):
dctx.decompress_content_dict_chain([frame])
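# A minimal happy-path sketch of the API the errors above guard (the helper
# name is ours; assumes `import zstandard as zstd` is in scope): each chunk
# in a content dictionary chain is compressed using the previous chunk's
# uncompressed content as a raw-content dictionary, and
# decompress_content_dict_chain() returns the decompressed final chunk.
def _content_dict_chain_sketch():
    chunks = [b"version 1 of a document", b"version 2 of a document"]
    frames = [zstd.ZstdCompressor().compress(chunks[0])]
    for prev, cur in zip(chunks, chunks[1:]):
        dict_data = zstd.ZstdCompressionDict(prev)
        frames.append(zstd.ZstdCompressor(dict_data=dict_data).compress(cur))
    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress_content_dict_chain(frames) == chunks[-1]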
def test_data_equivalence(self, original, threads, use_dict):
    kwargs = {}

    # Use a content dictionary because it is cheap to create.
    if use_dict:
        kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0])

    cctx = zstd.ZstdCompressor(level=1, write_checksum=True, **kwargs)

    if not hasattr(cctx, "multi_compress_to_buffer"):
        self.skipTest("multi_compress_to_buffer not available")

    result = cctx.multi_compress_to_buffer(original, threads=-1)

    self.assertEqual(len(result), len(original))

    # Frames produced via the batch API may not be bit-identical to those
    # produced by compress() because compression parameters are adjusted
    # from the first input in batch mode. So the only thing we can do is
    # verify that the decompressed data matches the input.
    dctx = zstd.ZstdDecompressor(**kwargs)

    for i, frame in enumerate(result):
        self.assertEqual(dctx.decompress(frame), original[i])
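# Note (inferred from usage elsewhere in these tests, not stated here):
# multi_compress_to_buffer() returns a BufferWithSegmentsCollection whose
# indexed items are memoryview-like segments, which is why callers below
# convert them with .tobytes() before comparing against bytes.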
def test_multiple_threads(self):
    # A threads argument would cause multi-threaded zstd APIs to be used,
    # which would make the output differ, so build the reference frames
    # with a single-threaded compressor.
    refcctx = zstd.ZstdCompressor(write_checksum=True)
    reference = [refcctx.compress(b"x" * 64), refcctx.compress(b"y" * 64)]

    cctx = zstd.ZstdCompressor(write_checksum=True)

    if not hasattr(cctx, "multi_compress_to_buffer"):
        self.skipTest("multi_compress_to_buffer not available")

    frames = []
    frames.extend(b"x" * 64 for i in range(256))
    frames.extend(b"y" * 64 for i in range(256))

    result = cctx.multi_compress_to_buffer(frames, threads=-1)
    self.assertEqual(len(result), 512)

    for i in range(512):
        if i < 256:
            self.assertEqual(result[i].tobytes(), reference[0])
        else:
            self.assertEqual(result[i].tobytes(), reference[1])
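# Note (an assumption from the library's documented convention, not this
# file): a negative threads value asks for one worker per logical CPU. The
# batch API parallelizes across items, with each item compressed normally,
# which is why each frame still equals its single-threaded reference above.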
def test_write_size(self):
    cctx = zstd.ZstdCompressor(level=3)

    dest = OpCountingBytesIO()
    with cctx.stream_writer(dest, write_size=1) as compressor:
        self.assertEqual(compressor.write(b"foo"), 0)
        self.assertEqual(compressor.write(b"bar"), 0)
        self.assertEqual(compressor.write(b"foobar"), 0)

    self.assertEqual(len(dest.getvalue()), dest._write_count)
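# Why the assertions hold (a reading of the test, not source text): write()
# returns how many bytes reached the destination during that call, and these
# small inputs stay buffered inside the compressor, hence 0. With
# write_size=1, compressed output is flushed to `dest` at most one byte per
# underlying write, so the destination's write count equals its final length.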
def test_stream_source_read_variance(
    self, original, level, source_read_size, read_sizes
):
    refctx = zstd.ZstdCompressor(level=level)
    ref_frame = refctx.compress(original)

    cctx = zstd.ZstdCompressor(level=level)
    with cctx.stream_reader(
        io.BytesIO(original), size=len(original), read_size=source_read_size
    ) as reader:
        chunks = []
        while True:
            read_size = read_sizes.draw(strategies.integers(-1, 16384))
            chunk = reader.read(read_size)
            if not chunk and read_size:
                break

            chunks.append(chunk)

    self.assertEqual(b"".join(chunks), ref_frame)
def test_no_context_manager(self):
    source = b"foobar" * 60
    cctx = zstd.ZstdCompressor()
    frame = cctx.compress(source)

    dctx = zstd.ZstdDecompressor()
    reader = dctx.stream_reader(frame)

    self.assertEqual(reader.read(6), b"foobar")
    self.assertEqual(reader.read(18), b"foobar" * 3)
    self.assertFalse(reader.closed)

    # Calling close prevents subsequent use.
    reader.close()
    self.assertTrue(reader.closed)

    with self.assertRaisesRegex(ValueError, "stream is closed"):
        reader.read(6)
def test_relative_seeks(
    self, original, level, source_read_size, seek_amounts, read_sizes
):
    cctx = zstd.ZstdCompressor(level=level)
    frame = cctx.compress(original)

    dctx = zstd.ZstdDecompressor()

    with dctx.stream_reader(frame, read_size=source_read_size) as reader:
        while True:
            amount = seek_amounts.draw(strategies.integers(0, 16384))
            reader.seek(amount, os.SEEK_CUR)

            offset = reader.tell()
            read_amount = read_sizes.draw(strategies.integers(1, 16384))
            chunk = reader.read(read_amount)

            if not chunk:
                break

            # `offset` was otherwise unused; the snippet was presumably
            # truncated before checking that tell() advanced by the read.
            self.assertEqual(reader.tell(), offset + len(chunk))
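# Note (general zstd behavior, not stated in the snippet): the frame format
# has no random-access index, so a forward SEEK_CUR on stream_reader is
# serviced by decompressing and discarding `amount` bytes; seeking backwards
# in a compressed stream is not supported.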
# Despite sharing a name with the test above, this variant comes from the
# decompressor's test class: it decodes a frame (optionally produced via the
# streaming writer) and fuzzes the read sizes on the way back out.
def test_stream_source_read_variance(
    self, original, level, streaming, source_read_size, read_sizes
):
    cctx = zstd.ZstdCompressor(level=level)

    if streaming:
        source = io.BytesIO()
        writer = cctx.stream_writer(source)
        writer.write(original)
        writer.flush(zstd.FLUSH_FRAME)
        source.seek(0)
    else:
        frame = cctx.compress(original)
        source = io.BytesIO(frame)

    dctx = zstd.ZstdDecompressor()

    chunks = []
    with dctx.stream_reader(source, read_size=source_read_size) as reader:
        while True:
            read_size = read_sizes.draw(strategies.integers(-1, 131072))
            chunk = reader.read(read_size)
            if not chunk and read_size:
                break

            chunks.append(chunk)

    self.assertEqual(b"".join(chunks), original)
def test_invalid_inputs(self):
    dctx = zstd.ZstdDecompressor()

    if not hasattr(dctx, "multi_decompress_to_buffer"):
        self.skipTest("multi_decompress_to_buffer not available")

    with self.assertRaises(TypeError):
        dctx.multi_decompress_to_buffer(True)

    with self.assertRaises(TypeError):
        dctx.multi_decompress_to_buffer((1, 2))

    with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"):
        dctx.multi_decompress_to_buffer([u"foo"])

    with self.assertRaisesRegex(
        ValueError, "could not determine decompressed size of item 0"
    ):
        # The call under this context manager was truncated in the source;
        # any bytes item that is not a frame with a known content size
        # triggers the error, e.g.:
        dctx.multi_decompress_to_buffer([b"foobar"])
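# A minimal happy-path sketch (the helper name is ours; assumes a backend
# that exposes multi_decompress_to_buffer): frames from compress() embed
# their content size, so output buffers can be sized automatically.
def _multi_decompress_sketch():
    cctx = zstd.ZstdCompressor()
    frames = [cctx.compress(b"foo" * 64), cctx.compress(b"bar" * 64)]
    dctx = zstd.ZstdDecompressor()
    result = dctx.multi_decompress_to_buffer(frames)
    assert result[0].tobytes() == b"foo" * 64
    assert result[1].tobytes() == b"bar" * 64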
def test_read_lines(self):
    cctx = zstd.ZstdCompressor()
    source = b"\n".join(("line %d" % i).encode("ascii") for i in range(1024))

    frame = cctx.compress(source)

    dctx = zstd.ZstdDecompressor()
    reader = dctx.stream_reader(frame)
    tr = io.TextIOWrapper(reader, encoding="utf-8")

    lines = []
    for line in tr:
        lines.append(line.encode("utf-8"))

    self.assertEqual(len(lines), 1024)
    self.assertEqual(b"".join(lines), source)

    reader = dctx.stream_reader(frame)
    tr = io.TextIOWrapper(reader, encoding="utf-8")

    lines = tr.readlines()
    self.assertEqual(len(lines), 1024)
    self.assertEqual("".join(lines).encode("utf-8"), source)