How to use the lz4.frame.compress function in lz4

To help you get started, we’ve selected a few lz4.frame.compress examples based on popular ways the function is used in public projects.

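All of the examples below build on the same one-shot API. A minimal round trip looks like this (the data and variable names here are ours, not taken from any of the projects):

import lz4.frame

data = b'a repetitive payload compresses well ' * 100
compressed = lz4.frame.compress(data)        # one-shot: returns a complete LZ4 frame
restored = lz4.frame.decompress(compressed)  # one-shot: returns the original bytes
assert restored == data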

github CoffeaTeam / coffea / tests / test_spark.py
import pytest

pyspark = pytest.importorskip("pyspark", minversion="2.4.1")

import pandas as pd
import pickle as pkl
import lz4.frame as lz4f

from coffea.util import numpy as np
from coffea.processor.spark.spark_executor import agg_histos_raw, reduce_histos_raw
from coffea.processor.test_items import NanoTestProcessor

proc = NanoTestProcessor()

one = proc.accumulator.identity()
two = proc.accumulator.identity()
hlist1 = [lz4f.compress(pkl.dumps(one))]
hlist2 = [lz4f.compress(pkl.dumps(one)), lz4f.compress(pkl.dumps(two))]
harray1 = np.array(hlist1, dtype='O')
harray2 = np.array(hlist2, dtype='O')

series1 = pd.Series(harray1)
series2 = pd.Series(harray2)
df = pd.DataFrame({'histos': harray2})

# correctness of these functions is checked in test_spark_executor
agg1 = agg_histos_raw(series1, proc, 1)
agg2 = agg_histos_raw(series2, proc, 1)
red = reduce_histos_raw(df, proc, 1)
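This test only exercises the compression side; the matching read path is pkl.loads(lz4f.decompress(...)), which is what the aggregation helpers do internally. A minimal sketch of the round trip with an illustrative object:

import pickle as pkl
import lz4.frame as lz4f

obj = {'entries': 42}                    # stand-in for an accumulator
blob = lz4f.compress(pkl.dumps(obj))     # pickle first, then frame-compress
assert pkl.loads(lz4f.decompress(blob)) == obj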

github python-lz4 / python-lz4 / tests / frame / test_frame_5.py
import gc

import lz4.frame

# MEM_INCREASE_LIMIT is a module-level constant defined elsewhere in the test file

def test_frame_decompress_mem_usage(data):
    tracemalloc = pytest.importorskip('tracemalloc')

    tracemalloc.start()

    compressed = lz4.frame.compress(data)
    prev_snapshot = None

    for i in range(1000):
        decompressed = lz4.frame.decompress(compressed)  # noqa: F841

        if i % 100 == 0:
            gc.collect()
            snapshot = tracemalloc.take_snapshot()

            if prev_snapshot:
                stats = snapshot.compare_to(prev_snapshot, 'lineno')
                assert stats[0].size_diff < MEM_INCREASE_LIMIT

            prev_snapshot = snapshot

github python-lz4 / python-lz4 / tests / frame / test_frame_3.py
import struct

import pytest

import lz4.frame as lz4frame

def test_content_checksum_failure(data):
    compressed = lz4frame.compress(data, content_checksum=True)
    message = r'^LZ4F_decompress failed with code: ERROR_contentChecksum_invalid$'
    with pytest.raises(RuntimeError, match=message):
        last = struct.unpack('B', compressed[-1:])[0]
        lz4frame.decompress(compressed[:-1] + struct.pack('B', last ^ 0x42))
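The test works because content_checksum=True appends a frame-level checksum that lz4.frame.decompress verifies, so corrupting a byte of the frame surfaces as a RuntimeError. A minimal sketch of both paths, using only the public API:

import lz4.frame as lz4frame

payload = b'payload' * 1000
compressed = lz4frame.compress(payload, content_checksum=True)
assert lz4frame.decompress(compressed) == payload  # checksum verified silently

corrupted = compressed[:-1] + bytes([compressed[-1] ^ 0x42])
try:
    lz4frame.decompress(corrupted)   # checksum mismatch
except RuntimeError as exc:
    print(exc)                       # LZ4F_decompress failed ...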

github CoffeaTeam / coffea / coffea / processor / executor.py
        chunks.append(chunk)
        nchunks[filemeta.dataset] += 1
        if nchunks[filemeta.dataset] >= maxchunks:
            break

# pop all _work_function args here
savemetrics = executor_args.pop('savemetrics', False)
flatten = executor_args.pop('flatten', False)
mmap = executor_args.pop('mmap', False)
nano = executor_args.pop('nano', False)
cachestrategy = executor_args.pop('cachestrategy', None)
pi_compression = executor_args.pop('processor_compression', 1)
if pi_compression is None:
    pi_to_send = processor_instance
else:
    pi_to_send = lz4f.compress(cloudpickle.dumps(processor_instance), compression_level=pi_compression)
closure = partial(
    _work_function,
    flatten=flatten,
    savemetrics=savemetrics,
    mmap=mmap,
    nano=nano,
    cachestrategy=cachestrategy,
    skipbadfiles=skipbadfiles,
    retries=retries,
    xrootdtimeout=xrootdtimeout,
)
# hack around dask/dask#5503 which is really a silly request but here we are
if executor is dask_executor:
    executor_args['heavy_input'] = pi_to_send
    closure = partial(closure, processor_instance='heavy')
else:
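The excerpt breaks off at the final else, but the lz4-relevant piece is above: shipping a cloudpickled processor as a compressed blob, with compression_level taken from executor_args. A standalone sketch of that pattern (the object below is a placeholder for a real processor instance):

import cloudpickle
import lz4.frame as lz4f

processor_instance = {'cuts': ['met > 30']}   # placeholder for a real processor
pi_to_send = lz4f.compress(cloudpickle.dumps(processor_instance), compression_level=1)
restored = cloudpickle.loads(lz4f.decompress(pi_to_send))
assert restored == processor_instance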

github OpenMined / PySyft / syft / serde / serde.py
def apply_lz4_compression(decompressed_input_bin) -> tuple:
    """Apply LZ4 compression to the input.

    Args:
        decompressed_input_bin: the binary to be compressed

    Returns:
        A tuple (compressed_result, LZ4)
    """
    return lz4.frame.compress(decompressed_input_bin), LZ4
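The second tuple element tags the payload with the compression scheme so the deserializer knows how to reverse it. A hedged sketch of the matching decompression step (the LZ4 constant and function name below are illustrative stand-ins, not PySyft's actual definitions):

import lz4.frame

LZ4 = 1  # illustrative stand-in for the serde module's scheme constant

def apply_lz4_decompression(compressed_input_bin: bytes) -> bytes:
    # hypothetical counterpart to apply_lz4_compression
    return lz4.frame.decompress(compressed_input_bin)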

github PaloAltoNetworks / minemeld-core / minemeld / ft / taxii.py
        LOG.error('%s - indicator without confidence', self.name)
        sindicator.confidence = "Unknown"  # We shouldn't be here
    elif confidence < 50:
        sindicator.confidence = "Low"
    elif confidence < 75:
        sindicator.confidence = "Medium"
    else:
        sindicator.confidence = "High"

    sindicator.add_indicator_type(type_mapper['indicator_type'])

    sindicator.add_observable(o)

    sp.add_indicator(sindicator)

spackage = 'lz4' + lz4.frame.compress(
    sp.to_json(),
    compression_level=lz4.frame.COMPRESSIONLEVEL_MINHC
)
with self.SR.pipeline() as p:
    p.multi()

    p.zadd(self.redis_skey, score, spid)
    p.hset(self.redis_skey_value, spid, spackage)

    result = p.execute()[0]

self.statistics['added'] += result
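Two things worth noting here: COMPRESSIONLEVEL_MINHC is the lowest of lz4.frame's high-compression levels, and the 'lz4' + ... prefix concatenation is Python 2 code; under Python 3 the prefix would have to be b'lz4', since compress returns bytes. A quick sketch of the level constants the library exposes:

import lz4.frame

data = b'indicator,confidence,type\n' * 5000
fast = lz4.frame.compress(data, compression_level=lz4.frame.COMPRESSIONLEVEL_MIN)
hc = lz4.frame.compress(data, compression_level=lz4.frame.COMPRESSIONLEVEL_MINHC)
best = lz4.frame.compress(data, compression_level=lz4.frame.COMPRESSIONLEVEL_MAX)
print(len(fast), len(hc), len(best))  # higher levels trade CPU time for smaller frames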

github CoffeaTeam / coffea / coffea / processor / spark / spark_executor.py
def agg_histos_raw(series, processor_instance, lz4_clevel):
    goodlines = series[series.str.len() > 0]
    if goodlines.size == 1:  # short-circuit trivial aggregations
        return goodlines[0]
    outhist = processor_instance.accumulator.identity()
    for line in goodlines:
        outhist.add(pkl.loads(lz4f.decompress(line)))
    return lz4f.compress(pkl.dumps(outhist), compression_level=lz4_clevel)

github cloud-custodian / cloud-custodian / tools / c7n_salactus / c7n_salactus / rqworker.py
import msgpack
from lz4.frame import compress  # encode_ext is defined elsewhere in the module

def dumps(o):
    return compress(
        msgpack.packb(o, default=encode_ext, use_bin_type=True))
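dumps packs with msgpack and then frame-compresses the result; the natural counterpart reverses both steps. A hedged sketch of that inverse (decode_ext is a hypothetical hook mirroring the module's encode_ext):

import msgpack
from lz4.frame import decompress

def decode_ext(code, data):
    # hypothetical inverse of the module's encode_ext
    return msgpack.ExtType(code, data)

def loads(b):
    # un-frame first, then unpack the msgpack payload
    return msgpack.unpackb(decompress(b), ext_hook=decode_ext, raw=False)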