github python-lz4 / python-lz4 / tests / frame / View on Github external
def test_lz4frame_open_write_read_text():
    """Round-trip a text string through an LZ4 frame file in text mode.

    Writes a unicode string to ``'testfile'`` with mode ``'wt'``, reads it
    back with mode ``'rt'`` and checks that the decoded text is unchanged.
    """
    # Local import keeps this snippet self-contained.
    import lz4.frame

    data = u'This is a test string'
    # Text mode encodes the str on write and decodes it again on read.
    with lz4.frame.open('testfile', mode='wt') as fp:
        fp.write(data)
    with lz4.frame.open('testfile', mode='rt') as fp:
        data_out = fp.read()
    assert data_out == data
github python-lz4 / python-lz4 / tests / frame / View on Github external
if store_size is True:
        kwargs['source_size'] = len(data)

    kwargs['compression_level'] = compression_level
    kwargs['block_size'] = block_size
    kwargs['block_linked'] = block_linked
    kwargs['content_checksum'] = content_checksum
    kwargs['block_checksum'] = block_checksum
    kwargs['auto_flush'] = auto_flush
    kwargs['return_bytearray'] = return_bytearray
    kwargs['mode'] = 'wb'

    with'testfile', **kwargs) as fp:

    with'testfile', mode='r') as fp:
        data_out =

    assert data_out == data
github ooni / pipeline / af / fastpath / fastpath / View on Github external
def writeout_measurement(msm_jstr, fn, update, tid):
    """Safely write measurement to disk
    # Different processes might be trying to write the same file at the same
    # time due to naming collisions. Use a safe tmpfile and atomic link
    # NamedTemporaryFile creates files with permissions 600
    # but we want other users (Nginx) to be able to read the measurement

    suffix = ".{}.tmp".format(os.getpid())
    with NamedTemporaryFile(suffix=suffix, dir=conf.msmtdir) as f:
        with, "w") as lzf:
            # os.fsync(lzf.fileno())

            final_fname = conf.msmtdir.joinpath(fn)
                os.chmod(, 0o644)
      , final_fname)
            except FileExistsError:
                if update:
                    # update access time - used for cache cleanup
                    # no need to overwrite the file
          "{tid} Refusing to overwrite {final_fname}")
github mars-project / mars / mars / serialize / View on Github external
    def compress(data):  # pragma: no cover
        return data

    def decompress(data):  # pragma: no cover
        return data

    import lz4.frame
        lz4.frame._compression.BUFFER_SIZE = BUFFER_SIZE
    except AttributeError:  # pragma: no cover
    lz4_open = functools.partial(, block_size=lz4.frame.BLOCKSIZE_MAX1MB)
    lz4_compress = functools.partial(lz4.frame.compress, block_size=lz4.frame.BLOCKSIZE_MAX1MB)
    lz4_compressobj = lz4.frame.LZ4FrameCompressor
    lz4_decompress = lz4.frame.decompress
    lz4_decompressobj = lz4.frame.LZ4FrameDecompressor
except ImportError:  # pragma: no cover
    lz4_open = None
    lz4_compress, lz4_compressobj = None, None
    lz4_decompress, lz4_decompressobj = None, None

gz_open =
gz_compressobj = functools.partial(
    lambda level=-1: zlib.compressobj(level, zlib.DEFLATED, 16 + zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
gz_decompressobj = functools.partial(lambda: zlib.decompressobj(16 + zlib.MAX_WBITS))
gz_compress = gzip.compress
gz_decompress = gzip.decompress
github CoffeaTeam / coffea / coffea / View on Github external
def load(filename):
    '''Load a coffea file from disk

    Opens ``filename`` as an LZ4 frame-compressed file and returns the
    object unpickled from it.

    NOTE(review): unpickling executes arbitrary code embedded in the file;
    only call this on files from a trusted source.
    '''
    # Local import keeps this snippet self-contained.
    import lz4.frame

    with lz4.frame.open(filename) as fin:
        output = cloudpickle.load(fin)
    return output
github ooni / pipeline / af / fastpath / fastpath / View on Github external
                log.debug("Loading nested %s",
                k = tf.extractfile(m)
                assert k is not None
                    for line in k:
                        yield (line, None)

                    continue  # FIXME
                    bucket_tstamp = "FIXME"
                    for msm in iter_yaml_msmt_normalized(k, bucket_tstamp):
                        yield (None, msm)

    elif fn.endswith(".json.lz4"):
        with as f:
            for line in f:
                yield (line, None)

    elif fn.endswith(".yaml.lz4"):
        with as f:
            raise Exception("Unsupported format: YAML")
            # bucket_tstamp = "FIXME"
            # for msm in iter_yaml_msmt_normalized(f, bucket_tstamp):
            #     metrics.incr("yaml_normalization")
            #     yield (None, msm)

        raise RuntimeError(fn)
github karlicoss / my / my / kython / View on Github external
zfile = ZipFile(pp)

        [subpath] = args # meh?

        ## oh god...
        ifile =, mode='r')
        ifile.readable = lambda: True  # type: ignore
        ifile.writable = lambda: False # type: ignore
        ifile.seekable = lambda: False # type: ignore
        ifile.read1    =    # type: ignore
        # TODO pass all kwargs here??
        # todo 'expected "BinaryIO"'??
        return io.TextIOWrapper(ifile, encoding=encoding) # type: ignore[arg-type]
    elif suf in {'.lz4'}:
        import lz4.frame # type: ignore
        return, mode, *args, **kwargs)
    elif suf in {'.zstd'}:
        return _zstd_open(pp, mode, *args, **kwargs)
        return, *args, **kwargs)
github radix-ai / graphchain / graphchain / View on Github external
        if callable(obj):
            objname = f"key={key} function={obj.__name__} hash={objhash}"
            objname = f"key={key} literal={type(obj)} hash={objhash}""EXECUTE {objname}")
        ret = obj(*args, **kwargs) if callable(obj) else obj
        if not skipcache:
            fileext = ".pickle.lz4" if compress else ".pickle"
            filepath = fs.path.join(CACHE_DIRNAME, objhash + fileext)
            if not storage.isfile(filepath):
      "STORE {objname}")
                    with, "wb") as fid:
                        if compress:
                            with, mode='wb') as _fid:
                                joblib.dump(ret, _fid, protocol=4)
                            joblib.dump(ret, fid, protocol=4)
                except Exception:
                    logger.exception("Could not dump object.")
                logger.warning(f"FILE_EXISTS {objname}")
        return ret
github ooni / pipeline / af / fastpath / fastpath / View on Github external
def iter_yaml_lz4_reports(fn):
    """Iterate YAML reports from a lz4 file
    assert str(fn).endswith("lz4")

    fd =
    blobgen = stream_yaml_blobs(fd)

    off, header = next(blobgen)
    headsha = hashlib.sha1(header)
    # XXX: bad header kills whole bucket
    header = yaml.load(header, Loader=CLoader)
    if not header.get("report_id"):
        header["report_id"] = generate_report_id(header)

    for off, entry in blobgen:
        entry_len = len(entry)
        esha = headsha.copy()
        esha = esha.digest()
            entry = yaml.load(entry, Loader=CLoader)