How to use the fastparquet.compression.compressions.keys function in fastparquet

To help you get started, we’ve selected a few fastparquet examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: GitHub — kootenpv/shrynk, file shrynk/classes/pandas_.py (View on GitHub)
if y
        ]
        for x in pyarrow.compress.__doc__.split("\n")
        if "upported types" in x
    ][0]
except ImportError:
    arrow_exceptions = ()
    _pyarrow = []

# OPTIONAL: fastparquet support -- when the package is missing, no options are offered
try:
    from fastparquet.compression import compressions
except ImportError:
    _fastparquet_opts = []
else:
    # One option dict per codec fastparquet exposes.
    # BROTLI is left out on purpose: it is flagged as bugged.
    _fastparquet_opts = [
        {"engine": "fastparquet", "compression": codec}
        for codec in compressions
        if codec != "BROTLI"
    ]


def estimate_uniqueness_proportion(df, col, r: int = 10000) -> float:
    """Estimate the fraction of distinct values in ``df[col]`` from a random sample.

    ``r`` row positions are drawn uniformly at random (with replacement).
    With ``f_j`` the number of distinct values seen exactly ``j`` times in
    the sample, the estimated distinct count is
    ``sqrt(n / r) * f_1 + sum(f_j for j > 1)``, which is then divided by the
    row count ``n``.

    Args:
        df: DataFrame holding the column. Any index is accepted because rows
            are selected by position, not by label.
        col: Label of the column to inspect.
        r: Number of rows to sample.

    Returns:
        The estimated number of distinct values divided by ``n``; ``0.0``
        when the frame has no rows.
    """
    n = df.shape[0]
    if n == 0:
        # Nothing to sample from; also avoids dividing by zero below.
        return 0.0
    # Select by position: plain ``df[col][...]`` would interpret the random
    # integers as index *labels* and break on any non-default index.
    positions = np.random.randint(0, n, r)
    sample = df[col].iloc[positions]
    # Frequency of frequencies: "seen j times" -> number of such values.
    fis = Counter(sample.value_counts())
    repeated = sum(count for times, count in fis.items() if times > 1)
    estimate = math.sqrt(n / r) * fis[1] + repeated
    return estimate / n


class PandasCompressor(Predictor, BaseCompressor):