How to use the streamz.dataframe.aggregations.Aggregation class in streamz

To help you get started, we’ve selected a few streamz examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
class Count(Aggregation):
    """Running total of whatever ``.count()`` reports for each batch.

    Every hook returns the accumulator twice, ``(state, result)``, because
    for this aggregation the emitted value is the state itself.
    """

    def initial(self, new):
        """Return the starting state: the count of a zero-row slice of ``new``."""
        empty = new.iloc[:0]
        return empty.count()

    def on_new(self, acc, new):
        """Add the count of the arriving batch ``new`` to ``acc``."""
        arriving = new.count()
        total = acc + arriving
        return total, total

    def on_old(self, acc, old):
        """Subtract the count of the expiring batch ``old`` from ``acc``."""
        expiring = old.count()
        total = acc - expiring
        return total, total


class Size(Aggregation):
    """Running total of the ``.size`` attribute of each batch.

    The state is a plain number that starts at zero; each hook returns it
    twice, as ``(state, result)``.
    """

    def initial(self, new):
        """Return the starting state, ``0``, regardless of ``new``."""
        return 0

    def on_new(self, acc, new):
        """Grow ``acc`` by ``new.size``."""
        total = acc + new.size
        return total, total

    def on_old(self, acc, old):
        """Shrink ``acc`` by ``old.size``."""
        total = acc - old.size
        return total, total


class Var(Aggregation):
    def __init__(self, ddof=1):
        """Store ``ddof`` on the instance.

        ``_compute_result`` elsewhere in this file subtracts it from the
        sample count in the variance denominator when it is non-zero.
        """
        self.ddof = ddof
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
def on_old(self, acc, old, grouper=None):
        """Remove the per-group sizes of the expiring batch ``old`` from ``acc``.

        Groups missing on either side are treated as zero, the result is
        cast back to ``int`` and keeps the accumulator's index name.
        Returns ``(state, result)``, both the same object.
        """
        expired_sizes = self.grouped(old, grouper=grouper).size()
        remaining = acc.sub(expired_sizes, fill_value=0).astype(int)
        # The aligned subtraction may not carry the index name over; restore it.
        remaining.index.name = acc.index.name
        return remaining, remaining

    def initial(self, new, grouper=None):
        """Return the starting state: group sizes of a zero-row slice of ``new``.

        When ``grouper`` is itself data (anything with ``.iloc``, a NumPy
        array, or an index-like object) it is truncated to zero length too,
        so that it still lines up with the empty frame.
        """
        empty_key = grouper
        if hasattr(empty_key, 'iloc'):
            empty_key = empty_key.iloc[:0]
        if isinstance(empty_key, np.ndarray) or is_index_like(empty_key):
            empty_key = empty_key[:0]
        empty_groups = self.grouped(new.iloc[:0], grouper=empty_key)
        return empty_groups.size()


class ValueCounts(Aggregation):
    """Running ``value_counts`` over the batches seen so far.

    The ``grouper`` argument is accepted by every hook but not used.
    Each hook returns ``(state, result)``, both the same object.
    """

    def initial(self, new, grouper=None):
        """Return the starting state: ``value_counts`` of a zero-row slice."""
        empty = new.iloc[:0]
        return empty.value_counts()

    def on_new(self, acc, new, grouper=None):
        """Add the value counts of the arriving batch to ``acc``."""
        arriving = new.value_counts()
        # Values present on only one side count as zero on the other.
        merged = acc.add(arriving, fill_value=0).astype(int)
        merged.index.name = acc.index.name
        return merged, merged

    def on_old(self, acc, new, grouper=None):
        """Subtract the value counts of the expiring batch from ``acc``.

        The expiring batch is passed in the parameter named ``new``.
        """
        expiring = new.value_counts()
        merged = acc.sub(expiring, fill_value=0).astype(int)
        merged.index.name = acc.index.name
        return merged, merged


class GroupbyMean(GroupbyAggregation):
    def on_new(self, acc, new, grouper=None):
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
""" An accumulation binary operator

    This is the function that is actually given to the ``Stream.accumulate``
    function.

    See Also
    --------
    windowed_accumulator
    windowed_groupby_accumulator
    """
    if acc is None:
        acc = agg.initial(new)
    return agg.on_new(acc, new)


class GroupbyAggregation(Aggregation):
    """Base for aggregations that operate on ``df.groupby(...)`` results."""

    def __init__(self, columns, grouper=None, **kwargs):
        """Store the column selection, the default grouper and any extras.

        Every additional keyword argument becomes an instance attribute of
        the same name.
        """
        self.columns = columns
        self.grouper = grouper
        for name in kwargs:
            setattr(self, name, kwargs[name])

    def grouped(self, df, grouper=None):
        """Group ``df`` and narrow the result to ``self.columns`` if set.

        ``grouper`` overrides the instance's own grouper unless it is
        ``None``.
        """
        key = self.grouper if grouper is None else grouper
        groups = df.groupby(key)
        if self.columns is None:
            return groups
        return groups[self.columns]
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
return result, result

    def on_old(self, acc, old):
        """Subtract the sum of the expiring batch ``old`` from ``acc``.

        Returns ``(state, result)``, both the same value.
        """
        expired = old.sum()
        remaining = acc - expired
        return remaining, remaining

    def initial(self, new):
        """Return a zero accumulator shaped like ``new.sum()``.

        A scalar sum yields the plain integer ``0``; any other sum result is
        zeroed in place so its labels are kept.
        """
        zero = new.sum()
        if not isinstance(zero, Number):
            zero[:] = 0
            return zero
        return 0


class Mean(Aggregation):
    def on_new(self, acc, new):
        """Fold the arriving batch ``new`` into the running mean.

        Parameters
        ----------
        acc : tuple
            ``(totals, counts)`` as produced by ``initial`` or a previous
            call.
        new : pandas-like object supporting ``len``, ``sum`` and ``count``

        Returns
        -------
        tuple
            ``((totals, counts), mean)``. The first element is the state to
            pass to the next call; the second is ``totals / counts``.
        """
        totals, counts = acc
        if len(new):
            totals = totals + new.sum()
            counts = counts + new.count()
        # Guard only the divisor. Writing the substitute 1 back into the
        # stored count would inflate every later mean by one phantom sample.
        divisor = counts
        if isinstance(divisor, Number) and divisor == 0:
            divisor = 1
        return (totals, counts), totals / divisor

    def on_old(self, acc, old):
        totals, counts = acc
        if len(old):
            totals = totals - old.sum()
            counts = counts - old.count()
        if isinstance(counts, Number) and counts == 0:
            counts = 1
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
return (x, x2, n), self._compute_result(x, x2, n)

    def initial(self, new):
        """Return the zeroed starting state ``(sum, sum_of_squares, count)``.

        Scalar sums give ``(0, 0, 0)``. Otherwise the sum and count results
        are zeroed in place, and the zeroed sum object is used for both of
        the first two slots.
        """
        total = new.sum()
        count = new.count()
        if isinstance(total, Number):
            return (0, 0, 0)
        total[:] = 0
        count[:] = 0
        return (total, total, count)


class Full(Aggregation):
    """ Return the full window of data every time

    This is somewhat expensive, builtin aggregations should be preferred when
    possible
    """
    def initial(self, new):
        """Return the starting state: a zero-row slice of ``new``."""
        return new.iloc[:0]

    def on_new(self, acc, new):
        """Append ``new`` to the window using the dataframe package of ``new``."""
        combined = get_dataframe_package(new).concat([acc, new])
        return combined, combined

    def on_old(self, acc, old):
        """Drop the first ``len(old)`` rows from the window."""
        n_expired = len(old)
        remaining = acc.iloc[n_expired:]
        return remaining, remaining
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
if isinstance(counts, Number) and counts == 0:
            counts = 1
        return (totals, counts), totals / counts

    def initial(self, new):
        """Return the zeroed starting state ``(totals, counts)``.

        Scalar sums give ``(0, 0)``; otherwise the sum and count results are
        zeroed in place so they keep their labels.
        """
        totals = new.sum()
        counts = new.count()
        if isinstance(totals, Number):
            return (0, 0)
        totals[:] = 0
        counts[:] = 0
        return (totals, counts)


class Count(Aggregation):
    """Running total of whatever ``.count()`` reports for each batch.

    Every hook returns the accumulator twice, ``(state, result)``, because
    for this aggregation the emitted value is the state itself.
    """

    def initial(self, new):
        """Return the starting state: the count of a zero-row slice of ``new``."""
        empty = new.iloc[:0]
        return empty.count()

    def on_new(self, acc, new):
        """Add the count of the arriving batch ``new`` to ``acc``."""
        arriving = new.count()
        total = acc + arriving
        return total, total

    def on_old(self, acc, old):
        """Subtract the count of the expiring batch ``old`` from ``acc``."""
        expiring = old.count()
        total = acc - expiring
        return total, total


class Size(Aggregation):
    def on_new(self, acc, new):
        """Grow ``acc`` by ``new.size`` and return it as ``(state, result)``."""
        total = acc + new.size
        return total, total
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
from __future__ import division, print_function

from collections import deque
from numbers import Number

import numpy as np
import pandas as pd
from .utils import is_series_like, is_index_like, get_dataframe_package


class Aggregation(object):
    """Common base class for the aggregations in this module.

    It defines no behavior of its own. The subclasses in this file provide
    ``initial(new)`` to build a starting accumulator, plus ``on_new(acc, new)``
    and ``on_old(acc, old)``, which each return a ``(state, result)`` pair.
    """
    pass


class Sum(Aggregation):
    def on_new(self, acc, new):
        """Add the sum of the arriving batch ``new`` to ``acc``.

        An empty batch leaves the accumulator untouched. Returns
        ``(state, result)``, both the same value.
        """
        if not len(new):
            return acc, acc
        total = acc + new.sum()
        return total, total

    def on_old(self, acc, old):
        """Subtract the sum of the expiring batch ``old`` from ``acc``.

        Returns ``(state, result)``, both the same value.
        """
        expired = old.sum()
        remaining = acc - expired
        return remaining, remaining

    def initial(self, new):
        result = new.sum()
        if isinstance(result, Number):
            result = 0
        else:
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
class Size(Aggregation):
    """Running total of the ``.size`` attribute of each batch.

    The state is a plain number that starts at zero; each hook returns it
    twice, as ``(state, result)``.
    """

    def initial(self, new):
        """Return the starting state, ``0``, regardless of ``new``."""
        return 0

    def on_new(self, acc, new):
        """Grow ``acc`` by ``new.size``."""
        total = acc + new.size
        return total, total

    def on_old(self, acc, old):
        """Shrink ``acc`` by ``old.size``."""
        total = acc - old.size
        return total, total


class Var(Aggregation):
    def __init__(self, ddof=1):
        """Store ``ddof`` on the instance.

        ``_compute_result`` subtracts it from the sample count in the
        variance denominator when it is non-zero.
        """
        self.ddof = ddof

    def _compute_result(self, x, x2, n):
        result = (x2 / n) - (x / n) ** 2
        if self.ddof != 0:
            result = result * n / (n - self.ddof)
        return result

    def on_new(self, acc, new):
        x, x2, n = acc
        if len(new):
            x = x + new.sum()
            x2 = x2 + (new ** 2).sum()
            n = n + new.count()