How to use the streamz.dataframe.aggregations.GroupbyAggregation class in streamz

To help you get started, we’ve selected a few streamz examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
def on_old(self, acc, old, grouper=None):
        """Subtract the per-group sums of ``old`` from the accumulator ``acc``.

        A group present on only one side is treated as 0 on the other
        (``fill_value=0``).  The index name of ``acc`` is carried over to the
        result.

        Returns a 2-tuple holding the same updated object twice.
        """
        departed = self.grouped(old, grouper=grouper).sum()
        remaining = acc.sub(departed, fill_value=0)
        remaining.index.name = acc.index.name
        return remaining, remaining

    def initial(self, new, grouper=None):
        """Build the empty starting accumulator from ``new``.

        Both the data and the grouper are cut down to zero rows, so the
        returned group sums carry the structure of ``new`` without any data.
        """
        # Anything exposing ``.iloc`` is emptied positionally first.
        empty_key = grouper.iloc[:0] if hasattr(grouper, 'iloc') else grouper
        # Arrays and index-like groupers are emptied by plain slicing.
        if isinstance(empty_key, np.ndarray) or is_index_like(empty_key):
            empty_key = empty_key[:0]
        empty_groups = self.grouped(new.iloc[:0], grouper=empty_key)
        return empty_groups.sum()


class GroupbyCount(GroupbyAggregation):
    def on_new(self, acc, new, grouper=None):
        """Add the per-group counts of ``new`` to the accumulator ``acc``.

        A group present on only one side is treated as 0 on the other
        (``fill_value=0``).  The result is cast to ``int`` and keeps the
        index name of ``acc``.

        Returns a 2-tuple holding the same updated object twice.
        """
        arrived = self.grouped(new, grouper=grouper).count()
        updated = acc.add(arrived, fill_value=0).astype(int)
        updated.index.name = acc.index.name
        return updated, updated

    def on_old(self, acc, old, grouper=None):
        """Subtract the per-group counts of ``old`` from the accumulator ``acc``.

        A group present on only one side is treated as 0 on the other
        (``fill_value=0``).  The result is cast to ``int`` and keeps the
        index name of ``acc``.

        Returns a 2-tuple holding the same updated object twice.
        """
        departed = self.grouped(old, grouper=grouper).count()
        updated = acc.sub(departed, fill_value=0).astype(int)
        updated.index.name = acc.index.name
        return updated, updated

    def initial(self, new, grouper=None):
        if hasattr(grouper, 'iloc'):
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
for k, v in kwargs.items():
            setattr(self, k, v)

    def grouped(self, df, grouper=None):
        if grouper is None:
            grouper = self.grouper

        g = df.groupby(grouper)

        if self.columns is not None:
            g = g[self.columns]

        return g


class GroupbySum(GroupbyAggregation):
    def on_new(self, acc, new, grouper=None):
        """Add the per-group sums of ``new`` to the accumulator ``acc``.

        A group present on only one side is treated as 0 on the other
        (``fill_value=0``).  The index name of ``acc`` is carried over to the
        result.

        Returns a 2-tuple holding the same updated object twice.
        """
        arrived = self.grouped(new, grouper=grouper).sum()
        updated = acc.add(arrived, fill_value=0)
        updated.index.name = acc.index.name
        return updated, updated

    def on_old(self, acc, old, grouper=None):
        """Subtract the per-group sums of ``old`` from the accumulator ``acc``.

        A group present on only one side is treated as 0 on the other
        (``fill_value=0``).  The index name of ``acc`` is carried over to the
        result.

        Returns a 2-tuple holding the same updated object twice.
        """
        departed = self.grouped(old, grouper=grouper).sum()
        updated = acc.sub(departed, fill_value=0)
        updated.index.name = acc.index.name
        return updated, updated

    def initial(self, new, grouper=None):
        if hasattr(grouper, 'iloc'):
            grouper = grouper.iloc[:0]
        if isinstance(grouper, np.ndarray) or is_index_like(grouper):
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
class ValueCounts(Aggregation):
    """Running tally of value occurrences, built on ``value_counts()``.

    None of the methods use the ``grouper`` argument; it is only accepted.
    """

    def on_new(self, acc, new, grouper=None):
        """Add the value counts of ``new`` to ``acc``.

        The result is cast to ``int`` and keeps the index name of ``acc``.
        Returns a 2-tuple holding the same updated object twice.
        """
        tally = acc.add(new.value_counts(), fill_value=0)
        tally = tally.astype(int)
        tally.index.name = acc.index.name
        return tally, tally

    def on_old(self, acc, new, grouper=None):
        """Subtract the value counts of the passed data from ``acc``.

        The data argument is named ``new`` in this signature.  The result is
        cast to ``int`` and keeps the index name of ``acc``.
        Returns a 2-tuple holding the same updated object twice.
        """
        tally = acc.sub(new.value_counts(), fill_value=0)
        tally = tally.astype(int)
        tally.index.name = acc.index.name
        return tally, tally

    def initial(self, new, grouper=None):
        """Return the value counts of a zero-row slice of ``new``."""
        empty = new.iloc[:0]
        return empty.value_counts()


class GroupbyMean(GroupbyAggregation):
    def on_new(self, acc, new, grouper=None):
        """Fold ``new`` into the running per-group totals and counts.

        ``acc`` is a ``(totals, counts)`` pair.  The group sums and group
        counts of ``new`` are added to it with ``fill_value=0``, and both
        results keep the index names of their predecessors.

        Returns ``((totals, counts), totals / counts)``.
        """
        prev_totals, prev_counts = acc
        groups = self.grouped(new, grouper=grouper)
        sum_now = prev_totals.add(groups.sum(), fill_value=0)
        count_now = prev_counts.add(groups.count(), fill_value=0)
        sum_now.index.name = prev_totals.index.name
        count_now.index.name = prev_counts.index.name
        state = (sum_now, count_now)
        return state, sum_now / count_now

    def on_old(self, acc, old, grouper=None):
        totals, counts = acc
        g = self.grouped(old, grouper=grouper)
        totals = totals.sub(g.sum(), fill_value=0)
        counts = counts.sub(g.count(), fill_value=0)
        totals.index.name = acc[0].index.name
        counts.index.name = acc[1].index.name
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
totals = totals.sub(g.sum(), fill_value=0)
        counts = counts.sub(g.count(), fill_value=0)
        totals.index.name = acc[0].index.name
        counts.index.name = acc[1].index.name
        return (totals, counts), totals / counts

    def initial(self, new, grouper=None):
        """Build the empty starting ``(sums, counts)`` pair from ``new``.

        Both the data and the grouper are cut down to zero rows before
        grouping, so the pair carries the structure of ``new`` without data.
        """
        # Anything exposing ``.iloc`` is emptied positionally first.
        empty_key = grouper.iloc[:0] if hasattr(grouper, 'iloc') else grouper
        # Arrays and index-like groupers are emptied by plain slicing.
        if isinstance(empty_key, np.ndarray) or is_index_like(empty_key):
            empty_key = empty_key[:0]
        empty_groups = self.grouped(new.iloc[:0], grouper=empty_key)
        return (empty_groups.sum(), empty_groups.count())


class GroupbyVar(GroupbyAggregation):
    def _compute_result(self, x, x2, n):
        """Compute ``x2 / n - (x / n) ** 2``, rescaled for ``self.ddof``.

        When ``self.ddof`` is non-zero the value is multiplied by
        ``n / (n - self.ddof)``.  No guard is applied for ``n - self.ddof``
        being zero.
        """
        mean_of_squares = x2 / n
        square_of_mean = (x / n) ** 2
        variance = mean_of_squares - square_of_mean
        if self.ddof == 0:
            return variance
        return variance * n / (n - self.ddof)

    def on_new(self, acc, new, grouper=None):
        """Fold ``new`` into the running sums, sums of squares and counts.

        ``acc`` is an ``(x, x2, n)`` triple.  The three parts are only updated
        when ``new`` has at least one row; the grouping call itself is made
        either way.

        Returns ``((x, x2, n), result)`` where ``result`` comes from
        ``self._compute_result``.
        """
        sums, sq_sums, counts = acc
        groups = self.grouped(new, grouper=grouper)
        if len(new) > 0:
            sums = sums.add(groups.sum(), fill_value=0)
            group_sq = groups.agg(lambda col: (col ** 2).sum())
            sq_sums = sq_sums.add(group_sq, fill_value=0)
            counts = counts.add(groups.count(), fill_value=0)

        state = (sums, sq_sums, counts)
        return state, self._compute_result(sums, sq_sums, counts)
github python-streamz / streamz / streamz / dataframe / aggregations.py View on Github external
def on_old(self, acc, old, grouper=None):
        """Subtract the per-group counts of ``old`` from the accumulator ``acc``.

        A group present on only one side is treated as 0 on the other
        (``fill_value=0``).  The result is cast to ``int`` and keeps the
        index name of ``acc``.

        Returns a 2-tuple holding the same updated object twice.
        """
        departed = self.grouped(old, grouper=grouper).count()
        remaining = acc.sub(departed, fill_value=0).astype(int)
        remaining.index.name = acc.index.name
        return remaining, remaining

    def initial(self, new, grouper=None):
        """Build the empty starting accumulator from ``new``.

        Both the data and the grouper are cut down to zero rows, so the
        returned group counts carry the structure of ``new`` without any data.
        """
        # Anything exposing ``.iloc`` is emptied positionally first.
        empty_key = grouper.iloc[:0] if hasattr(grouper, 'iloc') else grouper
        # Arrays and index-like groupers are emptied by plain slicing.
        if isinstance(empty_key, np.ndarray) or is_index_like(empty_key):
            empty_key = empty_key[:0]
        empty_groups = self.grouped(new.iloc[:0], grouper=empty_key)
        return empty_groups.count()


class GroupbySize(GroupbyAggregation):
    def on_new(self, acc, new, grouper=None):
        """Add the per-group sizes of ``new`` to the accumulator ``acc``.

        A group present on only one side is treated as 0 on the other
        (``fill_value=0``).  The result is cast to ``int`` and keeps the
        index name of ``acc``.

        Returns a 2-tuple holding the same updated object twice.
        """
        arrived = self.grouped(new, grouper=grouper).size()
        updated = acc.add(arrived, fill_value=0).astype(int)
        updated.index.name = acc.index.name
        return updated, updated

    def on_old(self, acc, old, grouper=None):
        """Subtract the per-group sizes of ``old`` from the accumulator ``acc``.

        A group present on only one side is treated as 0 on the other
        (``fill_value=0``).  The result is cast to ``int`` and keeps the
        index name of ``acc``.

        Returns a 2-tuple holding the same updated object twice.
        """
        departed = self.grouped(old, grouper=grouper).size()
        updated = acc.sub(departed, fill_value=0).astype(int)
        updated.index.name = acc.index.name
        return updated, updated

    def initial(self, new, grouper=None):
        if hasattr(grouper, 'iloc'):