Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
class Count(Aggregation):
    """Windowed aggregation that accumulates ``.count()`` of each batch."""

    def initial(self, new):
        """Return the starting state: ``.count()`` of a zero-row slice of ``new``."""
        empty = new.iloc[:0]
        return empty.count()

    def on_new(self, acc, new):
        """Add the count of the arriving batch; return ``(state, result)``."""
        total = acc + new.count()
        return total, total

    def on_old(self, acc, old):
        """Subtract the count of the expiring batch; return ``(state, result)``."""
        total = acc - old.count()
        return total, total
class Size(Aggregation):
    """Windowed aggregation that accumulates the ``.size`` of each batch."""

    def initial(self, new):
        """Return the starting state, the integer ``0`` (``new`` is not read)."""
        return 0

    def on_new(self, acc, new):
        """Add ``new.size`` to the running total; return ``(state, result)``."""
        total = acc + new.size
        return total, total

    def on_old(self, acc, old):
        """Subtract ``old.size`` from the running total; return ``(state, result)``."""
        total = acc - old.size
        return total, total
# NOTE(review): this block reads like two snippets spliced together: a ``Var``
# constructor followed by group-size style ``on_old``/``initial`` methods.
# Nothing visible in this file defines ``self.grouped`` on ``Var`` or on
# ``Aggregation``, and there is no ``on_new`` here -- confirm against the real
# source before relying on this class.
class Var(Aggregation):
def __init__(self, ddof=1):
# Only stored here; none of the methods below read it.
# Presumably "delta degrees of freedom" -- TODO confirm.
self.ddof = ddof
def on_old(self, acc, old, grouper=None):
# Group the expiring rows, then subtract their per-group sizes from ``acc``.
g = self.grouped(old, grouper=grouper)
# fill_value=0 treats a group missing on either side as size 0.
result = acc.sub(g.size(), fill_value=0)
result = result.astype(int)
# Carry the accumulator's index name over to the new state.
result.index.name = acc.index.name
# The same object is returned as both the new state and the emitted result.
return result, result
def initial(self, new, grouper=None):
# Trim the grouper to zero length so it matches the zero-row slice of ``new``.
if hasattr(grouper, 'iloc'):
grouper = grouper.iloc[:0]
if isinstance(grouper, np.ndarray) or is_index_like(grouper):
grouper = grouper[:0]
# Starting state: group sizes computed over an empty frame.
return self.grouped(new.iloc[:0], grouper=grouper).size()
class ValueCounts(Aggregation):
    """Windowed aggregation that maintains running ``value_counts()``.

    The ``grouper`` argument is accepted by every method but never read.
    """

    def initial(self, new, grouper=None):
        """Return the starting state: value counts of a zero-row slice of ``new``."""
        empty = new.iloc[:0]
        return empty.value_counts()

    def on_new(self, acc, new, grouper=None):
        """Add the arriving batch's value counts; return ``(state, result)``."""
        # fill_value=0 treats a value missing on either side as a zero count.
        merged = acc.add(new.value_counts(), fill_value=0)
        merged = merged.astype(int)
        merged.index.name = acc.index.name
        return merged, merged

    def on_old(self, acc, new, grouper=None):
        """Subtract the expiring batch's value counts; return ``(state, result)``.

        The expiring batch is passed in the parameter named ``new``.
        """
        remaining = acc.sub(new.value_counts(), fill_value=0)
        remaining = remaining.astype(int)
        remaining.index.name = acc.index.name
        return remaining, remaining
# NOTE(review): this looks like a splice of two snippets. The method body reads
# ``agg``, which is neither a parameter nor assigned anywhere in this block, and
# it never uses ``self`` or ``grouper`` -- presumably the body was lifted from a
# free accumulator function that received ``agg``; confirm against the real
# source. ``GroupbyAggregation`` is also defined after this class in the
# visible text.
class GroupbyMean(GroupbyAggregation):
def on_new(self, acc, new, grouper=None):
""" An accumulation binary operator
This is the function that is actually given to the ``Stream.accumulate``
function.
See Also
--------
windowed_accumulator
windowed_groupby_accumulator
"""
# No state yet: seed it from the incoming batch.
if acc is None:
acc = agg.initial(new)
# Hand back whatever ``agg.on_new`` produces for this batch.
return agg.on_new(acc, new)
class GroupbyAggregation(Aggregation):
    """Aggregation that works on a groupby of the incoming data.

    Holds a default grouper and an optional column selection, and builds the
    matching groupby object through :meth:`grouped`.
    """

    def __init__(self, columns, grouper=None, **kwargs):
        """Store the grouping configuration.

        Parameters
        ----------
        columns
            Selection applied to the groupby object; ``None`` keeps it as is.
        grouper
            Default key handed to ``df.groupby`` when a call supplies none.
        **kwargs
            Each entry is set verbatim as an instance attribute.
        """
        self.grouper = grouper
        self.columns = columns
        for k, v in kwargs.items():
            setattr(self, k, v)

    def grouped(self, df, grouper=None):
        """Return ``df`` grouped by ``grouper``, narrowed to ``self.columns``.

        Falls back to ``self.grouper`` when ``grouper`` is ``None``.
        """
        if grouper is None:
            grouper = self.grouper
        g = df.groupby(grouper)
        if self.columns is not None:
            g = g[self.columns]
        return g

    # NOTE(review): the two methods below are plain (ungrouped) sum logic and
    # take no ``grouper``; presumably they were pasted in from a Sum-style
    # aggregation. Kept as found so existing callers still work -- confirm
    # whether they belong on this class.

    def on_old(self, acc, old):
        """Subtract the sum of the expiring batch; return ``(state, result)``."""
        result = acc - old.sum()
        return result, result

    def initial(self, new):
        """Return a zero with the same shape as ``new.sum()``."""
        result = new.sum()
        if isinstance(result, Number):
            result = 0
        else:
            # Non-scalar sum: zero it in place so the index/labels are kept.
            result[:] = 0
        return result
class Mean(Aggregation):
    """Windowed mean kept as a running ``(totals, counts)`` pair.

    Each of ``on_new`` / ``on_old`` returns ``(state, result)`` where ``state``
    is the updated ``(totals, counts)`` tuple and ``result`` is
    ``totals / counts``.
    """

    @staticmethod
    def _divisor(counts):
        """Return ``counts``, or ``1`` when it is a scalar zero.

        Only the divisor is adjusted; the stored count must stay at its true
        value, otherwise later batches would be averaged over one element too
        many.
        """
        if isinstance(counts, Number) and counts == 0:
            return 1
        return counts

    def on_new(self, acc, new):
        """Fold an arriving batch into the running totals and counts."""
        totals, counts = acc
        if len(new):
            totals = totals + new.sum()
            counts = counts + new.count()
        return (totals, counts), totals / self._divisor(counts)

    def on_old(self, acc, old):
        """Remove an expiring batch from the running totals and counts."""
        totals, counts = acc
        if len(old):
            totals = totals - old.sum()
            counts = counts - old.count()
        # Same two-element state and result shape as ``on_new``.
        return (totals, counts), totals / self._divisor(counts)

    def initial(self, new):
        """Return the zero state ``(totals, counts)`` shaped like ``new``'s sums."""
        s = new.sum()
        c = new.count()
        if isinstance(s, Number):
            s = 0
            c = 0
        else:
            # Non-scalar: zero in place so both keep their index/labels.
            s[:] = 0
            c[:] = 0
        # Two elements, matching the ``totals, counts = acc`` unpacking above.
        return (s, c)
class Full(Aggregation):
    """Emit the entire current window on every update.

    This keeps and re-emits all rows, which is comparatively costly; prefer the
    dedicated aggregations when one fits.
    """

    def initial(self, new):
        """Return the starting state: a zero-row slice of ``new``."""
        return new.iloc[:0]

    def on_new(self, acc, new):
        """Append the arriving batch to the window; return ``(state, result)``."""
        window = get_dataframe_package(new).concat([acc, new])
        return window, window

    def on_old(self, acc, old):
        """Drop the first ``len(old)`` rows of the window; return ``(state, result)``."""
        n_expired = len(old)
        window = acc.iloc[n_expired:]
        return window, window
if isinstance(counts, Number) and counts == 0:
counts = 1
return (totals, counts), totals / counts
def initial(self, new):
    """Return the zero state ``(totals, counts)`` shaped like ``new``'s sums.

    A scalar sum yields ``(0, 0)``; otherwise the sum and count objects are
    zeroed in place and returned.
    """
    totals = new.sum()
    counts = new.count()
    if not isinstance(totals, Number):
        # Non-scalar: overwrite every entry so the labels are kept.
        totals[:] = 0
        counts[:] = 0
        return (totals, counts)
    return (0, 0)
class Count(Aggregation):
    """Windowed aggregation that accumulates ``.count()`` of each batch."""

    def initial(self, new):
        """Return the starting state: ``.count()`` of a zero-row slice of ``new``."""
        zero_rows = new.iloc[:0]
        return zero_rows.count()

    def on_new(self, acc, new):
        """Add the count of the arriving batch; return ``(state, result)``."""
        running = acc + new.count()
        return running, running

    def on_old(self, acc, old):
        """Subtract the count of the expiring batch; return ``(state, result)``."""
        running = acc - old.count()
        return running, running
class Size(Aggregation):
    """Windowed aggregation that accumulates the ``.size`` of each batch."""

    def on_new(self, acc, new):
        """Add ``new.size`` to the running total; return ``(state, result)``."""
        total = acc + new.size
        return total, total
from __future__ import division, print_function
from collections import deque
from numbers import Number
import numpy as np
import pandas as pd
from .utils import is_series_like, is_index_like, get_dataframe_package
class Aggregation(object):
    """Common base class for the aggregations in this module.

    Defines no behaviour of its own; the subclasses in this file supply
    methods such as ``initial``, ``on_new`` and ``on_old``.
    """
class Sum(Aggregation):
    """Windowed aggregation that keeps a running ``.sum()``."""

    def on_new(self, acc, new):
        """Add the sum of the arriving batch; return ``(state, result)``.

        An empty batch leaves the accumulator untouched.
        """
        if len(new):
            result = acc + new.sum()
        else:
            result = acc
        return result, result

    def on_old(self, acc, old):
        """Subtract the sum of the expiring batch; return ``(state, result)``."""
        result = acc - old.sum()
        return result, result

    def initial(self, new):
        """Return a zero with the same shape as ``new.sum()``."""
        result = new.sum()
        if isinstance(result, Number):
            result = 0
        else:
            # Non-scalar sum: zero it in place so the index/labels are kept.
            result[:] = 0
        return result
class Size(Aggregation):
    """Windowed aggregation that accumulates the ``.size`` of each batch."""

    def initial(self, new):
        """Return the starting state, the integer ``0`` (``new`` is not read)."""
        return 0

    def on_new(self, acc, new):
        """Add ``new.size`` to the running total; return ``(state, result)``."""
        running = acc + new.size
        return running, running

    def on_old(self, acc, old):
        """Subtract ``old.size`` from the running total; return ``(state, result)``."""
        running = acc - old.size
        return running, running
class Var(Aggregation):
def __init__(self, ddof=1):
    """Remember ``ddof``, the offset ``_compute_result`` subtracts from ``n``."""
    self.ddof = ddof
def _compute_result(self, x, x2, n):
    """Turn running sums into a variance.

    Parameters
    ----------
    x : running sum of the values
    x2 : running sum of the squared values
    n : running count

    Returns ``x2/n - (x/n)**2``, rescaled by ``n / (n - ddof)`` when
    ``self.ddof`` is non-zero.
    """
    mean = x / n
    variance = (x2 / n) - mean ** 2
    if self.ddof == 0:
        return variance
    return variance * n / (n - self.ddof)
def on_new(self, acc, new):
x, x2, n = acc
if len(new):
x = x + new.sum()
x2 = x2 + (new ** 2).sum()
n = n + new.count()