# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
window = self.n
elif self.value is not None:
diff = aggregations.diff_loc
window = self.value
outstream = stream.accumulate(aggregations.windowed_groupby_accumulator,
agg=agg,
start=None,
returns_state=True,
diff=diff,
window=window)
for typ, s_type in _stream_types[stream_type]:
if isinstance(example, typ):
return s_type(outstream, example)
return Streaming(outstream, example, stream_type=stream_type)
def _accumulate(self, Agg, **kwargs):
stream_type = 'updating'
if isinstance(self.grouper, Streaming):
stream = self.root.stream.zip(self.grouper.stream)
grouper_example = self.grouper.example
agg = Agg(self.index, grouper=None, **kwargs)
else:
stream = self.root.stream
grouper_example = self.grouper
agg = Agg(self.index, grouper=self.grouper, **kwargs)
# Compute example
state = agg.initial(self.root.example, grouper=grouper_example)
if hasattr(grouper_example, 'iloc'):
grouper_example = grouper_example.iloc[:0]
elif isinstance(grouper_example, (np.ndarray, pd.Index)):
grouper_example = grouper_example[:0]
_, example = agg.on_new(state,
self.root.example.iloc[:0],
else:
s = streams[0]
if isinstance(args[0], Streaming):
stream = s.stream.map(func, *args[1:], **kwargs)
else:
other = [(i, arg) for i, arg in enumerate(args)
if not isinstance(arg, Streaming)]
stream = s.stream.map(partial_by_order, function=func, other=other,
**kwargs)
for typ, s_type in _stream_types[stream_type]:
if isinstance(example, typ):
return s_type(stream, example)
return Streaming(stream, example, stream_type=stream_type)
grouper_example = grouper_example.iloc[:0]
elif isinstance(grouper_example, (np.ndarray, pd.Index)):
grouper_example = grouper_example[:0]
_, example = agg.on_new(state,
self.root.example.iloc[:0],
grouper=grouper_example)
outstream = stream.accumulate(aggregations.groupby_accumulator,
agg=agg,
start=None,
returns_state=True)
for typ, s_type in _stream_types[stream_type]:
if isinstance(example, typ):
return s_type(outstream, example)
return Streaming(outstream, example, stream_type=stream_type)
import operator
from time import time
import numpy as np
import pandas as pd
from tornado.ioloop import IOLoop
from tornado import gen
from ..collection import Streaming, _stream_types, OperatorMixin
from ..sources import Source
from ..utils import M
from . import aggregations
class BaseFrame(Streaming):
    """ Base class for streaming dataframe-like collections.

    Each method lazily maps the corresponding pandas operation (via the
    ``M`` method-dispatch helper) across every partition of the stream.
    """

    def round(self, decimals=0):
        """ Round elements in each partition to the given number of decimals """
        return self.map_partitions(M.round, self, decimals=decimals)

    def reset_index(self):
        """ Reset the index of each partition """
        return self.map_partitions(M.reset_index, self)

    def set_index(self, index, **kwargs):
        """ Set the index of each partition to the given column or values """
        return self.map_partitions(M.set_index, self, index, **kwargs)

    def tail(self, n=5):
        # Docstring previously said "Round elements in frame" — a
        # copy-paste error from ``round``; corrected to match behavior.
        """ Return the last ``n`` rows of each partition """
        return self.map_partitions(M.tail, self, n=n)
from .collection import Streaming, _stream_types
import toolz
import toolz.curried
class Batch(Streaming):
""" A Stream of tuples or lists
This streaming collection manages batches of Python objects such as lists
of text or dictionaries. By batching many elements together we reduce
overhead from Python.
This library is typically used at the early stages of data ingestion before
handing off to streaming dataframes
Examples
--------
>>> text = Streaming.from_file(myfile) # doctest: +SKIP
>>> b = text.partition(100).map(json.loads) # doctest: +SKIP
"""
def __init__(self, stream=None, example=None):
if example is None:
def map_partitions(func, *args, **kwargs):
""" Map a function across all batch elements of this stream
The output stream type will be determined by the action of that
function on the example
See Also
--------
Streaming.accumulate_partitions
"""
example = kwargs.pop('example', None)
if example is None:
example = func(*[getattr(arg, 'example', arg) for arg in args], **kwargs)
streams = [arg for arg in args if isinstance(arg, Streaming)]
if 'stream_type' in kwargs:
stream_type = kwargs['stream_type']
else:
stream_type = ('streaming'
if any(s._stream_type == 'streaming' for s in streams)
else 'updating')
if len(streams) > 1:
stream = type(streams[0].stream).zip(*[getattr(arg, 'stream', arg) for arg in args])
stream = stream.map(apply_args, func, kwargs)
else:
s = streams[0]
if isinstance(args[0], Streaming):
stream = s.stream.map(func, *args[1:], **kwargs)